/*	$NetBSD: intel_pm.c,v 1.25 2020/12/16 19:49:05 christos Exp $	*/

/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_pm.c,v 1.25 2020/12/16 19:49:05 christos Exp $");

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#ifndef __NetBSD__
#include "../../../platform/x86/intel_ips.h"
#endif
#include <linux/module.h>

#include <linux/nbsd-namespace.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * The stage is entered automatically when the GPU is idle and RC6 support is
 * enabled; as soon as a new workload arrives, the GPU wakes up automatically
 * as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in the
 * latency required to enter and leave RC6 and in the voltage consumed by the
 * GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the most power savings; deeper states save more power, but
 * require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE			(1<<0)
#define INTEL_RC6p_ENABLE			(1<<1)
#define INTEL_RC6pp_ENABLE			(1<<2)

static void bxt_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Lower the display internal timeout.
	 * This is needed to avoid any hard hangs when the DSI port PLL
	 * is off and an MMIO access is attempted by any privileged
	 * application, using batch buffers or any other means.
85 */ 86 I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); 87 } 88 89 static void i915_pineview_get_mem_freq(struct drm_device *dev) 90 { 91 struct drm_i915_private *dev_priv = dev->dev_private; 92 u32 tmp; 93 94 tmp = I915_READ(CLKCFG); 95 96 switch (tmp & CLKCFG_FSB_MASK) { 97 case CLKCFG_FSB_533: 98 dev_priv->fsb_freq = 533; /* 133*4 */ 99 break; 100 case CLKCFG_FSB_800: 101 dev_priv->fsb_freq = 800; /* 200*4 */ 102 break; 103 case CLKCFG_FSB_667: 104 dev_priv->fsb_freq = 667; /* 167*4 */ 105 break; 106 case CLKCFG_FSB_400: 107 dev_priv->fsb_freq = 400; /* 100*4 */ 108 break; 109 } 110 111 switch (tmp & CLKCFG_MEM_MASK) { 112 case CLKCFG_MEM_533: 113 dev_priv->mem_freq = 533; 114 break; 115 case CLKCFG_MEM_667: 116 dev_priv->mem_freq = 667; 117 break; 118 case CLKCFG_MEM_800: 119 dev_priv->mem_freq = 800; 120 break; 121 } 122 123 /* detect pineview DDR3 setting */ 124 tmp = I915_READ(CSHRDDR3CTL); 125 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0; 126 } 127 128 static void i915_ironlake_get_mem_freq(struct drm_device *dev) 129 { 130 struct drm_i915_private *dev_priv = dev->dev_private; 131 u16 ddrpll, csipll; 132 133 ddrpll = I915_READ16(DDRMPLL1); 134 csipll = I915_READ16(CSIPLL0); 135 136 switch (ddrpll & 0xff) { 137 case 0xc: 138 dev_priv->mem_freq = 800; 139 break; 140 case 0x10: 141 dev_priv->mem_freq = 1066; 142 break; 143 case 0x14: 144 dev_priv->mem_freq = 1333; 145 break; 146 case 0x18: 147 dev_priv->mem_freq = 1600; 148 break; 149 default: 150 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n", 151 ddrpll & 0xff); 152 dev_priv->mem_freq = 0; 153 break; 154 } 155 156 dev_priv->ips.r_t = dev_priv->mem_freq; 157 158 switch (csipll & 0x3ff) { 159 case 0x00c: 160 dev_priv->fsb_freq = 3200; 161 break; 162 case 0x00e: 163 dev_priv->fsb_freq = 3733; 164 break; 165 case 0x010: 166 dev_priv->fsb_freq = 4266; 167 break; 168 case 0x012: 169 dev_priv->fsb_freq = 4800; 170 break; 171 case 0x014: 172 dev_priv->fsb_freq = 5333; 173 break; 174 case 0x016: 175 dev_priv->fsb_freq = 5866; 176 break; 177 case 0x018: 178 dev_priv->fsb_freq = 6400; 179 break; 180 default: 181 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n", 182 csipll & 0x3ff); 183 dev_priv->fsb_freq = 0; 184 break; 185 } 186 187 if (dev_priv->fsb_freq == 3200) { 188 dev_priv->ips.c_m = 0; 189 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { 190 dev_priv->ips.c_m = 1; 191 } else { 192 dev_priv->ips.c_m = 2; 193 } 194 } 195 196 static const struct cxsr_latency cxsr_latency_table[] = { 197 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */ 198 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */ 199 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */ 200 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */ 201 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */ 202 203 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */ 204 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */ 205 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */ 206 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */ 207 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */ 208 209 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */ 210 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */ 211 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */ 212 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */ 213 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */ 214 215 {0, 0, 800, 400, 3438, 33438, 4065, 
34065}, /* DDR2-400 SC */ 216 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */ 217 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */ 218 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */ 219 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */ 220 221 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */ 222 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */ 223 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */ 224 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */ 225 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */ 226 227 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */ 228 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */ 229 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */ 230 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */ 231 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */ 232 }; 233 234 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, 235 int is_ddr3, 236 int fsb, 237 int mem) 238 { 239 const struct cxsr_latency *latency; 240 int i; 241 242 if (fsb == 0 || mem == 0) 243 return NULL; 244 245 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { 246 latency = &cxsr_latency_table[i]; 247 if (is_desktop == latency->is_desktop && 248 is_ddr3 == latency->is_ddr3 && 249 fsb == latency->fsb_freq && mem == latency->mem_freq) 250 return latency; 251 } 252 253 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n"); 254 255 return NULL; 256 } 257 258 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) 259 { 260 u32 val; 261 262 mutex_lock(&dev_priv->rps.hw_lock); 263 264 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 265 if (enable) 266 val &= ~FORCE_DDR_HIGH_FREQ; 267 else 268 val |= FORCE_DDR_HIGH_FREQ; 269 val &= ~FORCE_DDR_LOW_FREQ; 270 val |= FORCE_DDR_FREQ_REQ_ACK; 271 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 272 273 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 274 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) 275 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); 276 277 mutex_unlock(&dev_priv->rps.hw_lock); 278 } 279 280 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) 281 { 282 u32 val; 283 284 mutex_lock(&dev_priv->rps.hw_lock); 285 286 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 287 if (enable) 288 val |= DSP_MAXFIFO_PM5_ENABLE; 289 else 290 val &= ~DSP_MAXFIFO_PM5_ENABLE; 291 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); 292 293 mutex_unlock(&dev_priv->rps.hw_lock); 294 } 295 296 #define FW_WM(value, plane) \ 297 (((u32)(value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK) 298 299 void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) 300 { 301 struct drm_device *dev = dev_priv->dev; 302 u32 val; 303 304 if (IS_VALLEYVIEW(dev)) { 305 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0); 306 POSTING_READ(FW_BLC_SELF_VLV); 307 dev_priv->wm.vlv.cxsr = enable; 308 } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) { 309 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0); 310 POSTING_READ(FW_BLC_SELF); 311 } else if (IS_PINEVIEW(dev)) { 312 val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN; 313 val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0; 314 I915_WRITE(DSPFW3, val); 315 POSTING_READ(DSPFW3); 316 } else if (IS_I945G(dev) || IS_I945GM(dev)) { 317 val = enable ? 
_MASKED_BIT_ENABLE(FW_BLC_SELF_EN) : 318 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN); 319 I915_WRITE(FW_BLC_SELF, val); 320 POSTING_READ(FW_BLC_SELF); 321 } else if (IS_I915GM(dev)) { 322 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) : 323 _MASKED_BIT_DISABLE(INSTPM_SELF_EN); 324 I915_WRITE(INSTPM, val); 325 POSTING_READ(INSTPM); 326 } else { 327 return; 328 } 329 330 DRM_DEBUG_KMS("memory self-refresh is %s\n", 331 enable ? "enabled" : "disabled"); 332 } 333 334 335 /* 336 * Latency for FIFO fetches is dependent on several factors: 337 * - memory configuration (speed, channels) 338 * - chipset 339 * - current MCH state 340 * It can be fairly high in some situations, so here we assume a fairly 341 * pessimal value. It's a tradeoff between extra memory fetches (if we 342 * set this value too high, the FIFO will fetch frequently to stay full) 343 * and power consumption (set it too low to save power and we might see 344 * FIFO underruns and display "flicker"). 345 * 346 * A value of 5us seems to be a good balance; safe for very low end 347 * platforms but not overly aggressive on lower latency configs. 348 */ 349 static const int pessimal_latency_ns = 5000; 350 351 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ 352 ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) 353 354 static int vlv_get_fifo_size(struct drm_device *dev, 355 enum pipe pipe, int plane) 356 { 357 struct drm_i915_private *dev_priv = dev->dev_private; 358 int sprite0_start, sprite1_start, size; 359 360 switch (pipe) { 361 uint32_t dsparb, dsparb2, dsparb3; 362 case PIPE_A: 363 dsparb = I915_READ(DSPARB); 364 dsparb2 = I915_READ(DSPARB2); 365 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0); 366 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4); 367 break; 368 case PIPE_B: 369 dsparb = I915_READ(DSPARB); 370 dsparb2 = I915_READ(DSPARB2); 371 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8); 372 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12); 373 break; 374 case PIPE_C: 375 dsparb2 = I915_READ(DSPARB2); 376 dsparb3 = I915_READ(DSPARB3); 377 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16); 378 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); 379 break; 380 default: 381 return 0; 382 } 383 384 switch (plane) { 385 case 0: 386 size = sprite0_start; 387 break; 388 case 1: 389 size = sprite1_start - sprite0_start; 390 break; 391 case 2: 392 size = 512 - 1 - sprite1_start; 393 break; 394 default: 395 return 0; 396 } 397 398 DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n", 399 pipe_name(pipe), plane == 0 ? "primary" : "sprite", 400 plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1), 401 size); 402 403 return size; 404 } 405 406 static int i9xx_get_fifo_size(struct drm_device *dev, int plane) 407 { 408 struct drm_i915_private *dev_priv = dev->dev_private; 409 uint32_t dsparb = I915_READ(DSPARB); 410 int size; 411 412 size = dsparb & 0x7f; 413 if (plane) 414 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size; 415 416 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, 417 plane ? "B" : "A", size); 418 419 return size; 420 } 421 422 static int i830_get_fifo_size(struct drm_device *dev, int plane) 423 { 424 struct drm_i915_private *dev_priv = dev->dev_private; 425 uint32_t dsparb = I915_READ(DSPARB); 426 int size; 427 428 size = dsparb & 0x1ff; 429 if (plane) 430 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size; 431 size >>= 1; /* Convert to cachelines */ 432 433 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, 434 plane ? 
"B" : "A", size); 435 436 return size; 437 } 438 439 static int i845_get_fifo_size(struct drm_device *dev, int plane) 440 { 441 struct drm_i915_private *dev_priv = dev->dev_private; 442 uint32_t dsparb = I915_READ(DSPARB); 443 int size; 444 445 size = dsparb & 0x7f; 446 size >>= 2; /* Convert to cachelines */ 447 448 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, 449 plane ? "B" : "A", 450 size); 451 452 return size; 453 } 454 455 /* Pineview has different values for various configs */ 456 static const struct intel_watermark_params pineview_display_wm = { 457 .fifo_size = PINEVIEW_DISPLAY_FIFO, 458 .max_wm = PINEVIEW_MAX_WM, 459 .default_wm = PINEVIEW_DFT_WM, 460 .guard_size = PINEVIEW_GUARD_WM, 461 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 462 }; 463 static const struct intel_watermark_params pineview_display_hplloff_wm = { 464 .fifo_size = PINEVIEW_DISPLAY_FIFO, 465 .max_wm = PINEVIEW_MAX_WM, 466 .default_wm = PINEVIEW_DFT_HPLLOFF_WM, 467 .guard_size = PINEVIEW_GUARD_WM, 468 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 469 }; 470 static const struct intel_watermark_params pineview_cursor_wm = { 471 .fifo_size = PINEVIEW_CURSOR_FIFO, 472 .max_wm = PINEVIEW_CURSOR_MAX_WM, 473 .default_wm = PINEVIEW_CURSOR_DFT_WM, 474 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 475 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 476 }; 477 static const struct intel_watermark_params pineview_cursor_hplloff_wm = { 478 .fifo_size = PINEVIEW_CURSOR_FIFO, 479 .max_wm = PINEVIEW_CURSOR_MAX_WM, 480 .default_wm = PINEVIEW_CURSOR_DFT_WM, 481 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 482 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 483 }; 484 static const struct intel_watermark_params g4x_wm_info = { 485 .fifo_size = G4X_FIFO_SIZE, 486 .max_wm = G4X_MAX_WM, 487 .default_wm = G4X_MAX_WM, 488 .guard_size = 2, 489 .cacheline_size = G4X_FIFO_LINE_SIZE, 490 }; 491 static const struct intel_watermark_params g4x_cursor_wm_info = { 492 .fifo_size = I965_CURSOR_FIFO, 493 .max_wm = I965_CURSOR_MAX_WM, 494 .default_wm = I965_CURSOR_DFT_WM, 495 .guard_size = 2, 496 .cacheline_size = G4X_FIFO_LINE_SIZE, 497 }; 498 static const struct intel_watermark_params valleyview_wm_info __unused = { 499 .fifo_size = VALLEYVIEW_FIFO_SIZE, 500 .max_wm = VALLEYVIEW_MAX_WM, 501 .default_wm = VALLEYVIEW_MAX_WM, 502 .guard_size = 2, 503 .cacheline_size = G4X_FIFO_LINE_SIZE, 504 }; 505 static const struct intel_watermark_params valleyview_cursor_wm_info __unused = { 506 .fifo_size = I965_CURSOR_FIFO, 507 .max_wm = VALLEYVIEW_CURSOR_MAX_WM, 508 .default_wm = I965_CURSOR_DFT_WM, 509 .guard_size = 2, 510 .cacheline_size = G4X_FIFO_LINE_SIZE, 511 }; 512 static const struct intel_watermark_params i965_cursor_wm_info = { 513 .fifo_size = I965_CURSOR_FIFO, 514 .max_wm = I965_CURSOR_MAX_WM, 515 .default_wm = I965_CURSOR_DFT_WM, 516 .guard_size = 2, 517 .cacheline_size = I915_FIFO_LINE_SIZE, 518 }; 519 static const struct intel_watermark_params i945_wm_info = { 520 .fifo_size = I945_FIFO_SIZE, 521 .max_wm = I915_MAX_WM, 522 .default_wm = 1, 523 .guard_size = 2, 524 .cacheline_size = I915_FIFO_LINE_SIZE, 525 }; 526 static const struct intel_watermark_params i915_wm_info = { 527 .fifo_size = I915_FIFO_SIZE, 528 .max_wm = I915_MAX_WM, 529 .default_wm = 1, 530 .guard_size = 2, 531 .cacheline_size = I915_FIFO_LINE_SIZE, 532 }; 533 static const struct intel_watermark_params i830_a_wm_info = { 534 .fifo_size = I855GM_FIFO_SIZE, 535 .max_wm = I915_MAX_WM, 536 .default_wm = 1, 537 .guard_size = 2, 538 .cacheline_size = I830_FIFO_LINE_SIZE, 539 }; 540 static const struct 
intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO available to this plane, in entries
 * @pixel_size: display pixel size (bytes per pixel)
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills
 * past the watermark point. If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size,
					int pixel_size,
					unsigned long latency_ns)
{
	long entries_required, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand
	 */
	entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
		1000;
	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

	wm_size = fifo_size - (entries_required + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > (long)wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8, which is the burst size, since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
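	 *
	 * As a purely illustrative example (hypothetical numbers, not taken
	 * from any platform table): a 100000 kHz pixel clock at 4 bytes per
	 * pixel with latency_ns = 5000 drains roughly
	 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes during the latency
	 * window; with a 64 byte FIFO line that is DIV_ROUND_UP(2000, 64) = 32
	 * entries, so a 96 entry FIFO with a guard of 2 would give
	 * wm_size = 96 - (32 + 2) = 62, comfortably above the floor of 8
	 * applied below.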
609 */ 610 if (wm_size <= 8) 611 wm_size = 8; 612 613 return wm_size; 614 } 615 616 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) 617 { 618 struct drm_crtc *crtc, *enabled = NULL; 619 620 for_each_crtc(dev, crtc) { 621 if (intel_crtc_active(crtc)) { 622 if (enabled) 623 return NULL; 624 enabled = crtc; 625 } 626 } 627 628 return enabled; 629 } 630 631 static void pineview_update_wm(struct drm_crtc *unused_crtc) 632 { 633 struct drm_device *dev = unused_crtc->dev; 634 struct drm_i915_private *dev_priv = dev->dev_private; 635 struct drm_crtc *crtc; 636 const struct cxsr_latency *latency; 637 u32 reg; 638 unsigned long wm; 639 640 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3, 641 dev_priv->fsb_freq, dev_priv->mem_freq); 642 if (!latency) { 643 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n"); 644 intel_set_memory_cxsr(dev_priv, false); 645 return; 646 } 647 648 crtc = single_enabled_crtc(dev); 649 if (crtc) { 650 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 651 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8; 652 int clock = adjusted_mode->crtc_clock; 653 654 /* Display SR */ 655 wm = intel_calculate_wm(clock, &pineview_display_wm, 656 pineview_display_wm.fifo_size, 657 pixel_size, latency->display_sr); 658 reg = I915_READ(DSPFW1); 659 reg &= ~DSPFW_SR_MASK; 660 reg |= FW_WM(wm, SR); 661 I915_WRITE(DSPFW1, reg); 662 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg); 663 664 /* cursor SR */ 665 wm = intel_calculate_wm(clock, &pineview_cursor_wm, 666 pineview_display_wm.fifo_size, 667 pixel_size, latency->cursor_sr); 668 reg = I915_READ(DSPFW3); 669 reg &= ~DSPFW_CURSOR_SR_MASK; 670 reg |= FW_WM(wm, CURSOR_SR); 671 I915_WRITE(DSPFW3, reg); 672 673 /* Display HPLL off SR */ 674 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm, 675 pineview_display_hplloff_wm.fifo_size, 676 pixel_size, latency->display_hpll_disable); 677 reg = I915_READ(DSPFW3); 678 reg &= ~DSPFW_HPLL_SR_MASK; 679 reg |= FW_WM(wm, HPLL_SR); 680 I915_WRITE(DSPFW3, reg); 681 682 /* cursor HPLL off SR */ 683 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, 684 pineview_display_hplloff_wm.fifo_size, 685 pixel_size, latency->cursor_hpll_disable); 686 reg = I915_READ(DSPFW3); 687 reg &= ~DSPFW_HPLL_CURSOR_MASK; 688 reg |= FW_WM(wm, HPLL_CURSOR); 689 I915_WRITE(DSPFW3, reg); 690 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg); 691 692 intel_set_memory_cxsr(dev_priv, true); 693 } else { 694 intel_set_memory_cxsr(dev_priv, false); 695 } 696 } 697 698 static bool g4x_compute_wm0(struct drm_device *dev, 699 int plane, 700 const struct intel_watermark_params *display, 701 int display_latency_ns, 702 const struct intel_watermark_params *cursor, 703 int cursor_latency_ns, 704 int *plane_wm, 705 int *cursor_wm) 706 { 707 struct drm_crtc *crtc; 708 const struct drm_display_mode *adjusted_mode; 709 int htotal, hdisplay, clock, pixel_size; 710 int line_time_us, line_count; 711 int entries, tlb_miss; 712 713 crtc = intel_get_crtc_for_plane(dev, plane); 714 if (!intel_crtc_active(crtc)) { 715 *cursor_wm = cursor->guard_size; 716 *plane_wm = display->guard_size; 717 return false; 718 } 719 720 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 721 clock = adjusted_mode->crtc_clock; 722 htotal = adjusted_mode->crtc_htotal; 723 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; 724 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8; 725 726 /* Use the small buffer method to calculate plane 
watermark */ 727 entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000; 728 tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8; 729 if (tlb_miss > 0) 730 entries += tlb_miss; 731 entries = DIV_ROUND_UP(entries, display->cacheline_size); 732 *plane_wm = entries + display->guard_size; 733 if (*plane_wm > (int)display->max_wm) 734 *plane_wm = display->max_wm; 735 736 /* Use the large buffer method to calculate cursor watermark */ 737 line_time_us = max(htotal * 1000 / clock, 1); 738 line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; 739 entries = line_count * crtc->cursor->state->crtc_w * pixel_size; 740 tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; 741 if (tlb_miss > 0) 742 entries += tlb_miss; 743 entries = DIV_ROUND_UP(entries, cursor->cacheline_size); 744 *cursor_wm = entries + cursor->guard_size; 745 if (*cursor_wm > (int)cursor->max_wm) 746 *cursor_wm = (int)cursor->max_wm; 747 748 return true; 749 } 750 751 /* 752 * Check the wm result. 753 * 754 * If any calculated watermark values is larger than the maximum value that 755 * can be programmed into the associated watermark register, that watermark 756 * must be disabled. 757 */ 758 static bool g4x_check_srwm(struct drm_device *dev, 759 int display_wm, int cursor_wm, 760 const struct intel_watermark_params *display, 761 const struct intel_watermark_params *cursor) 762 { 763 DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n", 764 display_wm, cursor_wm); 765 766 if (display_wm > display->max_wm) { 767 DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n", 768 display_wm, display->max_wm); 769 return false; 770 } 771 772 if (cursor_wm > cursor->max_wm) { 773 DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n", 774 cursor_wm, cursor->max_wm); 775 return false; 776 } 777 778 if (!(display_wm || cursor_wm)) { 779 DRM_DEBUG_KMS("SR latency is 0, disabling\n"); 780 return false; 781 } 782 783 return true; 784 } 785 786 static bool g4x_compute_srwm(struct drm_device *dev, 787 int plane, 788 int latency_ns, 789 const struct intel_watermark_params *display, 790 const struct intel_watermark_params *cursor, 791 int *display_wm, int *cursor_wm) 792 { 793 struct drm_crtc *crtc; 794 const struct drm_display_mode *adjusted_mode; 795 int hdisplay, htotal, pixel_size, clock; 796 unsigned long line_time_us; 797 int line_count, line_size; 798 int small, large; 799 int entries; 800 801 if (!latency_ns) { 802 *display_wm = *cursor_wm = 0; 803 return false; 804 } 805 806 crtc = intel_get_crtc_for_plane(dev, plane); 807 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 808 clock = adjusted_mode->crtc_clock; 809 htotal = adjusted_mode->crtc_htotal; 810 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; 811 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8; 812 813 line_time_us = max(htotal * 1000 / clock, 1); 814 line_count = (latency_ns / line_time_us + 1000) / 1000; 815 line_size = hdisplay * pixel_size; 816 817 /* Use the minimum of the small and large buffer method for primary */ 818 small = ((clock * pixel_size / 1000) * latency_ns) / 1000; 819 large = line_count * line_size; 820 821 entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); 822 *display_wm = entries + display->guard_size; 823 824 /* calculate the self-refresh watermark for display cursor */ 825 entries = line_count * pixel_size * crtc->cursor->state->crtc_w; 826 entries = DIV_ROUND_UP(entries, cursor->cacheline_size); 827 *cursor_wm = entries + 
cursor->guard_size; 828 829 return g4x_check_srwm(dev, 830 *display_wm, *cursor_wm, 831 display, cursor); 832 } 833 834 #define FW_WM_VLV(value, plane) \ 835 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV) 836 837 static void vlv_write_wm_values(struct intel_crtc *crtc, 838 const struct vlv_wm_values *wm) 839 { 840 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 841 enum pipe pipe = crtc->pipe; 842 843 I915_WRITE(VLV_DDL(pipe), 844 (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) | 845 (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) | 846 (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) | 847 (wm->ddl[pipe].primary << DDL_PLANE_SHIFT)); 848 849 I915_WRITE(DSPFW1, 850 FW_WM(wm->sr.plane, SR) | 851 FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) | 852 FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) | 853 FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA)); 854 I915_WRITE(DSPFW2, 855 FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) | 856 FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) | 857 FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA)); 858 I915_WRITE(DSPFW3, 859 FW_WM(wm->sr.cursor, CURSOR_SR)); 860 861 if (IS_CHERRYVIEW(dev_priv)) { 862 I915_WRITE(DSPFW7_CHV, 863 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) | 864 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC)); 865 I915_WRITE(DSPFW8_CHV, 866 FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) | 867 FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE)); 868 I915_WRITE(DSPFW9_CHV, 869 FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) | 870 FW_WM(wm->pipe[PIPE_C].cursor, CURSORC)); 871 I915_WRITE(DSPHOWM, 872 FW_WM(wm->sr.plane >> 9, SR_HI) | 873 FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) | 874 FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) | 875 FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) | 876 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) | 877 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) | 878 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) | 879 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) | 880 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) | 881 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI)); 882 } else { 883 I915_WRITE(DSPFW7, 884 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) | 885 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC)); 886 I915_WRITE(DSPHOWM, 887 FW_WM(wm->sr.plane >> 9, SR_HI) | 888 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) | 889 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) | 890 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) | 891 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) | 892 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) | 893 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI)); 894 } 895 896 /* zero (unused) WM1 watermarks */ 897 I915_WRITE(DSPFW4, 0); 898 I915_WRITE(DSPFW5, 0); 899 I915_WRITE(DSPFW6, 0); 900 I915_WRITE(DSPHOWM1, 0); 901 902 POSTING_READ(DSPFW1); 903 } 904 905 #undef FW_WM_VLV 906 907 enum vlv_wm_level { 908 VLV_WM_LEVEL_PM2, 909 VLV_WM_LEVEL_PM5, 910 VLV_WM_LEVEL_DDR_DVFS, 911 }; 912 913 /* latency must be in 0.1us units. 
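 *
 * A minimal illustration with made-up numbers: at a 150000 kHz pixel rate,
 * htotal of 2000, 1920 active horizontal pixels and 4 bytes per pixel, a
 * 3 usec latency (latency = 30) covers (30 * 150000) / (2000 * 10000) = 0
 * full lines, which rounds up to 1 line of 1920 * 4 = 7680 bytes, i.e.
 * DIV_ROUND_UP(7680, 64) = 120 64-byte FIFO entries.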
*/ 914 static unsigned int vlv_wm_method2(unsigned int pixel_rate, 915 unsigned int pipe_htotal, 916 unsigned int horiz_pixels, 917 unsigned int bytes_per_pixel, 918 unsigned int latency) 919 { 920 unsigned int ret; 921 922 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 923 ret = (ret + 1) * horiz_pixels * bytes_per_pixel; 924 ret = DIV_ROUND_UP(ret, 64); 925 926 return ret; 927 } 928 929 static void vlv_setup_wm_latency(struct drm_device *dev) 930 { 931 struct drm_i915_private *dev_priv = dev->dev_private; 932 933 /* all latencies in usec */ 934 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3; 935 936 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2; 937 938 if (IS_CHERRYVIEW(dev_priv)) { 939 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12; 940 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33; 941 942 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS; 943 } 944 } 945 946 static uint16_t vlv_compute_wm_level(struct intel_plane *plane, 947 struct intel_crtc *crtc, 948 const struct intel_plane_state *state, 949 int level) 950 { 951 struct drm_i915_private *dev_priv = to_i915(plane->base.dev); 952 int clock, htotal, pixel_size, width, wm; 953 954 if (dev_priv->wm.pri_latency[level] == 0) 955 return USHRT_MAX; 956 957 if (!state->visible) 958 return 0; 959 960 pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0); 961 clock = crtc->config->base.adjusted_mode.crtc_clock; 962 htotal = crtc->config->base.adjusted_mode.crtc_htotal; 963 width = crtc->config->pipe_src_w; 964 if (WARN_ON(htotal == 0)) 965 htotal = 1; 966 967 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { 968 /* 969 * FIXME the formula gives values that are 970 * too big for the cursor FIFO, and hence we 971 * would never be able to use cursors. For 972 * now just hardcode the watermark. 
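		 * (The hardcoded 63 mirrors the fixed cursor FIFO allocation
		 * assigned in vlv_compute_fifo() below.)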
973 */ 974 wm = 63; 975 } else { 976 wm = vlv_wm_method2(clock, htotal, width, pixel_size, 977 dev_priv->wm.pri_latency[level] * 10); 978 } 979 980 return min_t(int, wm, USHRT_MAX); 981 } 982 983 static void vlv_compute_fifo(struct intel_crtc *crtc) 984 { 985 struct drm_device *dev = crtc->base.dev; 986 struct vlv_wm_state *wm_state = &crtc->wm_state; 987 struct intel_plane *plane; 988 unsigned int total_rate = 0; 989 const int fifo_size = 512 - 1; 990 int fifo_extra, fifo_left = fifo_size; 991 992 for_each_intel_plane_on_crtc(dev, crtc, plane) { 993 struct intel_plane_state *state = 994 to_intel_plane_state(plane->base.state); 995 996 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) 997 continue; 998 999 if (state->visible) { 1000 wm_state->num_active_planes++; 1001 total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0); 1002 } 1003 } 1004 1005 for_each_intel_plane_on_crtc(dev, crtc, plane) { 1006 struct intel_plane_state *state = 1007 to_intel_plane_state(plane->base.state); 1008 unsigned int rate; 1009 1010 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { 1011 plane->wm.fifo_size = 63; 1012 continue; 1013 } 1014 1015 if (!state->visible) { 1016 plane->wm.fifo_size = 0; 1017 continue; 1018 } 1019 1020 rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0); 1021 plane->wm.fifo_size = fifo_size * rate / total_rate; 1022 fifo_left -= plane->wm.fifo_size; 1023 } 1024 1025 fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); 1026 1027 /* spread the remainder evenly */ 1028 for_each_intel_plane_on_crtc(dev, crtc, plane) { 1029 int plane_extra; 1030 1031 if (fifo_left == 0) 1032 break; 1033 1034 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) 1035 continue; 1036 1037 /* give it all to the first plane if none are active */ 1038 if (plane->wm.fifo_size == 0 && 1039 wm_state->num_active_planes) 1040 continue; 1041 1042 plane_extra = min(fifo_extra, fifo_left); 1043 plane->wm.fifo_size += plane_extra; 1044 fifo_left -= plane_extra; 1045 } 1046 1047 WARN_ON(fifo_left != 0); 1048 } 1049 1050 static void vlv_invert_wms(struct intel_crtc *crtc) 1051 { 1052 struct vlv_wm_state *wm_state = &crtc->wm_state; 1053 int level; 1054 1055 for (level = 0; level < wm_state->num_levels; level++) { 1056 struct drm_device *dev = crtc->base.dev; 1057 const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1; 1058 struct intel_plane *plane; 1059 1060 wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane; 1061 wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor; 1062 1063 for_each_intel_plane_on_crtc(dev, crtc, plane) { 1064 switch (plane->base.type) { 1065 int sprite; 1066 case DRM_PLANE_TYPE_CURSOR: 1067 wm_state->wm[level].cursor = plane->wm.fifo_size - 1068 wm_state->wm[level].cursor; 1069 break; 1070 case DRM_PLANE_TYPE_PRIMARY: 1071 wm_state->wm[level].primary = plane->wm.fifo_size - 1072 wm_state->wm[level].primary; 1073 break; 1074 case DRM_PLANE_TYPE_OVERLAY: 1075 sprite = plane->plane; 1076 wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size - 1077 wm_state->wm[level].sprite[sprite]; 1078 break; 1079 } 1080 } 1081 } 1082 } 1083 1084 static void vlv_compute_wm(struct intel_crtc *crtc) 1085 { 1086 struct drm_device *dev = crtc->base.dev; 1087 struct vlv_wm_state *wm_state = &crtc->wm_state; 1088 struct intel_plane *plane; 1089 int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1; 1090 int level; 1091 1092 memset(wm_state, 0, sizeof(*wm_state)); 1093 1094 wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; 1095 
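	/*
	 * Note: maxfifo/cxsr is never allowed on pipe C here, and it is
	 * dropped again below unless exactly one plane is active on the pipe.
	 */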
wm_state->num_levels = to_i915(dev)->wm.max_level + 1; 1096 1097 wm_state->num_active_planes = 0; 1098 1099 vlv_compute_fifo(crtc); 1100 1101 if (wm_state->num_active_planes != 1) 1102 wm_state->cxsr = false; 1103 1104 if (wm_state->cxsr) { 1105 for (level = 0; level < wm_state->num_levels; level++) { 1106 wm_state->sr[level].plane = sr_fifo_size; 1107 wm_state->sr[level].cursor = 63; 1108 } 1109 } 1110 1111 for_each_intel_plane_on_crtc(dev, crtc, plane) { 1112 struct intel_plane_state *state = 1113 to_intel_plane_state(plane->base.state); 1114 1115 if (!state->visible) 1116 continue; 1117 1118 /* normal watermarks */ 1119 for (level = 0; level < wm_state->num_levels; level++) { 1120 int wm = vlv_compute_wm_level(plane, crtc, state, level); 1121 int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511; 1122 1123 /* hack */ 1124 if (WARN_ON(level == 0 && wm > max_wm)) 1125 wm = max_wm; 1126 1127 if (wm > plane->wm.fifo_size) 1128 break; 1129 1130 switch (plane->base.type) { 1131 int sprite; 1132 case DRM_PLANE_TYPE_CURSOR: 1133 wm_state->wm[level].cursor = wm; 1134 break; 1135 case DRM_PLANE_TYPE_PRIMARY: 1136 wm_state->wm[level].primary = wm; 1137 break; 1138 case DRM_PLANE_TYPE_OVERLAY: 1139 sprite = plane->plane; 1140 wm_state->wm[level].sprite[sprite] = wm; 1141 break; 1142 } 1143 } 1144 1145 wm_state->num_levels = level; 1146 1147 if (!wm_state->cxsr) 1148 continue; 1149 1150 /* maxfifo watermarks */ 1151 switch (plane->base.type) { 1152 int sprite, level; 1153 case DRM_PLANE_TYPE_CURSOR: 1154 for (level = 0; level < wm_state->num_levels; level++) 1155 wm_state->sr[level].cursor = 1156 wm_state->wm[level].cursor; 1157 break; 1158 case DRM_PLANE_TYPE_PRIMARY: 1159 for (level = 0; level < wm_state->num_levels; level++) 1160 wm_state->sr[level].plane = 1161 min(wm_state->sr[level].plane, 1162 wm_state->wm[level].primary); 1163 break; 1164 case DRM_PLANE_TYPE_OVERLAY: 1165 sprite = plane->plane; 1166 for (level = 0; level < wm_state->num_levels; level++) 1167 wm_state->sr[level].plane = 1168 min(wm_state->sr[level].plane, 1169 wm_state->wm[level].sprite[sprite]); 1170 break; 1171 } 1172 } 1173 1174 /* clear any (partially) filled invalid levels */ 1175 for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) { 1176 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); 1177 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); 1178 } 1179 1180 vlv_invert_wms(crtc); 1181 } 1182 1183 #define VLV_FIFO(plane, value) \ 1184 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) 1185 1186 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) 1187 { 1188 struct drm_device *dev = crtc->base.dev; 1189 struct drm_i915_private *dev_priv = to_i915(dev); 1190 struct intel_plane *plane; 1191 int sprite0_start = 0, sprite1_start = 0, fifo_size = 0; 1192 1193 for_each_intel_plane_on_crtc(dev, crtc, plane) { 1194 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { 1195 WARN_ON(plane->wm.fifo_size != 63); 1196 continue; 1197 } 1198 1199 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) 1200 sprite0_start = plane->wm.fifo_size; 1201 else if (plane->plane == 0) 1202 sprite1_start = sprite0_start + plane->wm.fifo_size; 1203 else 1204 fifo_size = sprite1_start + plane->wm.fifo_size; 1205 } 1206 1207 WARN_ON(fifo_size != 512 - 1); 1208 1209 DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", 1210 pipe_name(crtc->pipe), sprite0_start, 1211 sprite1_start, fifo_size); 1212 1213 switch (crtc->pipe) { 1214 uint32_t dsparb, dsparb2, dsparb3; 
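	/*
	 * Program the FIFO split points computed above: the low 8 bits of
	 * each start offset live in DSPARB (DSPARB3 for pipe C) and the
	 * upper bits in DSPARB2.
	 */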
1215 case PIPE_A: 1216 dsparb = I915_READ(DSPARB); 1217 dsparb2 = I915_READ(DSPARB2); 1218 1219 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) | 1220 VLV_FIFO(SPRITEB, 0xff)); 1221 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) | 1222 VLV_FIFO(SPRITEB, sprite1_start)); 1223 1224 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) | 1225 VLV_FIFO(SPRITEB_HI, 0x1)); 1226 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) | 1227 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8)); 1228 1229 I915_WRITE(DSPARB, dsparb); 1230 I915_WRITE(DSPARB2, dsparb2); 1231 break; 1232 case PIPE_B: 1233 dsparb = I915_READ(DSPARB); 1234 dsparb2 = I915_READ(DSPARB2); 1235 1236 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) | 1237 VLV_FIFO(SPRITED, 0xff)); 1238 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) | 1239 VLV_FIFO(SPRITED, sprite1_start)); 1240 1241 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) | 1242 VLV_FIFO(SPRITED_HI, 0xff)); 1243 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) | 1244 VLV_FIFO(SPRITED_HI, sprite1_start >> 8)); 1245 1246 I915_WRITE(DSPARB, dsparb); 1247 I915_WRITE(DSPARB2, dsparb2); 1248 break; 1249 case PIPE_C: 1250 dsparb3 = I915_READ(DSPARB3); 1251 dsparb2 = I915_READ(DSPARB2); 1252 1253 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) | 1254 VLV_FIFO(SPRITEF, 0xff)); 1255 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) | 1256 VLV_FIFO(SPRITEF, sprite1_start)); 1257 1258 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) | 1259 VLV_FIFO(SPRITEF_HI, 0xff)); 1260 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) | 1261 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8)); 1262 1263 I915_WRITE(DSPARB3, dsparb3); 1264 I915_WRITE(DSPARB2, dsparb2); 1265 break; 1266 default: 1267 break; 1268 } 1269 } 1270 1271 #undef VLV_FIFO 1272 1273 static void vlv_merge_wm(struct drm_device *dev, 1274 struct vlv_wm_values *wm) 1275 { 1276 struct intel_crtc *crtc; 1277 int num_active_crtcs = 0; 1278 1279 wm->level = to_i915(dev)->wm.max_level; 1280 wm->cxsr = true; 1281 1282 for_each_intel_crtc(dev, crtc) { 1283 const struct vlv_wm_state *wm_state = &crtc->wm_state; 1284 1285 if (!crtc->active) 1286 continue; 1287 1288 if (!wm_state->cxsr) 1289 wm->cxsr = false; 1290 1291 num_active_crtcs++; 1292 wm->level = min_t(int, wm->level, wm_state->num_levels - 1); 1293 } 1294 1295 if (num_active_crtcs != 1) 1296 wm->cxsr = false; 1297 1298 if (num_active_crtcs > 1) 1299 wm->level = VLV_WM_LEVEL_PM2; 1300 1301 for_each_intel_crtc(dev, crtc) { 1302 struct vlv_wm_state *wm_state = &crtc->wm_state; 1303 enum pipe pipe = crtc->pipe; 1304 1305 if (!crtc->active) 1306 continue; 1307 1308 wm->pipe[pipe] = wm_state->wm[wm->level]; 1309 if (wm->cxsr) 1310 wm->sr = wm_state->sr[wm->level]; 1311 1312 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2; 1313 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2; 1314 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2; 1315 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2; 1316 } 1317 } 1318 1319 static void vlv_update_wm(struct drm_crtc *crtc) 1320 { 1321 struct drm_device *dev = crtc->dev; 1322 struct drm_i915_private *dev_priv = dev->dev_private; 1323 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 1324 enum pipe pipe = intel_crtc->pipe; 1325 struct vlv_wm_values wm = {}; 1326 1327 vlv_compute_wm(intel_crtc); 1328 vlv_merge_wm(dev, &wm); 1329 1330 if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) { 1331 /* FIXME should be part of crtc atomic commit */ 1332 vlv_pipe_set_fifo_size(intel_crtc); 1333 return; 1334 } 1335 1336 if (wm.level < VLV_WM_LEVEL_DDR_DVFS && 1337 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS) 1338 chv_set_memory_dvfs(dev_priv, false); 
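	/*
	 * Deeper memory power states (DDR DVFS, PM5, cxsr) are dropped before
	 * the new watermarks are written and are only re-enabled further down,
	 * once the new values are in place.
	 */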
1339 1340 if (wm.level < VLV_WM_LEVEL_PM5 && 1341 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5) 1342 chv_set_memory_pm5(dev_priv, false); 1343 1344 if (!wm.cxsr && dev_priv->wm.vlv.cxsr) 1345 intel_set_memory_cxsr(dev_priv, false); 1346 1347 /* FIXME should be part of crtc atomic commit */ 1348 vlv_pipe_set_fifo_size(intel_crtc); 1349 1350 vlv_write_wm_values(intel_crtc, &wm); 1351 1352 DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " 1353 "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", 1354 pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor, 1355 wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1], 1356 wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr); 1357 1358 if (wm.cxsr && !dev_priv->wm.vlv.cxsr) 1359 intel_set_memory_cxsr(dev_priv, true); 1360 1361 if (wm.level >= VLV_WM_LEVEL_PM5 && 1362 dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5) 1363 chv_set_memory_pm5(dev_priv, true); 1364 1365 if (wm.level >= VLV_WM_LEVEL_DDR_DVFS && 1366 dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS) 1367 chv_set_memory_dvfs(dev_priv, true); 1368 1369 dev_priv->wm.vlv = wm; 1370 } 1371 1372 #define single_plane_enabled(mask) is_power_of_2(mask) 1373 1374 static void g4x_update_wm(struct drm_crtc *crtc) 1375 { 1376 struct drm_device *dev = crtc->dev; 1377 static const int sr_latency_ns = 12000; 1378 struct drm_i915_private *dev_priv = dev->dev_private; 1379 int planea_wm, planeb_wm, cursora_wm, cursorb_wm; 1380 int plane_sr, cursor_sr; 1381 unsigned int enabled = 0; 1382 bool cxsr_enabled; 1383 1384 if (g4x_compute_wm0(dev, PIPE_A, 1385 &g4x_wm_info, pessimal_latency_ns, 1386 &g4x_cursor_wm_info, pessimal_latency_ns, 1387 &planea_wm, &cursora_wm)) 1388 enabled |= 1 << PIPE_A; 1389 1390 if (g4x_compute_wm0(dev, PIPE_B, 1391 &g4x_wm_info, pessimal_latency_ns, 1392 &g4x_cursor_wm_info, pessimal_latency_ns, 1393 &planeb_wm, &cursorb_wm)) 1394 enabled |= 1 << PIPE_B; 1395 1396 if (single_plane_enabled(enabled) && 1397 g4x_compute_srwm(dev, ffs(enabled) - 1, 1398 sr_latency_ns, 1399 &g4x_wm_info, 1400 &g4x_cursor_wm_info, 1401 &plane_sr, &cursor_sr)) { 1402 cxsr_enabled = true; 1403 } else { 1404 cxsr_enabled = false; 1405 intel_set_memory_cxsr(dev_priv, false); 1406 plane_sr = cursor_sr = 0; 1407 } 1408 1409 DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " 1410 "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", 1411 planea_wm, cursora_wm, 1412 planeb_wm, cursorb_wm, 1413 plane_sr, cursor_sr); 1414 1415 I915_WRITE(DSPFW1, 1416 FW_WM(plane_sr, SR) | 1417 FW_WM(cursorb_wm, CURSORB) | 1418 FW_WM(planeb_wm, PLANEB) | 1419 FW_WM(planea_wm, PLANEA)); 1420 I915_WRITE(DSPFW2, 1421 (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | 1422 FW_WM(cursora_wm, CURSORA)); 1423 /* HPLL off in SR has some issues on G4x... 
disable it */ 1424 I915_WRITE(DSPFW3, 1425 (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) | 1426 FW_WM(cursor_sr, CURSOR_SR)); 1427 1428 if (cxsr_enabled) 1429 intel_set_memory_cxsr(dev_priv, true); 1430 } 1431 1432 static void i965_update_wm(struct drm_crtc *unused_crtc) 1433 { 1434 struct drm_device *dev = unused_crtc->dev; 1435 struct drm_i915_private *dev_priv = dev->dev_private; 1436 struct drm_crtc *crtc; 1437 int srwm = 1; 1438 int cursor_sr = 16; 1439 bool cxsr_enabled; 1440 1441 /* Calc sr entries for one plane configs */ 1442 crtc = single_enabled_crtc(dev); 1443 if (crtc) { 1444 /* self-refresh has much higher latency */ 1445 static const int sr_latency_ns = 12000; 1446 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 1447 int clock = adjusted_mode->crtc_clock; 1448 int htotal = adjusted_mode->crtc_htotal; 1449 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; 1450 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8; 1451 unsigned long line_time_us; 1452 int entries; 1453 1454 line_time_us = max(htotal * 1000 / clock, 1); 1455 1456 /* Use ns/us then divide to preserve precision */ 1457 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 1458 pixel_size * hdisplay; 1459 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE); 1460 srwm = I965_FIFO_SIZE - entries; 1461 if (srwm < 0) 1462 srwm = 1; 1463 srwm &= 0x1ff; 1464 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", 1465 entries, srwm); 1466 1467 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 1468 pixel_size * crtc->cursor->state->crtc_w; 1469 entries = DIV_ROUND_UP(entries, 1470 i965_cursor_wm_info.cacheline_size); 1471 cursor_sr = i965_cursor_wm_info.fifo_size - 1472 (entries + i965_cursor_wm_info.guard_size); 1473 1474 if (cursor_sr > i965_cursor_wm_info.max_wm) 1475 cursor_sr = i965_cursor_wm_info.max_wm; 1476 1477 DRM_DEBUG_KMS("self-refresh watermark: display plane %d " 1478 "cursor %d\n", srwm, cursor_sr); 1479 1480 cxsr_enabled = true; 1481 } else { 1482 cxsr_enabled = false; 1483 /* Turn off self refresh if both pipes are enabled */ 1484 intel_set_memory_cxsr(dev_priv, false); 1485 } 1486 1487 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", 1488 srwm); 1489 1490 /* 965 has limitations... 
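 * the per-plane and cursor FIFO watermarks are simply written as 8 here,
 * and only the self-refresh (SR) values computed above vary.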
*/ 1491 I915_WRITE(DSPFW1, FW_WM(srwm, SR) | 1492 FW_WM(8, CURSORB) | 1493 FW_WM(8, PLANEB) | 1494 FW_WM(8, PLANEA)); 1495 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) | 1496 FW_WM(8, PLANEC_OLD)); 1497 /* update cursor SR watermark */ 1498 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR)); 1499 1500 if (cxsr_enabled) 1501 intel_set_memory_cxsr(dev_priv, true); 1502 } 1503 1504 #undef FW_WM 1505 1506 static void i9xx_update_wm(struct drm_crtc *unused_crtc) 1507 { 1508 struct drm_device *dev = unused_crtc->dev; 1509 struct drm_i915_private *dev_priv = dev->dev_private; 1510 const struct intel_watermark_params *wm_info; 1511 uint32_t fwater_lo; 1512 uint32_t fwater_hi; 1513 int cwm, srwm = 1; 1514 int fifo_size; 1515 int planea_wm, planeb_wm; 1516 struct drm_crtc *crtc, *enabled = NULL; 1517 1518 if (IS_I945GM(dev)) 1519 wm_info = &i945_wm_info; 1520 else if (!IS_GEN2(dev)) 1521 wm_info = &i915_wm_info; 1522 else 1523 wm_info = &i830_a_wm_info; 1524 1525 fifo_size = dev_priv->display.get_fifo_size(dev, 0); 1526 crtc = intel_get_crtc_for_plane(dev, 0); 1527 if (intel_crtc_active(crtc)) { 1528 const struct drm_display_mode *adjusted_mode; 1529 int cpp = crtc->primary->state->fb->bits_per_pixel / 8; 1530 if (IS_GEN2(dev)) 1531 cpp = 4; 1532 1533 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 1534 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1535 wm_info, fifo_size, cpp, 1536 pessimal_latency_ns); 1537 enabled = crtc; 1538 } else { 1539 planea_wm = fifo_size - wm_info->guard_size; 1540 if (planea_wm > (long)wm_info->max_wm) 1541 planea_wm = wm_info->max_wm; 1542 } 1543 1544 if (IS_GEN2(dev)) 1545 wm_info = &i830_bc_wm_info; 1546 1547 fifo_size = dev_priv->display.get_fifo_size(dev, 1); 1548 crtc = intel_get_crtc_for_plane(dev, 1); 1549 if (intel_crtc_active(crtc)) { 1550 const struct drm_display_mode *adjusted_mode; 1551 int cpp = crtc->primary->state->fb->bits_per_pixel / 8; 1552 if (IS_GEN2(dev)) 1553 cpp = 4; 1554 1555 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 1556 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1557 wm_info, fifo_size, cpp, 1558 pessimal_latency_ns); 1559 if (enabled == NULL) 1560 enabled = crtc; 1561 else 1562 enabled = NULL; 1563 } else { 1564 planeb_wm = fifo_size - wm_info->guard_size; 1565 if (planeb_wm > (long)wm_info->max_wm) 1566 planeb_wm = wm_info->max_wm; 1567 } 1568 1569 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); 1570 1571 if (IS_I915GM(dev) && enabled) { 1572 struct drm_i915_gem_object *obj; 1573 1574 obj = intel_fb_obj(enabled->primary->state->fb); 1575 1576 /* self-refresh seems busted with untiled */ 1577 if (obj->tiling_mode == I915_TILING_NONE) 1578 enabled = NULL; 1579 } 1580 1581 /* 1582 * Overlay gets an aggressive default since video jitter is bad. 1583 */ 1584 cwm = 2; 1585 1586 /* Play safe and disable self-refresh before adjusting watermarks. 
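 * Self-refresh is re-enabled at the end of this function, once the new
 * values have been written, provided a single eligible plane remains.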
*/ 1587 intel_set_memory_cxsr(dev_priv, false); 1588 1589 /* Calc sr entries for one plane configs */ 1590 if (HAS_FW_BLC(dev) && enabled) { 1591 /* self-refresh has much higher latency */ 1592 static const int sr_latency_ns = 6000; 1593 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode; 1594 int clock = adjusted_mode->crtc_clock; 1595 int htotal = adjusted_mode->crtc_htotal; 1596 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w; 1597 int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8; 1598 unsigned long line_time_us; 1599 int entries; 1600 1601 line_time_us = max(htotal * 1000 / clock, 1); 1602 1603 /* Use ns/us then divide to preserve precision */ 1604 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 1605 pixel_size * hdisplay; 1606 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); 1607 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); 1608 srwm = wm_info->fifo_size - entries; 1609 if (srwm < 0) 1610 srwm = 1; 1611 1612 if (IS_I945G(dev) || IS_I945GM(dev)) 1613 I915_WRITE(FW_BLC_SELF, 1614 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); 1615 else if (IS_I915GM(dev)) 1616 I915_WRITE(FW_BLC_SELF, srwm & 0x3f); 1617 } 1618 1619 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", 1620 planea_wm, planeb_wm, cwm, srwm); 1621 1622 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); 1623 fwater_hi = (cwm & 0x1f); 1624 1625 /* Set request length to 8 cachelines per fetch */ 1626 fwater_lo = fwater_lo | (1 << 24) | (1 << 8); 1627 fwater_hi = fwater_hi | (1 << 8); 1628 1629 I915_WRITE(FW_BLC, fwater_lo); 1630 I915_WRITE(FW_BLC2, fwater_hi); 1631 1632 if (enabled) 1633 intel_set_memory_cxsr(dev_priv, true); 1634 } 1635 1636 static void i845_update_wm(struct drm_crtc *unused_crtc) 1637 { 1638 struct drm_device *dev = unused_crtc->dev; 1639 struct drm_i915_private *dev_priv = dev->dev_private; 1640 struct drm_crtc *crtc; 1641 const struct drm_display_mode *adjusted_mode; 1642 uint32_t fwater_lo; 1643 int planea_wm; 1644 1645 crtc = single_enabled_crtc(dev); 1646 if (crtc == NULL) 1647 return; 1648 1649 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; 1650 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1651 &i845_wm_info, 1652 dev_priv->display.get_fifo_size(dev, 0), 1653 4, pessimal_latency_ns); 1654 fwater_lo = I915_READ(FW_BLC) & ~0xfff; 1655 fwater_lo |= (3<<8) | planea_wm; 1656 1657 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); 1658 1659 I915_WRITE(FW_BLC, fwater_lo); 1660 } 1661 1662 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) 1663 { 1664 uint32_t pixel_rate; 1665 1666 pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; 1667 1668 /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to 1669 * adjust the pixel_rate here. */ 1670 1671 if (pipe_config->pch_pfit.enabled) { 1672 uint64_t pipe_w, pipe_h, pfit_w, pfit_h; 1673 uint32_t pfit_size = pipe_config->pch_pfit.size; 1674 1675 pipe_w = pipe_config->pipe_src_w; 1676 pipe_h = pipe_config->pipe_src_h; 1677 1678 pfit_w = (pfit_size >> 16) & 0xFFFF; 1679 pfit_h = pfit_size & 0xFFFF; 1680 if (pipe_w < pfit_w) 1681 pipe_w = pfit_w; 1682 if (pipe_h < pfit_h) 1683 pipe_h = pfit_h; 1684 1685 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, 1686 pfit_w * pfit_h); 1687 } 1688 1689 return pixel_rate; 1690 } 1691 1692 /* latency must be in 0.1us units. 
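 *
 * Rough illustration with made-up numbers: a 150000 kHz pixel rate at
 * 4 bytes per pixel with a 5 usec latency (latency = 50) gives
 * 150000 * 4 * 50 = 30000000, and DIV_ROUND_UP(30000000, 64 * 10000) = 47
 * cachelines plus the fixed guard of 2, i.e. a method 1 result of 49.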
*/ 1693 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, 1694 uint32_t latency) 1695 { 1696 uint64_t ret; 1697 1698 if (WARN(latency == 0, "Latency value missing\n")) 1699 return UINT_MAX; 1700 1701 ret = (uint64_t) pixel_rate * bytes_per_pixel * latency; 1702 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; 1703 1704 return ret; 1705 } 1706 1707 /* latency must be in 0.1us units. */ 1708 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 1709 uint32_t horiz_pixels, uint8_t bytes_per_pixel, 1710 uint32_t latency) 1711 { 1712 uint32_t ret; 1713 1714 if (WARN(latency == 0, "Latency value missing\n")) 1715 return UINT_MAX; 1716 1717 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 1718 ret = (ret + 1) * horiz_pixels * bytes_per_pixel; 1719 ret = DIV_ROUND_UP(ret, 64) + 2; 1720 return ret; 1721 } 1722 1723 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, 1724 uint8_t bytes_per_pixel) 1725 { 1726 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; 1727 } 1728 1729 struct skl_pipe_wm_parameters { 1730 bool active; 1731 uint32_t pipe_htotal; 1732 uint32_t pixel_rate; /* in KHz */ 1733 struct intel_plane_wm_parameters plane[I915_MAX_PLANES]; 1734 }; 1735 1736 struct ilk_wm_maximums { 1737 uint16_t pri; 1738 uint16_t spr; 1739 uint16_t cur; 1740 uint16_t fbc; 1741 }; 1742 1743 /* used in computing the new watermarks state */ 1744 struct intel_wm_config { 1745 unsigned int num_pipes_active; 1746 bool sprites_enabled; 1747 bool sprites_scaled; 1748 }; 1749 1750 /* 1751 * For both WM_PIPE and WM_LP. 1752 * mem_value must be in 0.1us units. 1753 */ 1754 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, 1755 const struct intel_plane_state *pstate, 1756 uint32_t mem_value, 1757 bool is_lp) 1758 { 1759 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0; 1760 uint32_t method1, method2; 1761 1762 if (!cstate->base.active || !pstate->visible) 1763 return 0; 1764 1765 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value); 1766 1767 if (!is_lp) 1768 return method1; 1769 1770 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), 1771 cstate->base.adjusted_mode.crtc_htotal, 1772 drm_rect_width(&pstate->dst), 1773 bpp, 1774 mem_value); 1775 1776 return min(method1, method2); 1777 } 1778 1779 /* 1780 * For both WM_PIPE and WM_LP. 1781 * mem_value must be in 0.1us units. 1782 */ 1783 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, 1784 const struct intel_plane_state *pstate, 1785 uint32_t mem_value) 1786 { 1787 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0; 1788 uint32_t method1, method2; 1789 1790 if (!cstate->base.active || !pstate->visible) 1791 return 0; 1792 1793 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value); 1794 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), 1795 cstate->base.adjusted_mode.crtc_htotal, 1796 drm_rect_width(&pstate->dst), 1797 bpp, 1798 mem_value); 1799 return min(method1, method2); 1800 } 1801 1802 /* 1803 * For both WM_PIPE and WM_LP. 1804 * mem_value must be in 0.1us units. 1805 */ 1806 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, 1807 const struct intel_plane_state *pstate, 1808 uint32_t mem_value) 1809 { 1810 /* 1811 * We treat the cursor plane as always-on for the purposes of watermark 1812 * calculation. Until we have two-stage watermark programming merged, 1813 * this is necessary to avoid flickering. 
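	 *
	 * The cursor is likewise assumed to be 4 bytes per pixel, and 64
	 * pixels wide whenever the plane state reports it as not visible.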
1814 */ 1815 int cpp = 4; 1816 int width = pstate->visible ? pstate->base.crtc_w : 64; 1817 1818 if (!cstate->base.active) 1819 return 0; 1820 1821 return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), 1822 cstate->base.adjusted_mode.crtc_htotal, 1823 width, cpp, mem_value); 1824 } 1825 1826 /* Only for WM_LP. */ 1827 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, 1828 const struct intel_plane_state *pstate, 1829 uint32_t pri_val) 1830 { 1831 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0; 1832 1833 if (!cstate->base.active || !pstate->visible) 1834 return 0; 1835 1836 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp); 1837 } 1838 1839 static unsigned int ilk_display_fifo_size(const struct drm_device *dev) 1840 { 1841 if (INTEL_INFO(dev)->gen >= 8) 1842 return 3072; 1843 else if (INTEL_INFO(dev)->gen >= 7) 1844 return 768; 1845 else 1846 return 512; 1847 } 1848 1849 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev, 1850 int level, bool is_sprite) 1851 { 1852 if (INTEL_INFO(dev)->gen >= 8) 1853 /* BDW primary/sprite plane watermarks */ 1854 return level == 0 ? 255 : 2047; 1855 else if (INTEL_INFO(dev)->gen >= 7) 1856 /* IVB/HSW primary/sprite plane watermarks */ 1857 return level == 0 ? 127 : 1023; 1858 else if (!is_sprite) 1859 /* ILK/SNB primary plane watermarks */ 1860 return level == 0 ? 127 : 511; 1861 else 1862 /* ILK/SNB sprite plane watermarks */ 1863 return level == 0 ? 63 : 255; 1864 } 1865 1866 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev, 1867 int level) 1868 { 1869 if (INTEL_INFO(dev)->gen >= 7) 1870 return level == 0 ? 63 : 255; 1871 else 1872 return level == 0 ? 31 : 63; 1873 } 1874 1875 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev) 1876 { 1877 if (INTEL_INFO(dev)->gen >= 8) 1878 return 31; 1879 else 1880 return 15; 1881 } 1882 1883 /* Calculate the maximum primary/sprite plane watermark */ 1884 static unsigned int ilk_plane_wm_max(const struct drm_device *dev, 1885 int level, 1886 const struct intel_wm_config *config, 1887 enum intel_ddb_partitioning ddb_partitioning, 1888 bool is_sprite) 1889 { 1890 unsigned int fifo_size = ilk_display_fifo_size(dev); 1891 1892 /* if sprites aren't enabled, sprites get nothing */ 1893 if (is_sprite && !config->sprites_enabled) 1894 return 0; 1895 1896 /* HSW allows LP1+ watermarks even with multiple pipes */ 1897 if (level == 0 || config->num_pipes_active > 1) { 1898 fifo_size /= INTEL_INFO(dev)->num_pipes; 1899 1900 /* 1901 * For some reason the non self refresh 1902 * FIFO size is only half of the self 1903 * refresh FIFO size on ILK/SNB. 
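 * Illustrative walk-through with assumed numbers: on a two-pipe ILK/SNB part
 * at level 0 with sprites enabled, the 512-entry FIFO becomes 512 / 2 = 256,
 * halved again to 128 by the quirk below, and halved once more by the 1:1
 * plane/sprite split, i.e. 64 entries, comfortably below the 127-entry
 * register limit for a primary plane at level 0.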
1904 */ 1905 if (INTEL_INFO(dev)->gen <= 6) 1906 fifo_size /= 2; 1907 } 1908 1909 if (config->sprites_enabled) { 1910 /* level 0 is always calculated with 1:1 split */ 1911 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) { 1912 if (is_sprite) 1913 fifo_size *= 5; 1914 fifo_size /= 6; 1915 } else { 1916 fifo_size /= 2; 1917 } 1918 } 1919 1920 /* clamp to max that the registers can hold */ 1921 return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite)); 1922 } 1923 1924 /* Calculate the maximum cursor plane watermark */ 1925 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, 1926 int level, 1927 const struct intel_wm_config *config) 1928 { 1929 /* HSW LP1+ watermarks w/ multiple pipes */ 1930 if (level > 0 && config->num_pipes_active > 1) 1931 return 64; 1932 1933 /* otherwise just report max that registers can hold */ 1934 return ilk_cursor_wm_reg_max(dev, level); 1935 } 1936 1937 static void ilk_compute_wm_maximums(const struct drm_device *dev, 1938 int level, 1939 const struct intel_wm_config *config, 1940 enum intel_ddb_partitioning ddb_partitioning, 1941 struct ilk_wm_maximums *max) 1942 { 1943 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); 1944 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); 1945 max->cur = ilk_cursor_wm_max(dev, level, config); 1946 max->fbc = ilk_fbc_wm_reg_max(dev); 1947 } 1948 1949 static void ilk_compute_wm_reg_maximums(struct drm_device *dev, 1950 int level, 1951 struct ilk_wm_maximums *max) 1952 { 1953 max->pri = ilk_plane_wm_reg_max(dev, level, false); 1954 max->spr = ilk_plane_wm_reg_max(dev, level, true); 1955 max->cur = ilk_cursor_wm_reg_max(dev, level); 1956 max->fbc = ilk_fbc_wm_reg_max(dev); 1957 } 1958 1959 static bool ilk_validate_wm_level(int level, 1960 const struct ilk_wm_maximums *max, 1961 struct intel_wm_level *result) 1962 { 1963 bool ret; 1964 1965 /* already determined to be invalid? */ 1966 if (!result->enable) 1967 return false; 1968 1969 result->enable = result->pri_val <= max->pri && 1970 result->spr_val <= max->spr && 1971 result->cur_val <= max->cur; 1972 1973 ret = result->enable; 1974 1975 /* 1976 * HACK until we can pre-compute everything, 1977 * and thus fail gracefully if LP0 watermarks 1978 * are exceeded... 
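 * For example (hypothetical values): if max->pri is 127 but the computed
 * pri_val is 140, WM0 is clamped to 127 and left enabled so the hardware
 * still gets a usable LP0 value, while the function returns false so the
 * caller can tell the watermark did not really fit.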
1979 */ 1980 if (level == 0 && !result->enable) { 1981 if (result->pri_val > max->pri) 1982 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n", 1983 level, result->pri_val, max->pri); 1984 if (result->spr_val > max->spr) 1985 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n", 1986 level, result->spr_val, max->spr); 1987 if (result->cur_val > max->cur) 1988 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", 1989 level, result->cur_val, max->cur); 1990 1991 result->pri_val = min_t(uint32_t, result->pri_val, max->pri); 1992 result->spr_val = min_t(uint32_t, result->spr_val, max->spr); 1993 result->cur_val = min_t(uint32_t, result->cur_val, max->cur); 1994 result->enable = true; 1995 } 1996 1997 return ret; 1998 } 1999 2000 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, 2001 const struct intel_crtc *intel_crtc, 2002 int level, 2003 struct intel_crtc_state *cstate, 2004 struct intel_wm_level *result) 2005 { 2006 struct intel_plane *intel_plane; 2007 uint16_t pri_latency = dev_priv->wm.pri_latency[level]; 2008 uint16_t spr_latency = dev_priv->wm.spr_latency[level]; 2009 uint16_t cur_latency = dev_priv->wm.cur_latency[level]; 2010 2011 /* WM1+ latency values stored in 0.5us units */ 2012 if (level > 0) { 2013 pri_latency *= 5; 2014 spr_latency *= 5; 2015 cur_latency *= 5; 2016 } 2017 2018 for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) { 2019 struct intel_plane_state *pstate = 2020 to_intel_plane_state(intel_plane->base.state); 2021 2022 switch (intel_plane->base.type) { 2023 case DRM_PLANE_TYPE_PRIMARY: 2024 result->pri_val = ilk_compute_pri_wm(cstate, pstate, 2025 pri_latency, 2026 level); 2027 result->fbc_val = ilk_compute_fbc_wm(cstate, pstate, 2028 result->pri_val); 2029 break; 2030 case DRM_PLANE_TYPE_OVERLAY: 2031 result->spr_val = ilk_compute_spr_wm(cstate, pstate, 2032 spr_latency); 2033 break; 2034 case DRM_PLANE_TYPE_CURSOR: 2035 result->cur_val = ilk_compute_cur_wm(cstate, pstate, 2036 cur_latency); 2037 break; 2038 } 2039 } 2040 2041 result->enable = true; 2042 } 2043 2044 static uint32_t 2045 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) 2046 { 2047 struct drm_i915_private *dev_priv = dev->dev_private; 2048 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2049 const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode; 2050 u32 linetime, ips_linetime; 2051 2052 if (!intel_crtc->active) 2053 return 0; 2054 2055 /* The WM are computed with base on how long it takes to fill a single 2056 * row at the given clock rate, multiplied by 8. 
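 * For instance (assumed mode, not from this code): with crtc_htotal = 2200
 * and a 148500 kHz pixel clock, DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500)
 * = 119, i.e. roughly 14.8 us per line expressed in 1/8 us units; the
 * ips_linetime variant below uses cdclk_freq instead of the pixel clock.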
2057 * */ 2058 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2059 adjusted_mode->crtc_clock); 2060 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2061 dev_priv->cdclk_freq); 2062 2063 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2064 PIPE_WM_LINETIME_TIME(linetime); 2065 } 2066 2067 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) 2068 { 2069 struct drm_i915_private *dev_priv = dev->dev_private; 2070 2071 if (IS_GEN9(dev)) { 2072 uint32_t val; 2073 int ret, i; 2074 int level, max_level = ilk_wm_max_level(dev); 2075 2076 /* read the first set of memory latencies[0:3] */ 2077 val = 0; /* data0 to be programmed to 0 for first set */ 2078 mutex_lock(&dev_priv->rps.hw_lock); 2079 ret = sandybridge_pcode_read(dev_priv, 2080 GEN9_PCODE_READ_MEM_LATENCY, 2081 &val); 2082 mutex_unlock(&dev_priv->rps.hw_lock); 2083 2084 if (ret) { 2085 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2086 return; 2087 } 2088 2089 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2090 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2091 GEN9_MEM_LATENCY_LEVEL_MASK; 2092 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2093 GEN9_MEM_LATENCY_LEVEL_MASK; 2094 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2095 GEN9_MEM_LATENCY_LEVEL_MASK; 2096 2097 /* read the second set of memory latencies[4:7] */ 2098 val = 1; /* data0 to be programmed to 1 for second set */ 2099 mutex_lock(&dev_priv->rps.hw_lock); 2100 ret = sandybridge_pcode_read(dev_priv, 2101 GEN9_PCODE_READ_MEM_LATENCY, 2102 &val); 2103 mutex_unlock(&dev_priv->rps.hw_lock); 2104 if (ret) { 2105 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2106 return; 2107 } 2108 2109 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2110 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2111 GEN9_MEM_LATENCY_LEVEL_MASK; 2112 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2113 GEN9_MEM_LATENCY_LEVEL_MASK; 2114 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2115 GEN9_MEM_LATENCY_LEVEL_MASK; 2116 2117 /* 2118 * If a level n (n > 1) has a 0us latency, all levels m (m >= n) 2119 * need to be disabled. We make sure to sanitize the values out 2120 * of the punit to satisfy this requirement. 2121 */ 2122 for (level = 1; level <= max_level; level++) { 2123 if (wm[level] == 0) { 2124 for (i = level + 1; i <= max_level; i++) 2125 wm[i] = 0; 2126 break; 2127 } 2128 } 2129 2130 /* 2131 * WaWmMemoryReadLatency:skl 2132 * 2133 * punit doesn't take into account the read latency so we need 2134 * to add 2us to the various latency levels we retrieve from the 2135 * punit when level 0 response data us 0us. 
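 * For example (assumed punit readings): a reported table of
 * {0, 4, 8, 16, ...} us would be adjusted below to {2, 6, 10, 18, ...} us,
 * with the +2 applied to level 0 and to every following non-zero level.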
2136 */ 2137 if (wm[0] == 0) { 2138 wm[0] += 2; 2139 for (level = 1; level <= max_level; level++) { 2140 if (wm[level] == 0) 2141 break; 2142 wm[level] += 2; 2143 } 2144 } 2145 2146 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2147 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2148 2149 wm[0] = (sskpd >> 56) & 0xFF; 2150 if (wm[0] == 0) 2151 wm[0] = sskpd & 0xF; 2152 wm[1] = (sskpd >> 4) & 0xFF; 2153 wm[2] = (sskpd >> 12) & 0xFF; 2154 wm[3] = (sskpd >> 20) & 0x1FF; 2155 wm[4] = (sskpd >> 32) & 0x1FF; 2156 } else if (INTEL_INFO(dev)->gen >= 6) { 2157 uint32_t sskpd = I915_READ(MCH_SSKPD); 2158 2159 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2160 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2161 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2162 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2163 } else if (INTEL_INFO(dev)->gen >= 5) { 2164 uint32_t mltr = I915_READ(MLTR_ILK); 2165 2166 /* ILK primary LP0 latency is 700 ns */ 2167 wm[0] = 7; 2168 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2169 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2170 } 2171 } 2172 2173 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2174 { 2175 /* ILK sprite LP0 latency is 1300 ns */ 2176 if (INTEL_INFO(dev)->gen == 5) 2177 wm[0] = 13; 2178 } 2179 2180 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2181 { 2182 /* ILK cursor LP0 latency is 1300 ns */ 2183 if (INTEL_INFO(dev)->gen == 5) 2184 wm[0] = 13; 2185 2186 /* WaDoubleCursorLP3Latency:ivb */ 2187 if (IS_IVYBRIDGE(dev)) 2188 wm[3] *= 2; 2189 } 2190 2191 int ilk_wm_max_level(const struct drm_device *dev) 2192 { 2193 /* how many WM levels are we expecting */ 2194 if (INTEL_INFO(dev)->gen >= 9) 2195 return 7; 2196 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2197 return 4; 2198 else if (INTEL_INFO(dev)->gen >= 6) 2199 return 3; 2200 else 2201 return 2; 2202 } 2203 2204 static void intel_print_wm_latency(struct drm_device *dev, 2205 const char *name, 2206 const uint16_t wm[8]) 2207 { 2208 int level, max_level = ilk_wm_max_level(dev); 2209 2210 for (level = 0; level <= max_level; level++) { 2211 unsigned int latency = wm[level]; 2212 2213 if (latency == 0) { 2214 DRM_ERROR("%s WM%d latency not provided\n", 2215 name, level); 2216 continue; 2217 } 2218 2219 /* 2220 * - latencies are in us on gen9. 2221 * - before then, WM1+ latency values are in 0.5us units 2222 */ 2223 if (IS_GEN9(dev)) 2224 latency *= 10; 2225 else if (level > 0) 2226 latency *= 5; 2227 2228 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2229 name, level, wm[level], 2230 latency / 10, latency % 10); 2231 } 2232 } 2233 2234 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2235 uint16_t wm[5], uint16_t min) 2236 { 2237 int level, max_level = ilk_wm_max_level(dev_priv->dev); 2238 2239 if (wm[0] >= min) 2240 return false; 2241 2242 wm[0] = max(wm[0], min); 2243 for (level = 1; level <= max_level; level++) 2244 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2245 2246 return true; 2247 } 2248 2249 static void snb_wm_latency_quirk(struct drm_device *dev) 2250 { 2251 struct drm_i915_private *dev_priv = dev->dev_private; 2252 bool changed; 2253 2254 /* 2255 * The BIOS provided WM memory latency values are often 2256 * inadequate for high resolution displays. Adjust them. 
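 * For example (hypothetical BIOS values): a reported WM0 latency of 7
 * (0.7 us) is raised to 12 (1.2 us) below, and every WM1+ value is raised to
 * at least DIV_ROUND_UP(12, 5) = 3, i.e. 1.5 us in the 0.5 us units used for
 * WM1+ levels.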
2257 */ 2258 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2259 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2260 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2261 2262 if (!changed) 2263 return; 2264 2265 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2266 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2267 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2268 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2269 } 2270 2271 static void ilk_setup_wm_latency(struct drm_device *dev) 2272 { 2273 struct drm_i915_private *dev_priv = dev->dev_private; 2274 2275 intel_read_wm_latency(dev, dev_priv->wm.pri_latency); 2276 2277 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2278 sizeof(dev_priv->wm.pri_latency)); 2279 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2280 sizeof(dev_priv->wm.pri_latency)); 2281 2282 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency); 2283 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency); 2284 2285 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2286 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2287 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2288 2289 if (IS_GEN6(dev)) 2290 snb_wm_latency_quirk(dev); 2291 } 2292 2293 static void skl_setup_wm_latency(struct drm_device *dev) 2294 { 2295 struct drm_i915_private *dev_priv = dev->dev_private; 2296 2297 intel_read_wm_latency(dev, dev_priv->wm.skl_latency); 2298 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); 2299 } 2300 2301 static void ilk_compute_wm_config(struct drm_device *dev, 2302 struct intel_wm_config *config) 2303 { 2304 struct intel_crtc *intel_crtc; 2305 2306 /* Compute the currently _active_ config */ 2307 for_each_intel_crtc(dev, intel_crtc) { 2308 const struct intel_pipe_wm *wm = &intel_crtc->wm.active; 2309 2310 if (!wm->pipe_enabled) 2311 continue; 2312 2313 config->sprites_enabled |= wm->sprites_enabled; 2314 config->sprites_scaled |= wm->sprites_scaled; 2315 config->num_pipes_active++; 2316 } 2317 } 2318 2319 /* Compute new watermarks for the pipe */ 2320 static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate, 2321 struct intel_pipe_wm *pipe_wm) 2322 { 2323 struct drm_crtc *crtc = cstate->base.crtc; 2324 struct drm_device *dev = crtc->dev; 2325 const struct drm_i915_private *dev_priv = dev->dev_private; 2326 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2327 struct intel_plane *intel_plane; 2328 struct intel_plane_state *sprstate = NULL; 2329 int level, max_level = ilk_wm_max_level(dev); 2330 /* LP0 watermark maximums depend on this pipe alone */ 2331 struct intel_wm_config config = { 2332 .num_pipes_active = 1, 2333 }; 2334 struct ilk_wm_maximums max; 2335 2336 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 2337 if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) { 2338 sprstate = to_intel_plane_state(intel_plane->base.state); 2339 break; 2340 } 2341 } 2342 2343 config.sprites_enabled = sprstate->visible; 2344 config.sprites_scaled = sprstate->visible && 2345 (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || 2346 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); 2347 2348 pipe_wm->pipe_enabled = cstate->base.active; 2349 pipe_wm->sprites_enabled = sprstate->visible; 2350 pipe_wm->sprites_scaled = config.sprites_scaled; 2351 2352 /* ILK/SNB: LP2+ watermarks 
only w/o sprites */ 2353 if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible) 2354 max_level = 1; 2355 2356 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ 2357 if (config.sprites_scaled) 2358 max_level = 0; 2359 2360 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]); 2361 2362 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2363 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc); 2364 2365 /* LP0 watermarks always use 1/2 DDB partitioning */ 2366 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2367 2368 /* At least LP0 must be valid */ 2369 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) 2370 return false; 2371 2372 ilk_compute_wm_reg_maximums(dev, 1, &max); 2373 2374 for (level = 1; level <= max_level; level++) { 2375 struct intel_wm_level wm = {}; 2376 2377 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm); 2378 2379 /* 2380 * Disable any watermark level that exceeds the 2381 * register maximums since such watermarks are 2382 * always invalid. 2383 */ 2384 if (!ilk_validate_wm_level(level, &max, &wm)) 2385 break; 2386 2387 pipe_wm->wm[level] = wm; 2388 } 2389 2390 return true; 2391 } 2392 2393 /* 2394 * Merge the watermarks from all active pipes for a specific level. 2395 */ 2396 static void ilk_merge_wm_level(struct drm_device *dev, 2397 int level, 2398 struct intel_wm_level *ret_wm) 2399 { 2400 const struct intel_crtc *intel_crtc; 2401 2402 ret_wm->enable = true; 2403 2404 for_each_intel_crtc(dev, intel_crtc) { 2405 const struct intel_pipe_wm *active = &intel_crtc->wm.active; 2406 const struct intel_wm_level *wm = &active->wm[level]; 2407 2408 if (!active->pipe_enabled) 2409 continue; 2410 2411 /* 2412 * The watermark values may have been used in the past, 2413 * so we must maintain them in the registers for some 2414 * time even if the level is now disabled. 2415 */ 2416 if (!wm->enable) 2417 ret_wm->enable = false; 2418 2419 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2420 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2421 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2422 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2423 } 2424 } 2425 2426 /* 2427 * Merge all low power watermarks for all active pipes. 2428 */ 2429 static void ilk_wm_merge(struct drm_device *dev, 2430 const struct intel_wm_config *config, 2431 const struct ilk_wm_maximums *max, 2432 struct intel_pipe_wm *merged) 2433 { 2434 struct drm_i915_private *dev_priv = dev->dev_private; 2435 int level, max_level = ilk_wm_max_level(dev); 2436 int last_enabled_level = max_level; 2437 2438 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2439 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) && 2440 config->num_pipes_active > 1) 2441 return; 2442 2443 /* ILK: FBC WM must be disabled always */ 2444 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6; 2445 2446 /* merge each WM1+ level */ 2447 for (level = 1; level <= max_level; level++) { 2448 struct intel_wm_level *wm = &merged->wm[level]; 2449 2450 ilk_merge_wm_level(dev, level, wm); 2451 2452 if (level > last_enabled_level) 2453 wm->enable = false; 2454 else if (!ilk_validate_wm_level(level, max, wm)) 2455 /* make sure all following levels get disabled */ 2456 last_enabled_level = level - 1; 2457 2458 /* 2459 * The spec says it is preferred to disable 2460 * FBC WMs instead of disabling a WM level. 
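 * In other words, when a merged level's fbc_val exceeds the register maximum
 * (15 on ILK/SNB/IVB, 31 on BDW+), the code below zeroes that value and, if
 * the level was otherwise enabled, turns off FBC watermarks globally rather
 * than dropping the level itself.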
2461 */ 2462 if (wm->fbc_val > max->fbc) { 2463 if (wm->enable) 2464 merged->fbc_wm_enabled = false; 2465 wm->fbc_val = 0; 2466 } 2467 } 2468 2469 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2470 /* 2471 * FIXME this is racy. FBC might get enabled later. 2472 * What we should check here is whether FBC can be 2473 * enabled sometime later. 2474 */ 2475 if (IS_GEN5(dev) && !merged->fbc_wm_enabled && 2476 intel_fbc_enabled(dev_priv)) { 2477 for (level = 2; level <= max_level; level++) { 2478 struct intel_wm_level *wm = &merged->wm[level]; 2479 2480 wm->enable = false; 2481 } 2482 } 2483 } 2484 2485 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2486 { 2487 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2488 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2489 } 2490 2491 /* The value we need to program into the WM_LPx latency field */ 2492 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2493 { 2494 struct drm_i915_private *dev_priv = dev->dev_private; 2495 2496 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2497 return 2 * level; 2498 else 2499 return dev_priv->wm.pri_latency[level]; 2500 } 2501 2502 static void ilk_compute_wm_results(struct drm_device *dev, 2503 const struct intel_pipe_wm *merged, 2504 enum intel_ddb_partitioning partitioning, 2505 struct ilk_wm_values *results) 2506 { 2507 struct intel_crtc *intel_crtc; 2508 int level, wm_lp; 2509 2510 results->enable_fbc_wm = merged->fbc_wm_enabled; 2511 results->partitioning = partitioning; 2512 2513 /* LP1+ register values */ 2514 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2515 const struct intel_wm_level *r; 2516 2517 level = ilk_wm_lp_to_level(wm_lp, merged); 2518 2519 r = &merged->wm[level]; 2520 2521 /* 2522 * Maintain the watermark values even if the level is 2523 * disabled. Doing otherwise could cause underruns. 2524 */ 2525 results->wm_lp[wm_lp - 1] = 2526 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2527 (r->pri_val << WM1_LP_SR_SHIFT) | 2528 r->cur_val; 2529 2530 if (r->enable) 2531 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2532 2533 if (INTEL_INFO(dev)->gen >= 8) 2534 results->wm_lp[wm_lp - 1] |= 2535 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2536 else 2537 results->wm_lp[wm_lp - 1] |= 2538 r->fbc_val << WM1_LP_FBC_SHIFT; 2539 2540 /* 2541 * Always set WM1S_LP_EN when spr_val != 0, even if the 2542 * level is disabled. Doing otherwise could cause underruns. 2543 */ 2544 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) { 2545 WARN_ON(wm_lp != 1); 2546 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2547 } else 2548 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2549 } 2550 2551 /* LP0 register values */ 2552 for_each_intel_crtc(dev, intel_crtc) { 2553 enum pipe pipe = intel_crtc->pipe; 2554 const struct intel_wm_level *r = 2555 &intel_crtc->wm.active.wm[0]; 2556 2557 if (WARN_ON(!r->enable)) 2558 continue; 2559 2560 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime; 2561 2562 results->wm_pipe[pipe] = 2563 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2564 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2565 r->cur_val; 2566 } 2567 } 2568 2569 /* Find the result with the highest level enabled. Check for enable_fbc_wm in 2570 * case both are at the same level. Prefer r1 in case they're the same. 
*/ 2571 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, 2572 struct intel_pipe_wm *r1, 2573 struct intel_pipe_wm *r2) 2574 { 2575 int level, max_level = ilk_wm_max_level(dev); 2576 int level1 = 0, level2 = 0; 2577 2578 for (level = 1; level <= max_level; level++) { 2579 if (r1->wm[level].enable) 2580 level1 = level; 2581 if (r2->wm[level].enable) 2582 level2 = level; 2583 } 2584 2585 if (level1 == level2) { 2586 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled) 2587 return r2; 2588 else 2589 return r1; 2590 } else if (level1 > level2) { 2591 return r1; 2592 } else { 2593 return r2; 2594 } 2595 } 2596 2597 /* dirty bits used to track which watermarks need changes */ 2598 #define WM_DIRTY_PIPE(pipe) (1 << (pipe)) 2599 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe))) 2600 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp))) 2601 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3)) 2602 #define WM_DIRTY_FBC (1 << 24) 2603 #define WM_DIRTY_DDB (1 << 25) 2604 2605 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, 2606 const struct ilk_wm_values *old, 2607 const struct ilk_wm_values *new) 2608 { 2609 unsigned int dirty = 0; 2610 enum pipe pipe; 2611 int wm_lp; 2612 2613 for_each_pipe(dev_priv, pipe) { 2614 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { 2615 dirty |= WM_DIRTY_LINETIME(pipe); 2616 /* Must disable LP1+ watermarks too */ 2617 dirty |= WM_DIRTY_LP_ALL; 2618 } 2619 2620 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) { 2621 dirty |= WM_DIRTY_PIPE(pipe); 2622 /* Must disable LP1+ watermarks too */ 2623 dirty |= WM_DIRTY_LP_ALL; 2624 } 2625 } 2626 2627 if (old->enable_fbc_wm != new->enable_fbc_wm) { 2628 dirty |= WM_DIRTY_FBC; 2629 /* Must disable LP1+ watermarks too */ 2630 dirty |= WM_DIRTY_LP_ALL; 2631 } 2632 2633 if (old->partitioning != new->partitioning) { 2634 dirty |= WM_DIRTY_DDB; 2635 /* Must disable LP1+ watermarks too */ 2636 dirty |= WM_DIRTY_LP_ALL; 2637 } 2638 2639 /* LP1+ watermarks already deemed dirty, no need to continue */ 2640 if (dirty & WM_DIRTY_LP_ALL) 2641 return dirty; 2642 2643 /* Find the lowest numbered LP1+ watermark in need of an update... */ 2644 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2645 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] || 2646 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1]) 2647 break; 2648 } 2649 2650 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */ 2651 for (; wm_lp <= 3; wm_lp++) 2652 dirty |= WM_DIRTY_LP(wm_lp); 2653 2654 return dirty; 2655 } 2656 2657 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv, 2658 unsigned int dirty) 2659 { 2660 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2661 bool changed = false; 2662 2663 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) { 2664 previous->wm_lp[2] &= ~WM1_LP_SR_EN; 2665 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]); 2666 changed = true; 2667 } 2668 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) { 2669 previous->wm_lp[1] &= ~WM1_LP_SR_EN; 2670 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]); 2671 changed = true; 2672 } 2673 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) { 2674 previous->wm_lp[0] &= ~WM1_LP_SR_EN; 2675 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]); 2676 changed = true; 2677 } 2678 2679 /* 2680 * Don't touch WM1S_LP_EN here. 2681 * Doing so could cause underruns. 
2682 */ 2683 2684 return changed; 2685 } 2686 2687 /* 2688 * The spec says we shouldn't write when we don't need, because every write 2689 * causes WMs to be re-evaluated, expending some power. 2690 */ 2691 static void ilk_write_wm_values(struct drm_i915_private *dev_priv, 2692 struct ilk_wm_values *results) 2693 { 2694 struct drm_device *dev = dev_priv->dev; 2695 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2696 unsigned int dirty; 2697 uint32_t val; 2698 2699 dirty = ilk_compute_wm_dirty(dev_priv, previous, results); 2700 if (!dirty) 2701 return; 2702 2703 _ilk_disable_lp_wm(dev_priv, dirty); 2704 2705 if (dirty & WM_DIRTY_PIPE(PIPE_A)) 2706 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); 2707 if (dirty & WM_DIRTY_PIPE(PIPE_B)) 2708 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); 2709 if (dirty & WM_DIRTY_PIPE(PIPE_C)) 2710 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); 2711 2712 if (dirty & WM_DIRTY_LINETIME(PIPE_A)) 2713 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); 2714 if (dirty & WM_DIRTY_LINETIME(PIPE_B)) 2715 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); 2716 if (dirty & WM_DIRTY_LINETIME(PIPE_C)) 2717 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); 2718 2719 if (dirty & WM_DIRTY_DDB) { 2720 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2721 val = I915_READ(WM_MISC); 2722 if (results->partitioning == INTEL_DDB_PART_1_2) 2723 val &= ~WM_MISC_DATA_PARTITION_5_6; 2724 else 2725 val |= WM_MISC_DATA_PARTITION_5_6; 2726 I915_WRITE(WM_MISC, val); 2727 } else { 2728 val = I915_READ(DISP_ARB_CTL2); 2729 if (results->partitioning == INTEL_DDB_PART_1_2) 2730 val &= ~DISP_DATA_PARTITION_5_6; 2731 else 2732 val |= DISP_DATA_PARTITION_5_6; 2733 I915_WRITE(DISP_ARB_CTL2, val); 2734 } 2735 } 2736 2737 if (dirty & WM_DIRTY_FBC) { 2738 val = I915_READ(DISP_ARB_CTL); 2739 if (results->enable_fbc_wm) 2740 val &= ~DISP_FBC_WM_DIS; 2741 else 2742 val |= DISP_FBC_WM_DIS; 2743 I915_WRITE(DISP_ARB_CTL, val); 2744 } 2745 2746 if (dirty & WM_DIRTY_LP(1) && 2747 previous->wm_lp_spr[0] != results->wm_lp_spr[0]) 2748 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); 2749 2750 if (INTEL_INFO(dev)->gen >= 7) { 2751 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1]) 2752 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); 2753 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 2754 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 2755 } 2756 2757 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 2758 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 2759 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 2760 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 2761 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 2762 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 2763 2764 dev_priv->wm.hw = *results; 2765 } 2766 2767 static bool ilk_disable_lp_wm(struct drm_device *dev) 2768 { 2769 struct drm_i915_private *dev_priv = dev->dev_private; 2770 2771 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 2772 } 2773 2774 /* 2775 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the 2776 * different active planes. 
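 * A quick illustrative example (display configuration assumed): on SKL with
 * two active pipes, 896 - 4 = 892 blocks remain after the bypass allocation,
 * so each pipe gets 892 / 2 = 446 blocks; the cursor then takes 8 of those
 * (32 when only a single pipe is active) and the rest is divided among the
 * planes according to their data rates.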
2777 */ 2778 2779 #define SKL_DDB_SIZE 896 /* in blocks */ 2780 #define BXT_DDB_SIZE 512 2781 2782 static void 2783 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 2784 struct drm_crtc *for_crtc, 2785 const struct intel_wm_config *config, 2786 const struct skl_pipe_wm_parameters *params, 2787 struct skl_ddb_entry *alloc /* out */) 2788 { 2789 struct drm_crtc *crtc; 2790 unsigned int pipe_size, ddb_size; 2791 int nth_active_pipe; 2792 2793 if (!params->active) { 2794 alloc->start = 0; 2795 alloc->end = 0; 2796 return; 2797 } 2798 2799 if (IS_BROXTON(dev)) 2800 ddb_size = BXT_DDB_SIZE; 2801 else 2802 ddb_size = SKL_DDB_SIZE; 2803 2804 ddb_size -= 4; /* 4 blocks for bypass path allocation */ 2805 2806 nth_active_pipe = 0; 2807 for_each_crtc(dev, crtc) { 2808 if (!to_intel_crtc(crtc)->active) 2809 continue; 2810 2811 if (crtc == for_crtc) 2812 break; 2813 2814 nth_active_pipe++; 2815 } 2816 2817 pipe_size = ddb_size / config->num_pipes_active; 2818 alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; 2819 alloc->end = alloc->start + pipe_size; 2820 } 2821 2822 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) 2823 { 2824 if (config->num_pipes_active == 1) 2825 return 32; 2826 2827 return 8; 2828 } 2829 2830 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 2831 { 2832 entry->start = reg & 0x3ff; 2833 entry->end = (reg >> 16) & 0x3ff; 2834 if (entry->end) 2835 entry->end += 1; 2836 } 2837 2838 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 2839 struct skl_ddb_allocation *ddb /* out */) 2840 { 2841 enum pipe pipe; 2842 int plane; 2843 u32 val; 2844 2845 memset(ddb, 0, sizeof(*ddb)); 2846 2847 for_each_pipe(dev_priv, pipe) { 2848 if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) 2849 continue; 2850 2851 for_each_plane(dev_priv, pipe, plane) { 2852 val = I915_READ(PLANE_BUF_CFG(pipe, plane)); 2853 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], 2854 val); 2855 } 2856 2857 val = I915_READ(CUR_BUF_CFG(pipe)); 2858 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR], 2859 val); 2860 } 2861 } 2862 2863 static unsigned int 2864 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y) 2865 { 2866 2867 /* for planar format */ 2868 if (p->y_bytes_per_pixel) { 2869 if (y) /* y-plane data rate */ 2870 return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel; 2871 else /* uv-plane data rate */ 2872 return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel; 2873 } 2874 2875 /* for packed formats */ 2876 return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; 2877 } 2878 2879 /* 2880 * We don't overflow 32 bits. 
Worst case is 3 planes enabled, each fetching 2881 * an 8192x4096@32bpp framebuffer: 2882 * 3 * 4096 * 8192 * 4 < 2^32 2883 */ 2884 static unsigned int 2885 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc, 2886 const struct skl_pipe_wm_parameters *params) 2887 { 2888 unsigned int total_data_rate = 0; 2889 int plane; 2890 2891 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { 2892 const struct intel_plane_wm_parameters *p; 2893 2894 p = &params->plane[plane]; 2895 if (!p->enabled) 2896 continue; 2897 2898 total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */ 2899 if (p->y_bytes_per_pixel) { 2900 total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */ 2901 } 2902 } 2903 2904 return total_data_rate; 2905 } 2906 2907 static void 2908 skl_allocate_pipe_ddb(struct drm_crtc *crtc, 2909 const struct intel_wm_config *config, 2910 const struct skl_pipe_wm_parameters *params, 2911 struct skl_ddb_allocation *ddb /* out */) 2912 { 2913 struct drm_device *dev = crtc->dev; 2914 struct drm_i915_private *dev_priv = dev->dev_private; 2915 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2916 enum pipe pipe = intel_crtc->pipe; 2917 struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; 2918 uint16_t alloc_size, start, cursor_blocks; 2919 uint16_t minimum[I915_MAX_PLANES]; 2920 uint16_t y_minimum[I915_MAX_PLANES]; 2921 unsigned int total_data_rate; 2922 int plane; 2923 2924 skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc); 2925 alloc_size = skl_ddb_entry_size(alloc); 2926 if (alloc_size == 0) { 2927 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 2928 memset(&ddb->plane[pipe][PLANE_CURSOR], 0, 2929 sizeof(ddb->plane[pipe][PLANE_CURSOR])); 2930 return; 2931 } 2932 2933 cursor_blocks = skl_cursor_allocation(config); 2934 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks; 2935 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; 2936 2937 alloc_size -= cursor_blocks; 2938 alloc->end -= cursor_blocks; 2939 2940 /* 1. Allocate the minimum required blocks for each active plane */ 2941 for_each_plane(dev_priv, pipe, plane) { 2942 const struct intel_plane_wm_parameters *p; 2943 2944 p = &params->plane[plane]; 2945 if (!p->enabled) 2946 continue; 2947 2948 minimum[plane] = 8; 2949 alloc_size -= minimum[plane]; 2950 y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0; 2951 alloc_size -= y_minimum[plane]; 2952 } 2953 2954 /* 2955 * 2. Distribute the remaining space in proportion to the amount of 2956 * data each plane needs to fetch from memory. 2957 * 2958 * FIXME: we may not allocate every single block here. 
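 * Illustrative numbers (assumed, not computed by this code): with 422 blocks
 * left after the cursor and per-plane minimums, a 1920x1080 4-Bpp primary
 * (relative data rate 8294400) and a 960x540 4-Bpp sprite (2073600) receive
 * 8 + 422 * 8294400 / 10368000 = 345 and 8 + 422 * 2073600 / 10368000 = 92
 * blocks respectively, leaving one block unused as noted above.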
2959 */ 2960 total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params); 2961 2962 start = alloc->start; 2963 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { 2964 const struct intel_plane_wm_parameters *p; 2965 unsigned int data_rate, y_data_rate; 2966 uint16_t plane_blocks, y_plane_blocks = 0; 2967 2968 p = &params->plane[plane]; 2969 if (!p->enabled) 2970 continue; 2971 2972 data_rate = skl_plane_relative_data_rate(p, 0); 2973 2974 /* 2975 * allocation for (packed formats) or (uv-plane part of planar format): 2976 * promote the expression to 64 bits to avoid overflowing, the 2977 * result is < available as data_rate / total_data_rate < 1 2978 */ 2979 plane_blocks = minimum[plane]; 2980 plane_blocks += div_u64((uint64_t)alloc_size * data_rate, 2981 total_data_rate); 2982 2983 ddb->plane[pipe][plane].start = start; 2984 ddb->plane[pipe][plane].end = start + plane_blocks; 2985 2986 start += plane_blocks; 2987 2988 /* 2989 * allocation for y_plane part of planar format: 2990 */ 2991 if (p->y_bytes_per_pixel) { 2992 y_data_rate = skl_plane_relative_data_rate(p, 1); 2993 y_plane_blocks = y_minimum[plane]; 2994 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, 2995 total_data_rate); 2996 2997 ddb->y_plane[pipe][plane].start = start; 2998 ddb->y_plane[pipe][plane].end = start + y_plane_blocks; 2999 3000 start += y_plane_blocks; 3001 } 3002 3003 } 3004 3005 } 3006 3007 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config) 3008 { 3009 /* TODO: Take into account the scalers once we support them */ 3010 return config->base.adjusted_mode.crtc_clock; 3011 } 3012 3013 /* 3014 * The max latency should be 257 (max the punit can code is 255 and we add 2us 3015 * for the read latency) and bytes_per_pixel should always be <= 8, so that 3016 * should allow pixel_rate up to ~2 GHz which seems sufficient since max 3017 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
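 * As an example with assumed inputs: latency 4 us, pixel_rate 148500 kHz and
 * 4 bytes per pixel give 4 * 148500 * 4 / 512 = 4640, so
 * DIV_ROUND_UP(4640, 1000) = 5 blocks for method 1 below.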
3018 */ 3019 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, 3020 uint32_t latency) 3021 { 3022 uint32_t wm_intermediate_val, ret; 3023 3024 if (latency == 0) 3025 return UINT_MAX; 3026 3027 wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512; 3028 ret = DIV_ROUND_UP(wm_intermediate_val, 1000); 3029 3030 return ret; 3031 } 3032 3033 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 3034 uint32_t horiz_pixels, uint8_t bytes_per_pixel, 3035 uint64_t tiling, uint32_t latency) 3036 { 3037 uint32_t ret; 3038 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3039 uint32_t wm_intermediate_val; 3040 3041 if (latency == 0) 3042 return UINT_MAX; 3043 3044 plane_bytes_per_line = horiz_pixels * bytes_per_pixel; 3045 3046 if (tiling == I915_FORMAT_MOD_Y_TILED || 3047 tiling == I915_FORMAT_MOD_Yf_TILED) { 3048 plane_bytes_per_line *= 4; 3049 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3050 plane_blocks_per_line /= 4; 3051 } else { 3052 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3053 } 3054 3055 wm_intermediate_val = latency * pixel_rate; 3056 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * 3057 plane_blocks_per_line; 3058 3059 return ret; 3060 } 3061 3062 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, 3063 const struct intel_crtc *intel_crtc) 3064 { 3065 struct drm_device *dev = intel_crtc->base.dev; 3066 struct drm_i915_private *dev_priv = dev->dev_private; 3067 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 3068 enum pipe pipe = intel_crtc->pipe; 3069 3070 if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe], 3071 sizeof(new_ddb->plane[pipe]))) 3072 return true; 3073 3074 if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR], 3075 sizeof(new_ddb->plane[pipe][PLANE_CURSOR]))) 3076 return true; 3077 3078 return false; 3079 } 3080 3081 static void skl_compute_wm_global_parameters(struct drm_device *dev, 3082 struct intel_wm_config *config) 3083 { 3084 struct drm_crtc *crtc; 3085 struct drm_plane *plane; 3086 3087 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3088 config->num_pipes_active += to_intel_crtc(crtc)->active; 3089 3090 /* FIXME: I don't think we need those two global parameters on SKL */ 3091 list_for_each_entry(plane, &dev->mode_config.plane_list, head) { 3092 struct intel_plane *intel_plane = to_intel_plane(plane); 3093 3094 config->sprites_enabled |= intel_plane->wm.enabled; 3095 config->sprites_scaled |= intel_plane->wm.scaled; 3096 } 3097 } 3098 3099 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, 3100 struct skl_pipe_wm_parameters *p) 3101 { 3102 struct drm_device *dev = crtc->dev; 3103 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3104 enum pipe pipe = intel_crtc->pipe; 3105 struct drm_plane *plane; 3106 struct drm_framebuffer *fb; 3107 int i = 1; /* Index for sprite planes start */ 3108 3109 p->active = intel_crtc->active; 3110 if (p->active) { 3111 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal; 3112 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config); 3113 3114 fb = crtc->primary->state->fb; 3115 /* For planar: Bpp is for uv plane, y_Bpp is for y plane */ 3116 if (fb) { 3117 p->plane[0].enabled = true; 3118 p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? 
3119 drm_format_plane_cpp(fb->pixel_format, 1) : 3120 drm_format_plane_cpp(fb->pixel_format, 0); 3121 p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? 3122 drm_format_plane_cpp(fb->pixel_format, 0) : 0; 3123 p->plane[0].tiling = fb->modifier[0]; 3124 } else { 3125 p->plane[0].enabled = false; 3126 p->plane[0].bytes_per_pixel = 0; 3127 p->plane[0].y_bytes_per_pixel = 0; 3128 p->plane[0].tiling = DRM_FORMAT_MOD_NONE; 3129 } 3130 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w; 3131 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h; 3132 p->plane[0].rotation = crtc->primary->state->rotation; 3133 3134 fb = crtc->cursor->state->fb; 3135 p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0; 3136 if (fb) { 3137 p->plane[PLANE_CURSOR].enabled = true; 3138 p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8; 3139 p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w; 3140 p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h; 3141 } else { 3142 p->plane[PLANE_CURSOR].enabled = false; 3143 p->plane[PLANE_CURSOR].bytes_per_pixel = 0; 3144 p->plane[PLANE_CURSOR].horiz_pixels = 64; 3145 p->plane[PLANE_CURSOR].vert_pixels = 64; 3146 } 3147 } 3148 3149 list_for_each_entry(plane, &dev->mode_config.plane_list, head) { 3150 struct intel_plane *intel_plane = to_intel_plane(plane); 3151 3152 if (intel_plane->pipe == pipe && 3153 plane->type == DRM_PLANE_TYPE_OVERLAY) 3154 p->plane[i++] = intel_plane->wm; 3155 } 3156 } 3157 3158 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 3159 struct skl_pipe_wm_parameters *p, 3160 struct intel_plane_wm_parameters *p_params, 3161 uint16_t ddb_allocation, 3162 int level, 3163 uint16_t *out_blocks, /* out */ 3164 uint8_t *out_lines /* out */) 3165 { 3166 uint32_t latency = dev_priv->wm.skl_latency[level]; 3167 uint32_t method1, method2; 3168 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3169 uint32_t res_blocks, res_lines; 3170 uint32_t selected_result; 3171 uint8_t bytes_per_pixel; 3172 3173 if (latency == 0 || !p->active || !p_params->enabled) 3174 return false; 3175 3176 bytes_per_pixel = p_params->y_bytes_per_pixel ? 
3177 p_params->y_bytes_per_pixel : 3178 p_params->bytes_per_pixel; 3179 method1 = skl_wm_method1(p->pixel_rate, 3180 bytes_per_pixel, 3181 latency); 3182 method2 = skl_wm_method2(p->pixel_rate, 3183 p->pipe_htotal, 3184 p_params->horiz_pixels, 3185 bytes_per_pixel, 3186 p_params->tiling, 3187 latency); 3188 3189 plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel; 3190 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3191 3192 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || 3193 p_params->tiling == I915_FORMAT_MOD_Yf_TILED) { 3194 uint32_t min_scanlines = 4; 3195 uint32_t y_tile_minimum; 3196 if (intel_rotation_90_or_270(p_params->rotation)) { 3197 switch (p_params->bytes_per_pixel) { 3198 case 1: 3199 min_scanlines = 16; 3200 break; 3201 case 2: 3202 min_scanlines = 8; 3203 break; 3204 case 8: 3205 WARN(1, "Unsupported pixel depth for rotation"); 3206 } 3207 } 3208 y_tile_minimum = plane_blocks_per_line * min_scanlines; 3209 selected_result = max(method2, y_tile_minimum); 3210 } else { 3211 if ((ddb_allocation / plane_blocks_per_line) >= 1) 3212 selected_result = min(method1, method2); 3213 else 3214 selected_result = method1; 3215 } 3216 3217 res_blocks = selected_result + 1; 3218 res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line); 3219 3220 if (level >= 1 && level <= 7) { 3221 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || 3222 p_params->tiling == I915_FORMAT_MOD_Yf_TILED) 3223 res_lines += 4; 3224 else 3225 res_blocks++; 3226 } 3227 3228 if (res_blocks >= ddb_allocation || res_lines > 31) 3229 return false; 3230 3231 *out_blocks = res_blocks; 3232 *out_lines = res_lines; 3233 3234 return true; 3235 } 3236 3237 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, 3238 struct skl_ddb_allocation *ddb, 3239 struct skl_pipe_wm_parameters *p, 3240 enum pipe pipe, 3241 int level, 3242 int num_planes, 3243 struct skl_wm_level *result) 3244 { 3245 uint16_t ddb_blocks; 3246 int i; 3247 3248 for (i = 0; i < num_planes; i++) { 3249 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); 3250 3251 result->plane_en[i] = skl_compute_plane_wm(dev_priv, 3252 p, &p->plane[i], 3253 ddb_blocks, 3254 level, 3255 &result->plane_res_b[i], 3256 &result->plane_res_l[i]); 3257 } 3258 3259 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]); 3260 result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p, 3261 &p->plane[PLANE_CURSOR], 3262 ddb_blocks, level, 3263 &result->plane_res_b[PLANE_CURSOR], 3264 &result->plane_res_l[PLANE_CURSOR]); 3265 } 3266 3267 static uint32_t 3268 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) 3269 { 3270 if (!to_intel_crtc(crtc)->active) 3271 return 0; 3272 3273 if (WARN_ON(p->pixel_rate == 0)) 3274 return 0; 3275 3276 return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); 3277 } 3278 3279 static void skl_compute_transition_wm(struct drm_crtc *crtc, 3280 struct skl_pipe_wm_parameters *params, 3281 struct skl_wm_level *trans_wm /* out */) 3282 { 3283 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3284 int i; 3285 3286 if (!params->active) 3287 return; 3288 3289 /* Until we know more, just disable transition WMs */ 3290 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3291 trans_wm->plane_en[i] = false; 3292 trans_wm->plane_en[PLANE_CURSOR] = false; 3293 } 3294 3295 static void skl_compute_pipe_wm(struct drm_crtc *crtc, 3296 struct skl_ddb_allocation *ddb, 3297 struct skl_pipe_wm_parameters *params, 3298 struct skl_pipe_wm *pipe_wm) 3299 { 3300 
struct drm_device *dev = crtc->dev; 3301 const struct drm_i915_private *dev_priv = dev->dev_private; 3302 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3303 int level, max_level = ilk_wm_max_level(dev); 3304 3305 for (level = 0; level <= max_level; level++) { 3306 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe, 3307 level, intel_num_planes(intel_crtc), 3308 &pipe_wm->wm[level]); 3309 } 3310 pipe_wm->linetime = skl_compute_linetime_wm(crtc, params); 3311 3312 skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm); 3313 } 3314 3315 static void skl_compute_wm_results(struct drm_device *dev, 3316 struct skl_pipe_wm_parameters *p, 3317 struct skl_pipe_wm *p_wm, 3318 struct skl_wm_values *r, 3319 struct intel_crtc *intel_crtc) 3320 { 3321 int level, max_level = ilk_wm_max_level(dev); 3322 enum pipe pipe = intel_crtc->pipe; 3323 uint32_t temp; 3324 int i; 3325 3326 for (level = 0; level <= max_level; level++) { 3327 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3328 temp = 0; 3329 3330 temp |= p_wm->wm[level].plane_res_l[i] << 3331 PLANE_WM_LINES_SHIFT; 3332 temp |= p_wm->wm[level].plane_res_b[i]; 3333 if (p_wm->wm[level].plane_en[i]) 3334 temp |= PLANE_WM_EN; 3335 3336 r->plane[pipe][i][level] = temp; 3337 } 3338 3339 temp = 0; 3340 3341 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT; 3342 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR]; 3343 3344 if (p_wm->wm[level].plane_en[PLANE_CURSOR]) 3345 temp |= PLANE_WM_EN; 3346 3347 r->plane[pipe][PLANE_CURSOR][level] = temp; 3348 3349 } 3350 3351 /* transition WMs */ 3352 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3353 temp = 0; 3354 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT; 3355 temp |= p_wm->trans_wm.plane_res_b[i]; 3356 if (p_wm->trans_wm.plane_en[i]) 3357 temp |= PLANE_WM_EN; 3358 3359 r->plane_trans[pipe][i] = temp; 3360 } 3361 3362 temp = 0; 3363 temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT; 3364 temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR]; 3365 if (p_wm->trans_wm.plane_en[PLANE_CURSOR]) 3366 temp |= PLANE_WM_EN; 3367 3368 r->plane_trans[pipe][PLANE_CURSOR] = temp; 3369 3370 r->wm_linetime[pipe] = p_wm->linetime; 3371 } 3372 3373 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg, 3374 const struct skl_ddb_entry *entry) 3375 { 3376 if (entry->end) 3377 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 3378 else 3379 I915_WRITE(reg, 0); 3380 } 3381 3382 static void skl_write_wm_values(struct drm_i915_private *dev_priv, 3383 const struct skl_wm_values *new) 3384 { 3385 struct drm_device *dev = dev_priv->dev; 3386 struct intel_crtc *crtc; 3387 3388 list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { 3389 int i, level, max_level = ilk_wm_max_level(dev); 3390 enum pipe pipe = crtc->pipe; 3391 3392 if (!new->dirty[pipe]) 3393 continue; 3394 3395 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); 3396 3397 for (level = 0; level <= max_level; level++) { 3398 for (i = 0; i < intel_num_planes(crtc); i++) 3399 I915_WRITE(PLANE_WM(pipe, i, level), 3400 new->plane[pipe][i][level]); 3401 I915_WRITE(CUR_WM(pipe, level), 3402 new->plane[pipe][PLANE_CURSOR][level]); 3403 } 3404 for (i = 0; i < intel_num_planes(crtc); i++) 3405 I915_WRITE(PLANE_WM_TRANS(pipe, i), 3406 new->plane_trans[pipe][i]); 3407 I915_WRITE(CUR_WM_TRANS(pipe), 3408 new->plane_trans[pipe][PLANE_CURSOR]); 3409 3410 for (i = 0; i < intel_num_planes(crtc); i++) { 3411 skl_ddb_entry_write(dev_priv, 3412 
PLANE_BUF_CFG(pipe, i), 3413 &new->ddb.plane[pipe][i]); 3414 skl_ddb_entry_write(dev_priv, 3415 PLANE_NV12_BUF_CFG(pipe, i), 3416 &new->ddb.y_plane[pipe][i]); 3417 } 3418 3419 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 3420 &new->ddb.plane[pipe][PLANE_CURSOR]); 3421 } 3422 } 3423 3424 /* 3425 * When setting up a new DDB allocation arrangement, we need to correctly 3426 * sequence the times at which the new allocations for the pipes are taken into 3427 * account or we'll have pipes fetching from space previously allocated to 3428 * another pipe. 3429 * 3430 * Roughly the sequence looks like: 3431 * 1. re-allocate the pipe(s) with the allocation being reduced and not 3432 * overlapping with a previous light-up pipe (another way to put it is: 3433 * pipes with their new allocation strickly included into their old ones). 3434 * 2. re-allocate the other pipes that get their allocation reduced 3435 * 3. allocate the pipes having their allocation increased 3436 * 3437 * Steps 1. and 2. are here to take care of the following case: 3438 * - Initially DDB looks like this: 3439 * | B | C | 3440 * - enable pipe A. 3441 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C 3442 * allocation 3443 * | A | B | C | 3444 * 3445 * We need to sequence the re-allocation: C, B, A (and not B, C, A). 3446 */ 3447 3448 static void 3449 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) 3450 { 3451 int plane; 3452 3453 DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); 3454 3455 for_each_plane(dev_priv, pipe, plane) { 3456 I915_WRITE(PLANE_SURF(pipe, plane), 3457 I915_READ(PLANE_SURF(pipe, plane))); 3458 } 3459 I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); 3460 } 3461 3462 static bool 3463 skl_ddb_allocation_included(const struct skl_ddb_allocation *old, 3464 const struct skl_ddb_allocation *new, 3465 enum pipe pipe) 3466 { 3467 uint16_t old_size, new_size; 3468 3469 old_size = skl_ddb_entry_size(&old->pipe[pipe]); 3470 new_size = skl_ddb_entry_size(&new->pipe[pipe]); 3471 3472 return old_size != new_size && 3473 new->pipe[pipe].start >= old->pipe[pipe].start && 3474 new->pipe[pipe].end <= old->pipe[pipe].end; 3475 } 3476 3477 static void skl_flush_wm_values(struct drm_i915_private *dev_priv, 3478 struct skl_wm_values *new_values) 3479 { 3480 struct drm_device *dev = dev_priv->dev; 3481 struct skl_ddb_allocation *cur_ddb, *new_ddb; 3482 bool reallocated[I915_MAX_PIPES] = {}; 3483 struct intel_crtc *crtc; 3484 enum pipe pipe; 3485 3486 new_ddb = &new_values->ddb; 3487 cur_ddb = &dev_priv->wm.skl_hw.ddb; 3488 3489 /* 3490 * First pass: flush the pipes with the new allocation contained into 3491 * the old space. 3492 * 3493 * We'll wait for the vblank on those pipes to ensure we can safely 3494 * re-allocate the freed space without this pipe fetching from it. 3495 */ 3496 for_each_intel_crtc(dev, crtc) { 3497 if (!crtc->active) 3498 continue; 3499 3500 pipe = crtc->pipe; 3501 3502 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) 3503 continue; 3504 3505 skl_wm_flush_pipe(dev_priv, pipe, 1); 3506 intel_wait_for_vblank(dev, pipe); 3507 3508 reallocated[pipe] = true; 3509 } 3510 3511 3512 /* 3513 * Second pass: flush the pipes that are having their allocation 3514 * reduced, but overlapping with a previous allocation. 3515 * 3516 * Here as well we need to wait for the vblank to make sure the freed 3517 * space is not used anymore. 
3518 */ 3519 for_each_intel_crtc(dev, crtc) { 3520 if (!crtc->active) 3521 continue; 3522 3523 pipe = crtc->pipe; 3524 3525 if (reallocated[pipe]) 3526 continue; 3527 3528 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < 3529 skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { 3530 skl_wm_flush_pipe(dev_priv, pipe, 2); 3531 intel_wait_for_vblank(dev, pipe); 3532 reallocated[pipe] = true; 3533 } 3534 } 3535 3536 /* 3537 * Third pass: flush the pipes that got more space allocated. 3538 * 3539 * We don't need to actively wait for the update here, next vblank 3540 * will just get more DDB space with the correct WM values. 3541 */ 3542 for_each_intel_crtc(dev, crtc) { 3543 if (!crtc->active) 3544 continue; 3545 3546 pipe = crtc->pipe; 3547 3548 /* 3549 * At this point, only the pipes with more space than before are 3550 * left to re-allocate. 3551 */ 3552 if (reallocated[pipe]) 3553 continue; 3554 3555 skl_wm_flush_pipe(dev_priv, pipe, 3); 3556 } 3557 } 3558 3559 static bool skl_update_pipe_wm(struct drm_crtc *crtc, 3560 struct skl_pipe_wm_parameters *params, 3561 struct intel_wm_config *config, 3562 struct skl_ddb_allocation *ddb, /* out */ 3563 struct skl_pipe_wm *pipe_wm /* out */) 3564 { 3565 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3566 3567 skl_compute_wm_pipe_parameters(crtc, params); 3568 skl_allocate_pipe_ddb(crtc, config, params, ddb); 3569 skl_compute_pipe_wm(crtc, ddb, params, pipe_wm); 3570 3571 if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm))) 3572 return false; 3573 3574 intel_crtc->wm.skl_active = *pipe_wm; 3575 3576 return true; 3577 } 3578 3579 static void skl_update_other_pipe_wm(struct drm_device *dev, 3580 struct drm_crtc *crtc, 3581 struct intel_wm_config *config, 3582 struct skl_wm_values *r) 3583 { 3584 struct intel_crtc *intel_crtc; 3585 struct intel_crtc *this_crtc = to_intel_crtc(crtc); 3586 3587 /* 3588 * If the WM update hasn't changed the allocation for this_crtc (the 3589 * crtc we are currently computing the new WM values for), other 3590 * enabled crtcs will keep the same allocation and we don't need to 3591 * recompute anything for them. 3592 */ 3593 if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) 3594 return; 3595 3596 /* 3597 * Otherwise, because of this_crtc being freshly enabled/disabled, the 3598 * other active pipes need new DDB allocation and WM values. 3599 */ 3600 list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, 3601 base.head) { 3602 struct skl_pipe_wm_parameters params = {}; 3603 struct skl_pipe_wm pipe_wm = {}; 3604 bool wm_changed; 3605 3606 if (this_crtc->pipe == intel_crtc->pipe) 3607 continue; 3608 3609 if (!intel_crtc->active) 3610 continue; 3611 3612 wm_changed = skl_update_pipe_wm(&intel_crtc->base, 3613 &params, config, 3614 &r->ddb, &pipe_wm); 3615 3616 /* 3617 * If we end up re-computing the other pipe WM values, it's 3618 * because it was really needed, so we expect the WM values to 3619 * be different. 
3620 */ 3621 WARN_ON(!wm_changed); 3622 3623 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc); 3624 r->dirty[intel_crtc->pipe] = true; 3625 } 3626 } 3627 3628 static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe) 3629 { 3630 watermarks->wm_linetime[pipe] = 0; 3631 memset(watermarks->plane[pipe], 0, 3632 sizeof(uint32_t) * 8 * I915_MAX_PLANES); 3633 memset(watermarks->plane_trans[pipe], 3634 0, sizeof(uint32_t) * I915_MAX_PLANES); 3635 watermarks->plane_trans[pipe][PLANE_CURSOR] = 0; 3636 3637 /* Clear ddb entries for pipe */ 3638 memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry)); 3639 memset(&watermarks->ddb.plane[pipe], 0, 3640 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3641 memset(&watermarks->ddb.y_plane[pipe], 0, 3642 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3643 memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0, 3644 sizeof(struct skl_ddb_entry)); 3645 3646 } 3647 3648 static void skl_update_wm(struct drm_crtc *crtc) 3649 { 3650 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3651 struct drm_device *dev = crtc->dev; 3652 struct drm_i915_private *dev_priv = dev->dev_private; 3653 struct skl_pipe_wm_parameters params = {}; 3654 struct skl_wm_values *results = &dev_priv->wm.skl_results; 3655 struct skl_pipe_wm pipe_wm = {}; 3656 struct intel_wm_config config = {}; 3657 3658 3659 /* Clear all dirty flags */ 3660 memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES); 3661 3662 skl_clear_wm(results, intel_crtc->pipe); 3663 3664 skl_compute_wm_global_parameters(dev, &config); 3665 3666 if (!skl_update_pipe_wm(crtc, &params, &config, 3667 &results->ddb, &pipe_wm)) 3668 return; 3669 3670 skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc); 3671 results->dirty[intel_crtc->pipe] = true; 3672 3673 skl_update_other_pipe_wm(dev, crtc, &config, results); 3674 skl_write_wm_values(dev_priv, results); 3675 skl_flush_wm_values(dev_priv, results); 3676 3677 /* store the new configuration */ 3678 dev_priv->wm.skl_hw = *results; 3679 } 3680 3681 static void 3682 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, 3683 uint32_t sprite_width, uint32_t sprite_height, 3684 int pixel_size, bool enabled, bool scaled) 3685 { 3686 struct intel_plane *intel_plane = to_intel_plane(plane); 3687 struct drm_framebuffer *fb = plane->state->fb; 3688 3689 intel_plane->wm.enabled = enabled; 3690 intel_plane->wm.scaled = scaled; 3691 intel_plane->wm.horiz_pixels = sprite_width; 3692 intel_plane->wm.vert_pixels = sprite_height; 3693 intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE; 3694 3695 /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */ 3696 intel_plane->wm.bytes_per_pixel = 3697 (fb && fb->pixel_format == DRM_FORMAT_NV12) ? 3698 drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size; 3699 intel_plane->wm.y_bytes_per_pixel = 3700 (fb && fb->pixel_format == DRM_FORMAT_NV12) ? 3701 drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0; 3702 3703 /* 3704 * Framebuffer can be NULL on plane disable, but it does not 3705 * matter for watermarks if we assume no tiling in that case. 
3706 */ 3707 if (fb) 3708 intel_plane->wm.tiling = fb->modifier[0]; 3709 intel_plane->wm.rotation = plane->state->rotation; 3710 3711 skl_update_wm(crtc); 3712 } 3713 3714 static void ilk_update_wm(struct drm_crtc *crtc) 3715 { 3716 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3717 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3718 struct drm_device *dev = crtc->dev; 3719 struct drm_i915_private *dev_priv = dev->dev_private; 3720 struct ilk_wm_maximums max; 3721 struct ilk_wm_values results = {}; 3722 enum intel_ddb_partitioning partitioning; 3723 struct intel_pipe_wm pipe_wm = {}; 3724 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 3725 struct intel_wm_config config = {}; 3726 3727 WARN_ON(cstate->base.active != intel_crtc->active); 3728 3729 intel_compute_pipe_wm(cstate, &pipe_wm); 3730 3731 if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) 3732 return; 3733 3734 intel_crtc->wm.active = pipe_wm; 3735 3736 ilk_compute_wm_config(dev, &config); 3737 3738 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 3739 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 3740 3741 /* 5/6 split only in single pipe config on IVB+ */ 3742 if (INTEL_INFO(dev)->gen >= 7 && 3743 config.num_pipes_active == 1 && config.sprites_enabled) { 3744 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 3745 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 3746 3747 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 3748 } else { 3749 best_lp_wm = &lp_wm_1_2; 3750 } 3751 3752 partitioning = (best_lp_wm == &lp_wm_1_2) ? 3753 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 3754 3755 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 3756 3757 ilk_write_wm_values(dev_priv, &results); 3758 } 3759 3760 static void 3761 ilk_update_sprite_wm(struct drm_plane *plane, 3762 struct drm_crtc *crtc, 3763 uint32_t sprite_width, uint32_t sprite_height, 3764 int pixel_size, bool enabled, bool scaled) 3765 { 3766 struct drm_device *dev = plane->dev; 3767 struct intel_plane *intel_plane = to_intel_plane(plane); 3768 3769 /* 3770 * IVB workaround: must disable low power watermarks for at least 3771 * one frame before enabling scaling. LP watermarks can be re-enabled 3772 * when scaling is disabled. 
3773 * 3774 * WaCxSRDisabledForSpriteScaling:ivb 3775 */ 3776 if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) 3777 intel_wait_for_vblank(dev, intel_plane->pipe); 3778 3779 ilk_update_wm(crtc); 3780 } 3781 3782 static void skl_pipe_wm_active_state(uint32_t val, 3783 struct skl_pipe_wm *active, 3784 bool is_transwm, 3785 bool is_cursor, 3786 int i, 3787 int level) 3788 { 3789 bool is_enabled = (val & PLANE_WM_EN) != 0; 3790 3791 if (!is_transwm) { 3792 if (!is_cursor) { 3793 active->wm[level].plane_en[i] = is_enabled; 3794 active->wm[level].plane_res_b[i] = 3795 val & PLANE_WM_BLOCKS_MASK; 3796 active->wm[level].plane_res_l[i] = 3797 (val >> PLANE_WM_LINES_SHIFT) & 3798 PLANE_WM_LINES_MASK; 3799 } else { 3800 active->wm[level].plane_en[PLANE_CURSOR] = is_enabled; 3801 active->wm[level].plane_res_b[PLANE_CURSOR] = 3802 val & PLANE_WM_BLOCKS_MASK; 3803 active->wm[level].plane_res_l[PLANE_CURSOR] = 3804 (val >> PLANE_WM_LINES_SHIFT) & 3805 PLANE_WM_LINES_MASK; 3806 } 3807 } else { 3808 if (!is_cursor) { 3809 active->trans_wm.plane_en[i] = is_enabled; 3810 active->trans_wm.plane_res_b[i] = 3811 val & PLANE_WM_BLOCKS_MASK; 3812 active->trans_wm.plane_res_l[i] = 3813 (val >> PLANE_WM_LINES_SHIFT) & 3814 PLANE_WM_LINES_MASK; 3815 } else { 3816 active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled; 3817 active->trans_wm.plane_res_b[PLANE_CURSOR] = 3818 val & PLANE_WM_BLOCKS_MASK; 3819 active->trans_wm.plane_res_l[PLANE_CURSOR] = 3820 (val >> PLANE_WM_LINES_SHIFT) & 3821 PLANE_WM_LINES_MASK; 3822 } 3823 } 3824 } 3825 3826 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3827 { 3828 struct drm_device *dev = crtc->dev; 3829 struct drm_i915_private *dev_priv = dev->dev_private; 3830 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 3831 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3832 struct skl_pipe_wm *active = &intel_crtc->wm.skl_active; 3833 enum pipe pipe = intel_crtc->pipe; 3834 int level, i, max_level; 3835 uint32_t temp; 3836 3837 max_level = ilk_wm_max_level(dev); 3838 3839 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3840 3841 for (level = 0; level <= max_level; level++) { 3842 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3843 hw->plane[pipe][i][level] = 3844 I915_READ(PLANE_WM(pipe, i, level)); 3845 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level)); 3846 } 3847 3848 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3849 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); 3850 hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe)); 3851 3852 if (!intel_crtc->active) 3853 return; 3854 3855 hw->dirty[pipe] = true; 3856 3857 active->linetime = hw->wm_linetime[pipe]; 3858 3859 for (level = 0; level <= max_level; level++) { 3860 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3861 temp = hw->plane[pipe][i][level]; 3862 skl_pipe_wm_active_state(temp, active, false, 3863 false, i, level); 3864 } 3865 temp = hw->plane[pipe][PLANE_CURSOR][level]; 3866 skl_pipe_wm_active_state(temp, active, false, true, i, level); 3867 } 3868 3869 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3870 temp = hw->plane_trans[pipe][i]; 3871 skl_pipe_wm_active_state(temp, active, true, false, i, 0); 3872 } 3873 3874 temp = hw->plane_trans[pipe][PLANE_CURSOR]; 3875 skl_pipe_wm_active_state(temp, active, true, true, i, 0); 3876 } 3877 3878 void skl_wm_get_hw_state(struct drm_device *dev) 3879 { 3880 struct drm_i915_private *dev_priv = dev->dev_private; 3881 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 3882 
struct drm_crtc *crtc; 3883 3884 skl_ddb_get_hw_state(dev_priv, ddb); 3885 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3886 skl_pipe_wm_get_hw_state(crtc); 3887 } 3888 3889 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3890 { 3891 struct drm_device *dev = crtc->dev; 3892 struct drm_i915_private *dev_priv = dev->dev_private; 3893 struct ilk_wm_values *hw = &dev_priv->wm.hw; 3894 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3895 struct intel_pipe_wm *active = &intel_crtc->wm.active; 3896 enum pipe pipe = intel_crtc->pipe; 3897 static const unsigned int wm0_pipe_reg[] = { 3898 [PIPE_A] = WM0_PIPEA_ILK, 3899 [PIPE_B] = WM0_PIPEB_ILK, 3900 [PIPE_C] = WM0_PIPEC_IVB, 3901 }; 3902 3903 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 3904 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 3905 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3906 3907 memset(active, 0, sizeof(*active)); 3908 3909 active->pipe_enabled = intel_crtc->active; 3910 3911 if (active->pipe_enabled) { 3912 u32 tmp = hw->wm_pipe[pipe]; 3913 3914 /* 3915 * For active pipes LP0 watermark is marked as 3916 * enabled, and LP1+ watermarks as disabled since 3917 * we can't really reverse compute them in case 3918 * multiple pipes are active. 3919 */ 3920 active->wm[0].enable = true; 3921 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 3922 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 3923 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 3924 active->linetime = hw->wm_linetime[pipe]; 3925 } else { 3926 int level, max_level = ilk_wm_max_level(dev); 3927 3928 /* 3929 * For inactive pipes, all watermark levels 3930 * should be marked as enabled but zeroed, 3931 * which is what we'd compute them to. 
3932 */ 3933 for (level = 0; level <= max_level; level++) 3934 active->wm[level].enable = true; 3935 } 3936 } 3937 3938 #define _FW_WM(value, plane) \ 3939 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 3940 #define _FW_WM_VLV(value, plane) \ 3941 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 3942 3943 static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 3944 struct vlv_wm_values *wm) 3945 { 3946 enum pipe pipe; 3947 uint32_t tmp; 3948 3949 for_each_pipe(dev_priv, pipe) { 3950 tmp = I915_READ(VLV_DDL(pipe)); 3951 3952 wm->ddl[pipe].primary = 3953 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3954 wm->ddl[pipe].cursor = 3955 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3956 wm->ddl[pipe].sprite[0] = 3957 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3958 wm->ddl[pipe].sprite[1] = 3959 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3960 } 3961 3962 tmp = I915_READ(DSPFW1); 3963 wm->sr.plane = _FW_WM(tmp, SR); 3964 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB); 3965 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB); 3966 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA); 3967 3968 tmp = I915_READ(DSPFW2); 3969 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB); 3970 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA); 3971 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA); 3972 3973 tmp = I915_READ(DSPFW3); 3974 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 3975 3976 if (IS_CHERRYVIEW(dev_priv)) { 3977 tmp = I915_READ(DSPFW7_CHV); 3978 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 3979 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 3980 3981 tmp = I915_READ(DSPFW8_CHV); 3982 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF); 3983 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE); 3984 3985 tmp = I915_READ(DSPFW9_CHV); 3986 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC); 3987 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC); 3988 3989 tmp = I915_READ(DSPHOWM); 3990 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 3991 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 3992 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 3993 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8; 3994 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 3995 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 3996 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 3997 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 3998 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 3999 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4000 } else { 4001 tmp = I915_READ(DSPFW7); 4002 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4003 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4004 4005 tmp = I915_READ(DSPHOWM); 4006 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4007 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4008 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4009 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4010 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4011 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4012 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4013 } 4014 } 4015 4016 #undef _FW_WM 4017 #undef _FW_WM_VLV 4018 4019 void vlv_wm_get_hw_state(struct drm_device *dev) 4020 { 4021 struct drm_i915_private *dev_priv = to_i915(dev); 4022 struct vlv_wm_values *wm = 
&dev_priv->wm.vlv; 4023 struct intel_plane *plane; 4024 enum pipe pipe; 4025 u32 val; 4026 4027 vlv_read_wm_values(dev_priv, wm); 4028 4029 for_each_intel_plane(dev, plane) { 4030 switch (plane->base.type) { 4031 int sprite; 4032 case DRM_PLANE_TYPE_CURSOR: 4033 plane->wm.fifo_size = 63; 4034 break; 4035 case DRM_PLANE_TYPE_PRIMARY: 4036 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0); 4037 break; 4038 case DRM_PLANE_TYPE_OVERLAY: 4039 sprite = plane->plane; 4040 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1); 4041 break; 4042 } 4043 } 4044 4045 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 4046 wm->level = VLV_WM_LEVEL_PM2; 4047 4048 if (IS_CHERRYVIEW(dev_priv)) { 4049 mutex_lock(&dev_priv->rps.hw_lock); 4050 4051 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 4052 if (val & DSP_MAXFIFO_PM5_ENABLE) 4053 wm->level = VLV_WM_LEVEL_PM5; 4054 4055 /* 4056 * If DDR DVFS is disabled in the BIOS, Punit 4057 * will never ack the request. So if that happens 4058 * assume we don't have to enable/disable DDR DVFS 4059 * dynamically. To test that just set the REQ_ACK 4060 * bit to poke the Punit, but don't change the 4061 * HIGH/LOW bits so that we don't actually change 4062 * the current state. 4063 */ 4064 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4065 val |= FORCE_DDR_FREQ_REQ_ACK; 4066 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 4067 4068 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 4069 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 4070 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 4071 "assuming DDR DVFS is disabled\n"); 4072 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 4073 } else { 4074 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4075 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 4076 wm->level = VLV_WM_LEVEL_DDR_DVFS; 4077 } 4078 4079 mutex_unlock(&dev_priv->rps.hw_lock); 4080 } 4081 4082 for_each_pipe(dev_priv, pipe) 4083 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 4084 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor, 4085 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]); 4086 4087 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 4088 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 4089 } 4090 4091 void ilk_wm_get_hw_state(struct drm_device *dev) 4092 { 4093 struct drm_i915_private *dev_priv = dev->dev_private; 4094 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4095 struct drm_crtc *crtc; 4096 4097 for_each_crtc(dev, crtc) 4098 ilk_pipe_wm_get_hw_state(crtc); 4099 4100 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4101 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4102 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4103 4104 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4105 if (INTEL_INFO(dev)->gen >= 7) { 4106 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4107 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4108 } 4109 4110 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4111 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4112 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4113 else if (IS_IVYBRIDGE(dev)) 4114 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 4115 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4116 4117 hw->enable_fbc_wm = 4118 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4119 } 4120 4121 /** 4122 * intel_update_watermarks - update FIFO watermark values based on current modes 4123 * 4124 * Calculate watermark values for the various WM regs based on current mode 4125 * and plane configuration. 
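 * (Illustrative only, with assumed numbers rather than values from any platform table: a 100 MHz dot clock at 4 bytes per pixel with 2 us of latency fetches 100e6 * 4 * 2e-6 = 800 bytes while waiting, so the normal-mode formula below yields a watermark that must cover at least 800 bytes of FIFO, before the rounding and extra entries noted further down.)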
4126 * 4127 * There are several cases to deal with here: 4128 * - normal (i.e. non-self-refresh) 4129 * - self-refresh (SR) mode 4130 * - lines are large relative to FIFO size (buffer can hold up to 2) 4131 * - lines are small relative to FIFO size (buffer can hold more than 2 4132 * lines), so need to account for TLB latency 4133 * 4134 * The normal calculation is: 4135 * watermark = dotclock * bytes per pixel * latency 4136 * where latency is platform & configuration dependent (we assume pessimal 4137 * values here). 4138 * 4139 * The SR calculation is: 4140 * watermark = (trunc(latency/line time)+1) * surface width * 4141 * bytes per pixel 4142 * where 4143 * line time = htotal / dotclock 4144 * surface width = hdisplay for normal plane and 64 for cursor 4145 * and latency is assumed to be high, as above. 4146 * 4147 * The final value programmed to the register should always be rounded up, 4148 * and include an extra 2 entries to account for clock crossings. 4149 * 4150 * We don't use the sprite, so we can ignore that. And on Crestline we have 4151 * to set the non-SR watermarks to 8. 4152 */ 4153 void intel_update_watermarks(struct drm_crtc *crtc) 4154 { 4155 struct drm_i915_private *dev_priv = crtc->dev->dev_private; 4156 4157 if (dev_priv->display.update_wm) 4158 dev_priv->display.update_wm(crtc); 4159 } 4160 4161 void intel_update_sprite_watermarks(struct drm_plane *plane, 4162 struct drm_crtc *crtc, 4163 uint32_t sprite_width, 4164 uint32_t sprite_height, 4165 int pixel_size, 4166 bool enabled, bool scaled) 4167 { 4168 struct drm_i915_private *dev_priv = plane->dev->dev_private; 4169 4170 if (dev_priv->display.update_sprite_wm) 4171 dev_priv->display.update_sprite_wm(plane, crtc, 4172 sprite_width, sprite_height, 4173 pixel_size, enabled, scaled); 4174 } 4175 4176 /** 4177 * Lock protecting IPS related data structures 4178 */ 4179 #ifdef __NetBSD__ 4180 spinlock_t mchdev_lock; 4181 #else 4182 DEFINE_SPINLOCK(mchdev_lock); 4183 #endif 4184 4185 /* Global for IPS driver to get at the current i915 device. Protected by 4186 * mchdev_lock. 
*/ 4187 static struct drm_i915_private *i915_mch_dev; 4188 4189 bool ironlake_set_drps(struct drm_device *dev, u8 val) 4190 { 4191 struct drm_i915_private *dev_priv = dev->dev_private; 4192 u16 rgvswctl; 4193 4194 assert_spin_locked(&mchdev_lock); 4195 4196 rgvswctl = I915_READ16(MEMSWCTL); 4197 if (rgvswctl & MEMCTL_CMD_STS) { 4198 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4199 return false; /* still busy with another command */ 4200 } 4201 4202 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4203 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4204 I915_WRITE16(MEMSWCTL, rgvswctl); 4205 POSTING_READ16(MEMSWCTL); 4206 4207 rgvswctl |= MEMCTL_CMD_STS; 4208 I915_WRITE16(MEMSWCTL, rgvswctl); 4209 4210 return true; 4211 } 4212 4213 static void ironlake_enable_drps(struct drm_device *dev) 4214 { 4215 struct drm_i915_private *dev_priv = dev->dev_private; 4216 u32 rgvmodectl = I915_READ(MEMMODECTL); 4217 u8 fmax, fmin, fstart, vstart; 4218 4219 spin_lock_irq(&mchdev_lock); 4220 4221 /* Enable temp reporting */ 4222 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 4223 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 4224 4225 /* 100ms RC evaluation intervals */ 4226 I915_WRITE(RCUPEI, 100000); 4227 I915_WRITE(RCDNEI, 100000); 4228 4229 /* Set max/min thresholds to 90ms and 80ms respectively */ 4230 I915_WRITE(RCBMAXAVG, 90000); 4231 I915_WRITE(RCBMINAVG, 80000); 4232 4233 I915_WRITE(MEMIHYST, 1); 4234 4235 /* Set up min, max, and cur for interrupt handling */ 4236 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 4237 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 4238 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 4239 MEMMODE_FSTART_SHIFT; 4240 4241 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 4242 PXVFREQ_PX_SHIFT; 4243 4244 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 4245 dev_priv->ips.fstart = fstart; 4246 4247 dev_priv->ips.max_delay = fstart; 4248 dev_priv->ips.min_delay = fmin; 4249 dev_priv->ips.cur_delay = fstart; 4250 4251 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 4252 fmax, fmin, fstart); 4253 4254 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 4255 4256 /* 4257 * Interrupts will be enabled in ironlake_irq_postinstall 4258 */ 4259 4260 I915_WRITE(VIDSTART, vstart); 4261 POSTING_READ(VIDSTART); 4262 4263 rgvmodectl |= MEMMODE_SWMODE_EN; 4264 I915_WRITE(MEMMODECTL, rgvmodectl); 4265 4266 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 4267 DRM_ERROR("stuck trying to change perf mode\n"); 4268 mdelay(1); 4269 4270 ironlake_set_drps(dev, fstart); 4271 4272 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 4273 I915_READ(DDREC) + I915_READ(CSIEC); 4274 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 4275 dev_priv->ips.last_count2 = I915_READ(GFXEC); 4276 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 4277 4278 spin_unlock_irq(&mchdev_lock); 4279 } 4280 4281 static void ironlake_disable_drps(struct drm_device *dev) 4282 { 4283 struct drm_i915_private *dev_priv = dev->dev_private; 4284 u16 rgvswctl; 4285 4286 spin_lock_irq(&mchdev_lock); 4287 4288 rgvswctl = I915_READ16(MEMSWCTL); 4289 4290 /* Ack interrupts, disable EFC interrupt */ 4291 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 4292 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 4293 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 4294 I915_WRITE(DEIIR, DE_PCU_EVENT); 4295 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 4296 4297 /* Go back to the starting frequency */ 4298 ironlake_set_drps(dev, 
dev_priv->ips.fstart); 4299 mdelay(1); 4300 rgvswctl |= MEMCTL_CMD_STS; 4301 I915_WRITE(MEMSWCTL, rgvswctl); 4302 mdelay(1); 4303 4304 spin_unlock_irq(&mchdev_lock); 4305 } 4306 4307 /* There's a funny hw issue where the hw returns all 0 when reading from 4308 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 4309 * ourselves, instead of doing a rmw cycle (which might result in us clearing 4310 * all limits and the gpu stuck at whatever frequency it is at atm). 4311 */ 4312 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 4313 { 4314 u32 limits; 4315 4316 /* Only set the down limit when we've reached the lowest level to avoid 4317 * getting more interrupts, otherwise leave this clear. This prevents a 4318 * race in the hw when coming out of rc6: There's a tiny window where 4319 * the hw runs at the minimal clock before selecting the desired 4320 * frequency, if the down threshold expires in that window we will not 4321 * receive a down interrupt. */ 4322 if (IS_GEN9(dev_priv->dev)) { 4323 limits = (dev_priv->rps.max_freq_softlimit) << 23; 4324 if (val <= dev_priv->rps.min_freq_softlimit) 4325 limits |= (dev_priv->rps.min_freq_softlimit) << 14; 4326 } else { 4327 limits = dev_priv->rps.max_freq_softlimit << 24; 4328 if (val <= dev_priv->rps.min_freq_softlimit) 4329 limits |= dev_priv->rps.min_freq_softlimit << 16; 4330 } 4331 4332 return limits; 4333 } 4334 4335 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 4336 { 4337 int new_power; 4338 u32 threshold_up = 0, threshold_down = 0; /* in % */ 4339 u32 ei_up = 0, ei_down = 0; 4340 4341 new_power = dev_priv->rps.power; 4342 switch (dev_priv->rps.power) { 4343 case LOW_POWER: 4344 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) 4345 new_power = BETWEEN; 4346 break; 4347 4348 case BETWEEN: 4349 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) 4350 new_power = LOW_POWER; 4351 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) 4352 new_power = HIGH_POWER; 4353 break; 4354 4355 case HIGH_POWER: 4356 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) 4357 new_power = BETWEEN; 4358 break; 4359 } 4360 /* Max/min bins are special */ 4361 if (val <= dev_priv->rps.min_freq_softlimit) 4362 new_power = LOW_POWER; 4363 if (val >= dev_priv->rps.max_freq_softlimit) 4364 new_power = HIGH_POWER; 4365 if (new_power == dev_priv->rps.power) 4366 return; 4367 4368 /* Note the units here are not exactly 1us, but 1280ns. */ 4369 switch (new_power) { 4370 case LOW_POWER: 4371 /* Upclock if more than 95% busy over 16ms */ 4372 ei_up = 16000; 4373 threshold_up = 95; 4374 4375 /* Downclock if less than 85% busy over 32ms */ 4376 ei_down = 32000; 4377 threshold_down = 85; 4378 break; 4379 4380 case BETWEEN: 4381 /* Upclock if more than 90% busy over 13ms */ 4382 ei_up = 13000; 4383 threshold_up = 90; 4384 4385 /* Downclock if less than 75% busy over 32ms */ 4386 ei_down = 32000; 4387 threshold_down = 75; 4388 break; 4389 4390 case HIGH_POWER: 4391 /* Upclock if more than 85% busy over 10ms */ 4392 ei_up = 10000; 4393 threshold_up = 85; 4394 4395 /* Downclock if less than 60% busy over 32ms */ 4396 ei_down = 32000; 4397 threshold_down = 60; 4398 break; 4399 } 4400 4401 /* When byt can survive without system hang with dynamic 4402 * sw freq adjustments, this restriction can be lifted. 
4403 */ 4404 if (IS_VALLEYVIEW(dev_priv)) 4405 goto skip_hw_write; 4406 4407 I915_WRITE(GEN6_RP_UP_EI, 4408 GT_INTERVAL_FROM_US(dev_priv, ei_up)); 4409 I915_WRITE(GEN6_RP_UP_THRESHOLD, 4410 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100))); 4411 4412 I915_WRITE(GEN6_RP_DOWN_EI, 4413 GT_INTERVAL_FROM_US(dev_priv, ei_down)); 4414 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 4415 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100))); 4416 4417 I915_WRITE(GEN6_RP_CONTROL, 4418 GEN6_RP_MEDIA_TURBO | 4419 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4420 GEN6_RP_MEDIA_IS_GFX | 4421 GEN6_RP_ENABLE | 4422 GEN6_RP_UP_BUSY_AVG | 4423 GEN6_RP_DOWN_IDLE_AVG); 4424 4425 skip_hw_write: 4426 dev_priv->rps.power = new_power; 4427 dev_priv->rps.up_threshold = threshold_up; 4428 dev_priv->rps.down_threshold = threshold_down; 4429 dev_priv->rps.last_adj = 0; 4430 } 4431 4432 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 4433 { 4434 u32 mask = 0; 4435 4436 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 4437 if (val > dev_priv->rps.min_freq_softlimit) 4438 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4439 if (val < dev_priv->rps.max_freq_softlimit) 4440 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4441 4442 mask &= dev_priv->pm_rps_events; 4443 4444 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 4445 } 4446 4447 /* gen6_set_rps is called to update the frequency request, but should also be 4448 * called when the range (min_delay and max_delay) is modified so that we can 4449 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 4450 static void gen6_set_rps(struct drm_device *dev, u8 val) 4451 { 4452 struct drm_i915_private *dev_priv = dev->dev_private; 4453 4454 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4455 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) 4456 return; 4457 4458 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4459 WARN_ON(val > dev_priv->rps.max_freq); 4460 WARN_ON(val < dev_priv->rps.min_freq); 4461 4462 /* min/max delay may still have been modified so be sure to 4463 * write the limits value. 4464 */ 4465 if (val != dev_priv->rps.cur_freq) { 4466 gen6_set_rps_thresholds(dev_priv, val); 4467 4468 if (IS_GEN9(dev)) 4469 I915_WRITE(GEN6_RPNSWREQ, 4470 GEN9_FREQUENCY(val)); 4471 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4472 I915_WRITE(GEN6_RPNSWREQ, 4473 HSW_FREQUENCY(val)); 4474 else 4475 I915_WRITE(GEN6_RPNSWREQ, 4476 GEN6_FREQUENCY(val) | 4477 GEN6_OFFSET(0) | 4478 GEN6_AGGRESSIVE_TURBO); 4479 } 4480 4481 /* Make sure we continue to get interrupts 4482 * until we hit the minimum or maximum frequencies. 
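 * (GEN6_RP_INTERRUPT_LIMITS reads back as all zeroes, so intel_rps_limits() recomputes the full register value instead of doing a read-modify-write; see the comment above intel_rps_limits().)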
4483 */ 4484 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 4485 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4486 4487 POSTING_READ(GEN6_RPNSWREQ); 4488 4489 dev_priv->rps.cur_freq = val; 4490 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4491 } 4492 4493 static void valleyview_set_rps(struct drm_device *dev, u8 val) 4494 { 4495 struct drm_i915_private *dev_priv = dev->dev_private; 4496 4497 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4498 WARN_ON(val > dev_priv->rps.max_freq); 4499 WARN_ON(val < dev_priv->rps.min_freq); 4500 4501 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), 4502 "Odd GPU freq value\n")) 4503 val &= ~1; 4504 4505 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4506 4507 if (val != dev_priv->rps.cur_freq) { 4508 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 4509 if (!IS_CHERRYVIEW(dev_priv)) 4510 gen6_set_rps_thresholds(dev_priv, val); 4511 } 4512 4513 dev_priv->rps.cur_freq = val; 4514 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4515 } 4516 4517 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down 4518 * 4519 * * If Gfx is Idle, then 4520 * 1. Forcewake Media well. 4521 * 2. Request idle freq. 4522 * 3. Release Forcewake of Media well. 4523 */ 4524 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 4525 { 4526 u32 val = dev_priv->rps.idle_freq; 4527 4528 if (dev_priv->rps.cur_freq <= val) 4529 return; 4530 4531 /* Wake up the media well, as that takes a lot less 4532 * power than the Render well. */ 4533 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); 4534 valleyview_set_rps(dev_priv->dev, val); 4535 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); 4536 } 4537 4538 void gen6_rps_busy(struct drm_i915_private *dev_priv) 4539 { 4540 mutex_lock(&dev_priv->rps.hw_lock); 4541 if (dev_priv->rps.enabled) { 4542 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) 4543 gen6_rps_reset_ei(dev_priv); 4544 I915_WRITE(GEN6_PMINTRMSK, 4545 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 4546 } 4547 mutex_unlock(&dev_priv->rps.hw_lock); 4548 } 4549 4550 void gen6_rps_idle(struct drm_i915_private *dev_priv) 4551 { 4552 struct drm_device *dev = dev_priv->dev; 4553 4554 mutex_lock(&dev_priv->rps.hw_lock); 4555 if (dev_priv->rps.enabled) { 4556 if (IS_VALLEYVIEW(dev)) 4557 vlv_set_rps_idle(dev_priv); 4558 else 4559 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4560 dev_priv->rps.last_adj = 0; 4561 I915_WRITE(GEN6_PMINTRMSK, 4562 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 4563 } 4564 mutex_unlock(&dev_priv->rps.hw_lock); 4565 4566 spin_lock(&dev_priv->rps.client_lock); 4567 while (!list_empty(&dev_priv->rps.clients)) 4568 list_del_init(dev_priv->rps.clients.next); 4569 spin_unlock(&dev_priv->rps.client_lock); 4570 } 4571 4572 void gen6_rps_boost(struct drm_i915_private *dev_priv, 4573 struct intel_rps_client *rps, 4574 unsigned long submitted) 4575 { 4576 /* This is intentionally racy! We peek at the state here, then 4577 * validate inside the RPS worker. 4578 */ 4579 if (!(dev_priv->mm.busy && 4580 dev_priv->rps.enabled && 4581 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) 4582 return; 4583 4584 /* Force a RPS boost (and don't count it against the client) if 4585 * the GPU is severely congested. 
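 * (Here "severely congested" means the request was submitted more than DRM_I915_THROTTLE_JIFFIES ago; rps is then cleared so the boost is counted against the device rather than the client.)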
4586 */ 4587 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) 4588 rps = NULL; 4589 4590 spin_lock(&dev_priv->rps.client_lock); 4591 if (rps == NULL || list_empty(&rps->link)) { 4592 spin_lock_irq(&dev_priv->irq_lock); 4593 if (dev_priv->rps.interrupts_enabled) { 4594 dev_priv->rps.client_boost = true; 4595 queue_work(dev_priv->wq, &dev_priv->rps.work); 4596 } 4597 spin_unlock_irq(&dev_priv->irq_lock); 4598 4599 if (rps != NULL) { 4600 list_add(&rps->link, &dev_priv->rps.clients); 4601 rps->boosts++; 4602 } else 4603 dev_priv->rps.boosts++; 4604 } 4605 spin_unlock(&dev_priv->rps.client_lock); 4606 } 4607 4608 void intel_set_rps(struct drm_device *dev, u8 val) 4609 { 4610 if (IS_VALLEYVIEW(dev)) 4611 valleyview_set_rps(dev, val); 4612 else 4613 gen6_set_rps(dev, val); 4614 } 4615 4616 static void gen9_disable_rc6(struct drm_device *dev) 4617 { 4618 struct drm_i915_private *dev_priv = dev->dev_private; 4619 4620 I915_WRITE(GEN6_RC_CONTROL, 0); 4621 } 4622 4623 static void gen9_disable_rps(struct drm_device *dev) 4624 { 4625 struct drm_i915_private *dev_priv = dev->dev_private; 4626 4627 I915_WRITE(GEN9_PG_ENABLE, 0); 4628 } 4629 4630 static void gen6_disable_rc6(struct drm_device *dev) 4631 { 4632 struct drm_i915_private *dev_priv = dev->dev_private; 4633 4634 I915_WRITE(GEN6_RC_CONTROL, 0); 4635 } 4636 4637 static void gen6_disable_rps(struct drm_device *dev) 4638 { 4639 struct drm_i915_private *dev_priv = dev->dev_private; 4640 4641 I915_WRITE(GEN6_RPNSWREQ, 1UL << 31); 4642 } 4643 4644 static void cherryview_disable_rc6(struct drm_device *dev) 4645 { 4646 struct drm_i915_private *dev_priv = dev->dev_private; 4647 4648 I915_WRITE(GEN6_RC_CONTROL, 0); 4649 } 4650 4651 static void valleyview_disable_rc6(struct drm_device *dev) 4652 { 4653 struct drm_i915_private *dev_priv = dev->dev_private; 4654 4655 /* we're doing forcewake before Disabling RC6, 4656 * This what the BIOS expects when going into suspend */ 4657 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4658 4659 I915_WRITE(GEN6_RC_CONTROL, 0); 4660 4661 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4662 } 4663 4664 static void intel_print_rc6_info(struct drm_device *dev, u32 mode) 4665 { 4666 if (IS_VALLEYVIEW(dev)) { 4667 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 4668 mode = GEN6_RC_CTL_RC6_ENABLE; 4669 else 4670 mode = 0; 4671 } 4672 if (HAS_RC6p(dev)) 4673 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", 4674 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", 4675 (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", 4676 (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); 4677 4678 else 4679 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", 4680 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off"); 4681 } 4682 4683 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) 4684 { 4685 /* No RC6 before Ironlake and code is gone for ilk. 
*/ 4686 if (INTEL_INFO(dev)->gen < 6) 4687 return 0; 4688 4689 /* Respect the kernel parameter if it is set */ 4690 if (enable_rc6 >= 0) { 4691 int mask; 4692 4693 if (HAS_RC6p(dev)) 4694 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 4695 INTEL_RC6pp_ENABLE; 4696 else 4697 mask = INTEL_RC6_ENABLE; 4698 4699 if ((enable_rc6 & mask) != enable_rc6) 4700 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", 4701 enable_rc6 & mask, enable_rc6, mask); 4702 4703 return enable_rc6 & mask; 4704 } 4705 4706 if (IS_IVYBRIDGE(dev)) 4707 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 4708 4709 return INTEL_RC6_ENABLE; 4710 } 4711 4712 int intel_enable_rc6(const struct drm_device *dev) 4713 { 4714 return i915.enable_rc6; 4715 } 4716 4717 static void gen6_init_rps_frequencies(struct drm_device *dev) 4718 { 4719 struct drm_i915_private *dev_priv = dev->dev_private; 4720 uint32_t rp_state_cap; 4721 u32 ddcc_status = 0; 4722 int ret; 4723 4724 /* All of these values are in units of 50MHz */ 4725 dev_priv->rps.cur_freq = 0; 4726 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 4727 if (IS_BROXTON(dev)) { 4728 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 4729 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 4730 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4731 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 4732 } else { 4733 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 4734 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 4735 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4736 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 4737 } 4738 4739 /* hw_max = RP0 until we check for overclocking */ 4740 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 4741 4742 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 4743 if (IS_HASWELL(dev) || IS_BROADWELL(dev) || 4744 IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 4745 ret = sandybridge_pcode_read(dev_priv, 4746 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 4747 &ddcc_status); 4748 if (0 == ret) 4749 dev_priv->rps.efficient_freq = 4750 clamp_t(u8, 4751 ((ddcc_status >> 8) & 0xff), 4752 dev_priv->rps.min_freq, 4753 dev_priv->rps.max_freq); 4754 } 4755 4756 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 4757 /* Store the frequency values in 16.66 MHZ units, which is 4758 the natural hardware unit for SKL */ 4759 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 4760 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 4761 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 4762 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 4763 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 4764 } 4765 4766 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 4767 4768 /* Preserve min/max settings in case of re-init */ 4769 if (dev_priv->rps.max_freq_softlimit == 0) 4770 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 4771 4772 if (dev_priv->rps.min_freq_softlimit == 0) { 4773 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4774 dev_priv->rps.min_freq_softlimit = 4775 max_t(int, dev_priv->rps.efficient_freq, 4776 intel_freq_opcode(dev_priv, 450)); 4777 else 4778 dev_priv->rps.min_freq_softlimit = 4779 dev_priv->rps.min_freq; 4780 } 4781 } 4782 4783 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 4784 static void gen9_enable_rps(struct drm_device *dev) 4785 { 4786 struct drm_i915_private *dev_priv = dev->dev_private; 4787 4788 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4789 4790 gen6_init_rps_frequencies(dev); 4791 4792 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4793 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) { 
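		/* Turbo stays disabled on these early BXT steppings (WaGsvDisableTurbo), so just drop forcewake again and skip the RPS programming below. */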
4794 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4795 return; 4796 } 4797 4798 /* Program defaults and thresholds for RPS*/ 4799 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4800 GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 4801 4802 /* 1 second timeout*/ 4803 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 4804 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 4805 4806 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 4807 4808 /* Leaning on the below call to gen6_set_rps to program/setup the 4809 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 4810 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 4811 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4812 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4813 4814 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4815 } 4816 4817 static void gen9_enable_rc6(struct drm_device *dev) 4818 { 4819 struct drm_i915_private *dev_priv = dev->dev_private; 4820 struct intel_engine_cs *ring; 4821 uint32_t rc6_mask = 0; 4822 int unused; 4823 4824 /* 1a: Software RC state - RC0 */ 4825 I915_WRITE(GEN6_RC_STATE, 0); 4826 4827 /* 1b: Get forcewake during program sequence. Although the driver 4828 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4829 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4830 4831 /* 2a: Disable RC states. */ 4832 I915_WRITE(GEN6_RC_CONTROL, 0); 4833 4834 /* 2b: Program RC6 thresholds.*/ 4835 4836 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 4837 if (IS_SKYLAKE(dev)) 4838 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 4839 else 4840 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 4841 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4842 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4843 for_each_ring(ring, dev_priv, unused) 4844 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 4845 4846 if (HAS_GUC_UCODE(dev)) 4847 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 4848 4849 I915_WRITE(GEN6_RC_SLEEP, 0); 4850 4851 /* 2c: Program Coarse Power Gating Policies. */ 4852 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 4853 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 4854 4855 /* 3a: Enable RC6 */ 4856 if (!dev_priv->rps.ctx_corrupted && 4857 intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4858 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4859 DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 4860 "on" : "off"); 4861 /* WaRsUseTimeoutMode */ 4862 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) || 4863 (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) { 4864 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ 4865 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4866 GEN7_RC_CTL_TO_MODE | 4867 rc6_mask); 4868 } else { 4869 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 4870 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4871 GEN6_RC_CTL_EI_MODE(1) | 4872 rc6_mask); 4873 } 4874 4875 /* 4876 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 4877 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 4878 */ 4879 if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) || 4880 INTEL_INFO(dev)->gen == 9) 4881 I915_WRITE(GEN9_PG_ENABLE, 0); 4882 else 4883 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 
4884 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 4885 4886 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4887 4888 } 4889 4890 static void gen8_enable_rps(struct drm_device *dev) 4891 { 4892 struct drm_i915_private *dev_priv = dev->dev_private; 4893 struct intel_engine_cs *ring; 4894 uint32_t rc6_mask = 0; 4895 int unused; 4896 4897 /* 1a: Software RC state - RC0 */ 4898 I915_WRITE(GEN6_RC_STATE, 0); 4899 4900 /* 1c & 1d: Get forcewake during program sequence. Although the driver 4901 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4902 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4903 4904 /* 2a: Disable RC states. */ 4905 I915_WRITE(GEN6_RC_CONTROL, 0); 4906 4907 /* Initialize rps frequencies */ 4908 gen6_init_rps_frequencies(dev); 4909 4910 /* 2b: Program RC6 thresholds.*/ 4911 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 4912 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4913 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4914 for_each_ring(ring, dev_priv, unused) 4915 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 4916 I915_WRITE(GEN6_RC_SLEEP, 0); 4917 if (IS_BROADWELL(dev)) 4918 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 4919 else 4920 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 4921 4922 /* 3: Enable RC6 */ 4923 if (!dev_priv->rps.ctx_corrupted && 4924 intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4925 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4926 intel_print_rc6_info(dev, rc6_mask); 4927 if (IS_BROADWELL(dev)) 4928 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4929 GEN7_RC_CTL_TO_MODE | 4930 rc6_mask); 4931 else 4932 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4933 GEN6_RC_CTL_EI_MODE(1) | 4934 rc6_mask); 4935 4936 /* 4 Program defaults and thresholds for RPS*/ 4937 I915_WRITE(GEN6_RPNSWREQ, 4938 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4939 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4940 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4941 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 4942 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 4943 4944 /* Docs recommend 900MHz, and 300 MHz respectively */ 4945 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 4946 dev_priv->rps.max_freq_softlimit << 24 | 4947 dev_priv->rps.min_freq_softlimit << 16); 4948 4949 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 4950 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ 4951 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 4952 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? 
*/ 4953 4954 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 4955 4956 /* 5: Enable RPS */ 4957 I915_WRITE(GEN6_RP_CONTROL, 4958 GEN6_RP_MEDIA_TURBO | 4959 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4960 GEN6_RP_MEDIA_IS_GFX | 4961 GEN6_RP_ENABLE | 4962 GEN6_RP_UP_BUSY_AVG | 4963 GEN6_RP_DOWN_IDLE_AVG); 4964 4965 /* 6: Ring frequency + overclocking (our driver does this later */ 4966 4967 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4968 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4969 4970 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4971 } 4972 4973 static void gen6_enable_rps(struct drm_device *dev) 4974 { 4975 struct drm_i915_private *dev_priv = dev->dev_private; 4976 struct intel_engine_cs *ring; 4977 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; 4978 u32 gtfifodbg; 4979 int rc6_mode; 4980 int i, ret; 4981 4982 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4983 4984 /* Here begins a magic sequence of register writes to enable 4985 * auto-downclocking. 4986 * 4987 * Perhaps there might be some value in exposing these to 4988 * userspace... 4989 */ 4990 I915_WRITE(GEN6_RC_STATE, 0); 4991 4992 /* Clear the DBG now so we don't confuse earlier errors */ 4993 if ((gtfifodbg = I915_READ(GTFIFODBG))) { 4994 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 4995 I915_WRITE(GTFIFODBG, gtfifodbg); 4996 } 4997 4998 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4999 5000 /* Initialize rps frequencies */ 5001 gen6_init_rps_frequencies(dev); 5002 5003 /* disable the counters and set deterministic thresholds */ 5004 I915_WRITE(GEN6_RC_CONTROL, 0); 5005 5006 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 5007 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 5008 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 5009 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5010 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5011 5012 for_each_ring(ring, dev_priv, i) 5013 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 5014 5015 I915_WRITE(GEN6_RC_SLEEP, 0); 5016 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 5017 if (IS_IVYBRIDGE(dev)) 5018 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 5019 else 5020 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 5021 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 5022 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 5023 5024 /* Check if we are enabling RC6 */ 5025 rc6_mode = intel_enable_rc6(dev_priv->dev); 5026 if (rc6_mode & INTEL_RC6_ENABLE) 5027 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 5028 5029 /* We don't use those on Haswell */ 5030 if (!IS_HASWELL(dev)) { 5031 if (rc6_mode & INTEL_RC6p_ENABLE) 5032 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 5033 5034 if (rc6_mode & INTEL_RC6pp_ENABLE) 5035 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 5036 } 5037 5038 intel_print_rc6_info(dev, rc6_mask); 5039 5040 I915_WRITE(GEN6_RC_CONTROL, 5041 rc6_mask | 5042 GEN6_RC_CTL_EI_MODE(1) | 5043 GEN6_RC_CTL_HW_ENABLE); 5044 5045 /* Power down if completely idle for over 50ms */ 5046 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 5047 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5048 5049 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); 5050 if (ret) 5051 DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); 5052 5053 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); 5054 if (!ret && (pcu_mbox & __BIT(31))) { /* OC supported */ 5055 DRM_DEBUG_DRIVER("Overclocking supported. 
Max: %dMHz, Overclock max: %dMHz\n", 5056 (dev_priv->rps.max_freq_softlimit & 0xff) * 50, 5057 (pcu_mbox & 0xff) * 50); 5058 dev_priv->rps.max_freq = pcu_mbox & 0xff; 5059 } 5060 5061 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5062 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5063 5064 rc6vids = 0; 5065 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5066 if (IS_GEN6(dev) && ret) { 5067 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 5068 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 5069 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", 5070 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 5071 rc6vids &= 0xffff00; 5072 rc6vids |= GEN6_ENCODE_RC6_VID(450); 5073 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 5074 if (ret) 5075 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 5076 } 5077 5078 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5079 } 5080 5081 static void __gen6_update_ring_freq(struct drm_device *dev) 5082 { 5083 struct drm_i915_private *dev_priv = dev->dev_private; 5084 int min_freq = 15; 5085 unsigned int gpu_freq; 5086 unsigned int max_ia_freq, min_ring_freq; 5087 unsigned int max_gpu_freq, min_gpu_freq; 5088 int scaling_factor = 180; 5089 #ifndef __NetBSD__ 5090 struct cpufreq_policy *policy; 5091 #endif 5092 5093 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5094 5095 #ifdef __NetBSD__ 5096 { 5097 extern uint64_t tsc_freq; /* x86 TSC frequency in Hz */ 5098 max_ia_freq = (tsc_freq / 1000); 5099 } 5100 #else 5101 policy = cpufreq_cpu_get(0); 5102 if (policy) { 5103 max_ia_freq = policy->cpuinfo.max_freq; 5104 cpufreq_cpu_put(policy); 5105 } else { 5106 /* 5107 * Default to measured freq if none found, PCU will ensure we 5108 * don't go over 5109 */ 5110 max_ia_freq = tsc_khz; 5111 } 5112 #endif 5113 5114 /* Convert from kHz to MHz */ 5115 max_ia_freq /= 1000; 5116 5117 min_ring_freq = I915_READ(DCLK) & 0xf; 5118 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 5119 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 5120 5121 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5122 /* Convert GT frequency to 50 HZ units */ 5123 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; 5124 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; 5125 } else { 5126 min_gpu_freq = dev_priv->rps.min_freq; 5127 max_gpu_freq = dev_priv->rps.max_freq; 5128 } 5129 5130 /* 5131 * For each potential GPU frequency, load a ring frequency we'd like 5132 * to use for memory access. We do this by specifying the IA frequency 5133 * the PCU should use as a reference to determine the ring frequency. 5134 */ 5135 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { 5136 int diff = max_gpu_freq - gpu_freq; 5137 unsigned int ia_freq = 0, ring_freq = 0; 5138 5139 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5140 /* 5141 * ring_freq = 2 * GT. ring_freq is in 100MHz units 5142 * No floor required for ring frequency on SKL. 5143 */ 5144 ring_freq = gpu_freq; 5145 } else if (INTEL_INFO(dev)->gen >= 8) { 5146 /* max(2 * GT, DDR). 
NB: GT is 50MHz units */ 5147 ring_freq = max(min_ring_freq, gpu_freq); 5148 } else if (IS_HASWELL(dev)) { 5149 ring_freq = mult_frac(gpu_freq, 5, 4); 5150 ring_freq = max(min_ring_freq, ring_freq); 5151 /* leave ia_freq as the default, chosen by cpufreq */ 5152 } else { 5153 /* On older processors, there is no separate ring 5154 * clock domain, so in order to boost the bandwidth 5155 * of the ring, we need to upclock the CPU (ia_freq). 5156 * 5157 * For GPU frequencies less than 750MHz, 5158 * just use the lowest ring freq. 5159 */ 5160 if (gpu_freq < min_freq) 5161 ia_freq = 800; 5162 else 5163 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 5164 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 5165 } 5166 5167 sandybridge_pcode_write(dev_priv, 5168 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 5169 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 5170 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 5171 gpu_freq); 5172 } 5173 } 5174 5175 void gen6_update_ring_freq(struct drm_device *dev) 5176 { 5177 struct drm_i915_private *dev_priv = dev->dev_private; 5178 5179 if (!HAS_CORE_RING_FREQ(dev)) 5180 return; 5181 5182 mutex_lock(&dev_priv->rps.hw_lock); 5183 __gen6_update_ring_freq(dev); 5184 mutex_unlock(&dev_priv->rps.hw_lock); 5185 } 5186 5187 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 5188 { 5189 struct drm_device *dev = dev_priv->dev; 5190 u32 val, rp0; 5191 5192 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5193 5194 switch (INTEL_INFO(dev)->eu_total) { 5195 case 8: 5196 /* (2 * 4) config */ 5197 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); 5198 break; 5199 case 12: 5200 /* (2 * 6) config */ 5201 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); 5202 break; 5203 case 16: 5204 /* (2 * 8) config */ 5205 default: 5206 /* Setting (2 * 8) Min RP0 for any other combination */ 5207 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); 5208 break; 5209 } 5210 5211 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); 5212 5213 return rp0; 5214 } 5215 5216 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5217 { 5218 u32 val, rpe; 5219 5220 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 5221 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 5222 5223 return rpe; 5224 } 5225 5226 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 5227 { 5228 u32 val, rp1; 5229 5230 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5231 rp1 = (val & FB_GFX_FREQ_FUSE_MASK); 5232 5233 return rp1; 5234 } 5235 5236 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 5237 { 5238 u32 val, rp1; 5239 5240 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5241 5242 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 5243 5244 return rp1; 5245 } 5246 5247 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 5248 { 5249 u32 val, rp0; 5250 5251 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5252 5253 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 5254 /* Clamp to max */ 5255 rp0 = min_t(u32, rp0, 0xea); 5256 5257 return rp0; 5258 } 5259 5260 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5261 { 5262 u32 val, rpe; 5263 5264 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 5265 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 5266 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 5267 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 5268 5269 return 
rpe; 5270 } 5271 5272 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 5273 { 5274 return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 5275 } 5276 5277 /* Check that the pctx buffer wasn't moved under us. */ 5278 static void valleyview_check_pctx(struct drm_i915_private *dev_priv) 5279 { 5280 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5281 5282 WARN_ON(pctx_addr != dev_priv->mm.stolen_base + 5283 dev_priv->vlv_pctx->stolen->start); 5284 } 5285 5286 5287 /* Check that the pcbr address is not empty. */ 5288 static void cherryview_check_pctx(struct drm_i915_private *dev_priv) 5289 { 5290 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5291 5292 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); 5293 } 5294 5295 static void cherryview_setup_pctx(struct drm_device *dev) 5296 { 5297 struct drm_i915_private *dev_priv = dev->dev_private; 5298 unsigned long pctx_paddr, paddr; 5299 struct i915_gtt *gtt = &dev_priv->gtt; 5300 u32 pcbr; 5301 int pctx_size = 32*1024; 5302 5303 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 5304 5305 pcbr = I915_READ(VLV_PCBR); 5306 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 5307 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5308 paddr = (dev_priv->mm.stolen_base + 5309 (gtt->stolen_size - pctx_size)); 5310 5311 pctx_paddr = (paddr & (~4095)); 5312 I915_WRITE(VLV_PCBR, pctx_paddr); 5313 } 5314 5315 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5316 } 5317 5318 static void valleyview_setup_pctx(struct drm_device *dev) 5319 { 5320 struct drm_i915_private *dev_priv = dev->dev_private; 5321 struct drm_i915_gem_object *pctx; 5322 unsigned long pctx_paddr; 5323 u32 pcbr; 5324 int pctx_size = 24*1024; 5325 5326 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 5327 5328 pcbr = I915_READ(VLV_PCBR); 5329 if (pcbr) { 5330 /* BIOS set it up already, grab the pre-alloc'd space */ 5331 int pcbr_offset; 5332 5333 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; 5334 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, 5335 pcbr_offset, 5336 I915_GTT_OFFSET_NONE, 5337 pctx_size); 5338 goto out; 5339 } 5340 5341 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5342 5343 /* 5344 * From the Gunit register HAS: 5345 * The Gfx driver is expected to program this register and ensure 5346 * proper allocation within Gfx stolen memory. For example, this 5347 * register should be programmed such that the PCBR range does not 5348 * overlap with other ranges, such as the frame buffer, protected 5349 * memory, or any other relevant ranges. 
5350 */ 5351 pctx = i915_gem_object_create_stolen(dev, pctx_size); 5352 if (!pctx) { 5353 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 5354 return; 5355 } 5356 5357 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 5358 I915_WRITE(VLV_PCBR, pctx_paddr); 5359 5360 out: 5361 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5362 dev_priv->vlv_pctx = pctx; 5363 } 5364 5365 static void valleyview_cleanup_pctx(struct drm_device *dev) 5366 { 5367 struct drm_i915_private *dev_priv = dev->dev_private; 5368 5369 if (WARN_ON(!dev_priv->vlv_pctx)) 5370 return; 5371 5372 drm_gem_object_unreference(&dev_priv->vlv_pctx->base); 5373 dev_priv->vlv_pctx = NULL; 5374 } 5375 5376 static void valleyview_init_gt_powersave(struct drm_device *dev) 5377 { 5378 struct drm_i915_private *dev_priv = dev->dev_private; 5379 u32 val; 5380 5381 valleyview_setup_pctx(dev); 5382 5383 mutex_lock(&dev_priv->rps.hw_lock); 5384 5385 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5386 switch ((val >> 6) & 3) { 5387 case 0: 5388 case 1: 5389 dev_priv->mem_freq = 800; 5390 break; 5391 case 2: 5392 dev_priv->mem_freq = 1066; 5393 break; 5394 case 3: 5395 dev_priv->mem_freq = 1333; 5396 break; 5397 } 5398 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5399 5400 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 5401 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5402 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5403 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5404 dev_priv->rps.max_freq); 5405 5406 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 5407 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5408 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5409 dev_priv->rps.efficient_freq); 5410 5411 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 5412 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 5413 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5414 dev_priv->rps.rp1_freq); 5415 5416 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 5417 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5418 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5419 dev_priv->rps.min_freq); 5420 5421 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 5422 5423 /* Preserve min/max settings in case of re-init */ 5424 if (dev_priv->rps.max_freq_softlimit == 0) 5425 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5426 5427 if (dev_priv->rps.min_freq_softlimit == 0) 5428 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5429 5430 mutex_unlock(&dev_priv->rps.hw_lock); 5431 } 5432 5433 static void cherryview_init_gt_powersave(struct drm_device *dev) 5434 { 5435 struct drm_i915_private *dev_priv = dev->dev_private; 5436 u32 val; 5437 5438 cherryview_setup_pctx(dev); 5439 5440 mutex_lock(&dev_priv->rps.hw_lock); 5441 5442 mutex_lock(&dev_priv->sb_lock); 5443 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 5444 mutex_unlock(&dev_priv->sb_lock); 5445 5446 switch ((val >> 2) & 0x7) { 5447 case 3: 5448 dev_priv->mem_freq = 2000; 5449 break; 5450 default: 5451 dev_priv->mem_freq = 1600; 5452 break; 5453 } 5454 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5455 5456 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 5457 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5458 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5459 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5460 dev_priv->rps.max_freq); 5461 5462 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 5463 
DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5464 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5465 dev_priv->rps.efficient_freq); 5466 5467 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 5468 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 5469 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5470 dev_priv->rps.rp1_freq); 5471 5472 /* PUnit validated range is only [RPe, RP0] */ 5473 dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; 5474 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5475 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5476 dev_priv->rps.min_freq); 5477 5478 WARN_ONCE((dev_priv->rps.max_freq | 5479 dev_priv->rps.efficient_freq | 5480 dev_priv->rps.rp1_freq | 5481 dev_priv->rps.min_freq) & 1, 5482 "Odd GPU freq values\n"); 5483 5484 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 5485 5486 /* Preserve min/max settings in case of re-init */ 5487 if (dev_priv->rps.max_freq_softlimit == 0) 5488 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5489 5490 if (dev_priv->rps.min_freq_softlimit == 0) 5491 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5492 5493 mutex_unlock(&dev_priv->rps.hw_lock); 5494 } 5495 5496 static void valleyview_cleanup_gt_powersave(struct drm_device *dev) 5497 { 5498 valleyview_cleanup_pctx(dev); 5499 } 5500 5501 static void cherryview_enable_rps(struct drm_device *dev) 5502 { 5503 struct drm_i915_private *dev_priv = dev->dev_private; 5504 struct intel_engine_cs *ring; 5505 u32 gtfifodbg, val, rc6_mode = 0, pcbr; 5506 int i; 5507 5508 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5509 5510 gtfifodbg = I915_READ(GTFIFODBG); 5511 if (gtfifodbg) { 5512 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5513 gtfifodbg); 5514 I915_WRITE(GTFIFODBG, gtfifodbg); 5515 } 5516 5517 cherryview_check_pctx(dev_priv); 5518 5519 /* 1a & 1b: Get forcewake during program sequence. Although the driver 5520 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5521 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5522 5523 /* Disable RC states. 
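	 * Clearing GEN6_RC_CONTROL here lets the wake rate limit, evaluation
	 * interval and idle thresholds below be reprogrammed before RC6 is
	 * re-enabled in step 3.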
*/ 5524 I915_WRITE(GEN6_RC_CONTROL, 0); 5525 5526 /* 2a: Program RC6 thresholds.*/ 5527 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 5528 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5529 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5530 5531 for_each_ring(ring, dev_priv, i) 5532 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 5533 I915_WRITE(GEN6_RC_SLEEP, 0); 5534 5535 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 5536 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 5537 5538 /* allows RC6 residency counter to work */ 5539 I915_WRITE(VLV_COUNTER_CONTROL, 5540 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 5541 VLV_MEDIA_RC6_COUNT_EN | 5542 VLV_RENDER_RC6_COUNT_EN)); 5543 5544 /* For now we assume BIOS is allocating and populating the PCBR */ 5545 pcbr = I915_READ(VLV_PCBR); 5546 5547 /* 3: Enable RC6 */ 5548 if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && 5549 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 5550 rc6_mode = GEN7_RC_CTL_TO_MODE; 5551 5552 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5553 5554 /* 4 Program defaults and thresholds for RPS*/ 5555 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 5556 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5557 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5558 I915_WRITE(GEN6_RP_UP_EI, 66000); 5559 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5560 5561 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5562 5563 /* 5: Enable RPS */ 5564 I915_WRITE(GEN6_RP_CONTROL, 5565 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5566 GEN6_RP_MEDIA_IS_GFX | 5567 GEN6_RP_ENABLE | 5568 GEN6_RP_UP_BUSY_AVG | 5569 GEN6_RP_DOWN_IDLE_AVG); 5570 5571 /* Setting Fixed Bias */ 5572 val = VLV_OVERRIDE_EN | 5573 VLV_SOC_TDP_EN | 5574 CHV_BIAS_CPU_50_SOC_50; 5575 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 5576 5577 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5578 5579 /* RPS code assumes GPLL is used */ 5580 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5581 5582 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5583 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5584 5585 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5586 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5587 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5588 dev_priv->rps.cur_freq); 5589 5590 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5591 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5592 dev_priv->rps.efficient_freq); 5593 5594 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); 5595 5596 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5597 } 5598 5599 static void valleyview_enable_rps(struct drm_device *dev) 5600 { 5601 struct drm_i915_private *dev_priv = dev->dev_private; 5602 struct intel_engine_cs *ring; 5603 u32 gtfifodbg, val, rc6_mode = 0; 5604 int i; 5605 5606 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5607 5608 valleyview_check_pctx(dev_priv); 5609 5610 if ((gtfifodbg = I915_READ(GTFIFODBG))) { 5611 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5612 gtfifodbg); 5613 I915_WRITE(GTFIFODBG, gtfifodbg); 5614 } 5615 5616 /* If VLV, Forcewake all wells, else re-direct to regular path */ 5617 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5618 5619 /* Disable RC states. 
*/ 5620 I915_WRITE(GEN6_RC_CONTROL, 0); 5621 5622 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 5623 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5624 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5625 I915_WRITE(GEN6_RP_UP_EI, 66000); 5626 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5627 5628 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5629 5630 I915_WRITE(GEN6_RP_CONTROL, 5631 GEN6_RP_MEDIA_TURBO | 5632 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5633 GEN6_RP_MEDIA_IS_GFX | 5634 GEN6_RP_ENABLE | 5635 GEN6_RP_UP_BUSY_AVG | 5636 GEN6_RP_DOWN_IDLE_CONT); 5637 5638 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 5639 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5640 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5641 5642 for_each_ring(ring, dev_priv, i) 5643 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 5644 5645 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 5646 5647 /* allows RC6 residency counter to work */ 5648 I915_WRITE(VLV_COUNTER_CONTROL, 5649 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | 5650 VLV_RENDER_RC0_COUNT_EN | 5651 VLV_MEDIA_RC6_COUNT_EN | 5652 VLV_RENDER_RC6_COUNT_EN)); 5653 5654 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 5655 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 5656 5657 intel_print_rc6_info(dev, rc6_mode); 5658 5659 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5660 5661 /* Setting Fixed Bias */ 5662 val = VLV_OVERRIDE_EN | 5663 VLV_SOC_TDP_EN | 5664 VLV_BIAS_CPU_125_SOC_875; 5665 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 5666 5667 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5668 5669 /* RPS code assumes GPLL is used */ 5670 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5671 5672 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5673 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5674 5675 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5676 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5677 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5678 dev_priv->rps.cur_freq); 5679 5680 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5681 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5682 dev_priv->rps.efficient_freq); 5683 5684 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); 5685 5686 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5687 } 5688 5689 static unsigned long intel_pxfreq(u32 vidfreq) 5690 { 5691 unsigned long freq; 5692 int div = (vidfreq & 0x3f0000) >> 16; 5693 int post = (vidfreq & 0x3000) >> 12; 5694 int pre = (vidfreq & 0x7); 5695 5696 if (!pre) 5697 return 0; 5698 5699 freq = ((div * 133333) / ((1<<post) * pre)); 5700 5701 return freq; 5702 } 5703 5704 static const struct cparams { 5705 u16 i; 5706 u16 t; 5707 u16 m; 5708 u16 c; 5709 } cparams[] = { 5710 { 1, 1333, 301, 28664 }, 5711 { 1, 1066, 294, 24460 }, 5712 { 1, 800, 294, 25192 }, 5713 { 0, 1333, 276, 27605 }, 5714 { 0, 1066, 276, 27605 }, 5715 { 0, 800, 231, 23784 }, 5716 }; 5717 5718 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 5719 { 5720 u64 total_count, diff, ret; 5721 u32 count1, count2, count3, m = 0, c = 0; 5722 unsigned long now = jiffies_to_msecs(jiffies), diff1; 5723 int i; 5724 5725 assert_spin_locked(&mchdev_lock); 5726 5727 diff1 = now - dev_priv->ips.last_time1; 5728 5729 /* Prevent division-by-zero if we are asking too fast. 5730 * Also, we don't get interesting results if we are polling 5731 * faster than once in 10ms, so just return the saved value 5732 * in such cases. 
5733 */ 5734 if (diff1 <= 10) 5735 return dev_priv->ips.chipset_power; 5736 5737 count1 = I915_READ(DMIEC); 5738 count2 = I915_READ(DDREC); 5739 count3 = I915_READ(CSIEC); 5740 5741 total_count = count1 + count2 + count3; 5742 5743 /* FIXME: handle per-counter overflow */ 5744 if (total_count < dev_priv->ips.last_count1) { 5745 diff = ~0UL - dev_priv->ips.last_count1; 5746 diff += total_count; 5747 } else { 5748 diff = total_count - dev_priv->ips.last_count1; 5749 } 5750 5751 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 5752 if (cparams[i].i == dev_priv->ips.c_m && 5753 cparams[i].t == dev_priv->ips.r_t) { 5754 m = cparams[i].m; 5755 c = cparams[i].c; 5756 break; 5757 } 5758 } 5759 5760 diff = div_u64(diff, diff1); 5761 ret = ((m * diff) + c); 5762 ret = div_u64(ret, 10); 5763 5764 dev_priv->ips.last_count1 = total_count; 5765 dev_priv->ips.last_time1 = now; 5766 5767 dev_priv->ips.chipset_power = ret; 5768 5769 return ret; 5770 } 5771 5772 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 5773 { 5774 struct drm_device *dev = dev_priv->dev; 5775 unsigned long val; 5776 5777 if (INTEL_INFO(dev)->gen != 5) 5778 return 0; 5779 5780 spin_lock_irq(&mchdev_lock); 5781 5782 val = __i915_chipset_val(dev_priv); 5783 5784 spin_unlock_irq(&mchdev_lock); 5785 5786 return val; 5787 } 5788 5789 unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 5790 { 5791 unsigned long m, x, b; 5792 u32 tsfs; 5793 5794 tsfs = I915_READ(TSFS); 5795 5796 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 5797 x = I915_READ8(TR1); 5798 5799 b = tsfs & TSFS_INTR_MASK; 5800 5801 return ((m * x) / 127) - b; 5802 } 5803 5804 static int _pxvid_to_vd(u8 pxvid) 5805 { 5806 if (pxvid == 0) 5807 return 0; 5808 5809 if (pxvid >= 8 && pxvid < 31) 5810 pxvid = 31; 5811 5812 return (pxvid + 2) * 125; 5813 } 5814 5815 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 5816 { 5817 struct drm_device *dev = dev_priv->dev; 5818 const int vd = _pxvid_to_vd(pxvid); 5819 const int vm = vd - 1125; 5820 5821 if (INTEL_INFO(dev)->is_mobile) 5822 return vm > 0 ? vm : 0; 5823 5824 return vd; 5825 } 5826 5827 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 5828 { 5829 u64 now, diff, diffms; 5830 u32 count; 5831 5832 assert_spin_locked(&mchdev_lock); 5833 5834 now = ktime_get_raw_ns(); 5835 diffms = now - dev_priv->ips.last_time2; 5836 do_div(diffms, NSEC_PER_MSEC); 5837 5838 /* Don't divide by 0 */ 5839 if (!diffms) 5840 return; 5841 5842 count = I915_READ(GFXEC); 5843 5844 if (count < dev_priv->ips.last_count2) { 5845 diff = ~0UL - dev_priv->ips.last_count2; 5846 diff += count; 5847 } else { 5848 diff = count - dev_priv->ips.last_count2; 5849 } 5850 5851 dev_priv->ips.last_count2 = count; 5852 dev_priv->ips.last_time2 = now; 5853 5854 /* More magic constants... 
*/ 5855 diff = diff * 1181; 5856 diff = div_u64(diff, diffms * 10); 5857 dev_priv->ips.gfx_power = diff; 5858 } 5859 5860 void i915_update_gfx_val(struct drm_i915_private *dev_priv) 5861 { 5862 struct drm_device *dev = dev_priv->dev; 5863 5864 if (INTEL_INFO(dev)->gen != 5) 5865 return; 5866 5867 spin_lock_irq(&mchdev_lock); 5868 5869 __i915_update_gfx_val(dev_priv); 5870 5871 spin_unlock_irq(&mchdev_lock); 5872 } 5873 5874 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 5875 { 5876 unsigned long t, corr, state1, corr2, state2; 5877 u32 pxvid, ext_v; 5878 5879 assert_spin_locked(&mchdev_lock); 5880 5881 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); 5882 pxvid = (pxvid >> 24) & 0x7f; 5883 ext_v = pvid_to_extvid(dev_priv, pxvid); 5884 5885 state1 = ext_v; 5886 5887 t = i915_mch_val(dev_priv); 5888 5889 /* Revel in the empirically derived constants */ 5890 5891 /* Correction factor in 1/100000 units */ 5892 if (t > 80) 5893 corr = ((t * 2349) + 135940); 5894 else if (t >= 50) 5895 corr = ((t * 964) + 29317); 5896 else /* < 50 */ 5897 corr = ((t * 301) + 1004); 5898 5899 corr = corr * ((150142 * state1) / 10000 - 78642); 5900 corr /= 100000; 5901 corr2 = (corr * dev_priv->ips.corr); 5902 5903 state2 = (corr2 * state1) / 10000; 5904 state2 /= 100; /* convert to mW */ 5905 5906 __i915_update_gfx_val(dev_priv); 5907 5908 return dev_priv->ips.gfx_power + state2; 5909 } 5910 5911 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 5912 { 5913 struct drm_device *dev = dev_priv->dev; 5914 unsigned long val; 5915 5916 if (INTEL_INFO(dev)->gen != 5) 5917 return 0; 5918 5919 spin_lock_irq(&mchdev_lock); 5920 5921 val = __i915_gfx_val(dev_priv); 5922 5923 spin_unlock_irq(&mchdev_lock); 5924 5925 return val; 5926 } 5927 5928 /** 5929 * i915_read_mch_val - return value for IPS use 5930 * 5931 * Calculate and return a value for the IPS driver to use when deciding whether 5932 * we have thermal and power headroom to increase CPU or GPU power budget. 5933 */ 5934 unsigned long i915_read_mch_val(void) 5935 { 5936 struct drm_i915_private *dev_priv; 5937 unsigned long chipset_val, graphics_val, ret = 0; 5938 5939 spin_lock_irq(&mchdev_lock); 5940 if (!i915_mch_dev) 5941 goto out_unlock; 5942 dev_priv = i915_mch_dev; 5943 5944 chipset_val = __i915_chipset_val(dev_priv); 5945 graphics_val = __i915_gfx_val(dev_priv); 5946 5947 ret = chipset_val + graphics_val; 5948 5949 out_unlock: 5950 spin_unlock_irq(&mchdev_lock); 5951 5952 return ret; 5953 } 5954 EXPORT_SYMBOL_GPL(i915_read_mch_val); 5955 5956 /** 5957 * i915_gpu_raise - raise GPU frequency limit 5958 * 5959 * Raise the limit; IPS indicates we have thermal headroom. 5960 */ 5961 bool i915_gpu_raise(void) 5962 { 5963 struct drm_i915_private *dev_priv; 5964 bool ret = true; 5965 5966 spin_lock_irq(&mchdev_lock); 5967 if (!i915_mch_dev) { 5968 ret = false; 5969 goto out_unlock; 5970 } 5971 dev_priv = i915_mch_dev; 5972 5973 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 5974 dev_priv->ips.max_delay--; 5975 5976 out_unlock: 5977 spin_unlock_irq(&mchdev_lock); 5978 5979 return ret; 5980 } 5981 EXPORT_SYMBOL_GPL(i915_gpu_raise); 5982 5983 /** 5984 * i915_gpu_lower - lower GPU frequency limit 5985 * 5986 * IPS indicates we're close to a thermal limit, so throttle back the GPU 5987 * frequency maximum. 
5988 */ 5989 bool i915_gpu_lower(void) 5990 { 5991 struct drm_i915_private *dev_priv; 5992 bool ret = true; 5993 5994 spin_lock_irq(&mchdev_lock); 5995 if (!i915_mch_dev) { 5996 ret = false; 5997 goto out_unlock; 5998 } 5999 dev_priv = i915_mch_dev; 6000 6001 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 6002 dev_priv->ips.max_delay++; 6003 6004 out_unlock: 6005 spin_unlock_irq(&mchdev_lock); 6006 6007 return ret; 6008 } 6009 EXPORT_SYMBOL_GPL(i915_gpu_lower); 6010 6011 /** 6012 * i915_gpu_busy - indicate GPU business to IPS 6013 * 6014 * Tell the IPS driver whether or not the GPU is busy. 6015 */ 6016 bool i915_gpu_busy(void) 6017 { 6018 struct drm_i915_private *dev_priv; 6019 struct intel_engine_cs *ring; 6020 bool ret = false; 6021 int i; 6022 6023 spin_lock_irq(&mchdev_lock); 6024 if (!i915_mch_dev) 6025 goto out_unlock; 6026 dev_priv = i915_mch_dev; 6027 6028 for_each_ring(ring, dev_priv, i) 6029 ret |= !list_empty(&ring->request_list); 6030 6031 out_unlock: 6032 spin_unlock_irq(&mchdev_lock); 6033 6034 return ret; 6035 } 6036 EXPORT_SYMBOL_GPL(i915_gpu_busy); 6037 6038 /** 6039 * i915_gpu_turbo_disable - disable graphics turbo 6040 * 6041 * Disable graphics turbo by resetting the max frequency and setting the 6042 * current frequency to the default. 6043 */ 6044 bool i915_gpu_turbo_disable(void) 6045 { 6046 struct drm_i915_private *dev_priv; 6047 bool ret = true; 6048 6049 spin_lock_irq(&mchdev_lock); 6050 if (!i915_mch_dev) { 6051 ret = false; 6052 goto out_unlock; 6053 } 6054 dev_priv = i915_mch_dev; 6055 6056 dev_priv->ips.max_delay = dev_priv->ips.fstart; 6057 6058 if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) 6059 ret = false; 6060 6061 out_unlock: 6062 spin_unlock_irq(&mchdev_lock); 6063 6064 return ret; 6065 } 6066 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 6067 6068 /** 6069 * Tells the intel_ips driver that the i915 driver is now loaded, if 6070 * IPS got loaded first. 6071 * 6072 * This awkward dance is so that neither module has to depend on the 6073 * other in order for IPS to do the appropriate communication of 6074 * GPU turbo limits to i915. 6075 */ 6076 static void 6077 ips_ping_for_i915_load(void) 6078 { 6079 #ifndef __NetBSD__ /* XXX IPS GPU turbo limits what? */ 6080 void (*link)(void); 6081 6082 link = symbol_get(ips_link_to_i915_driver); 6083 if (link) { 6084 link(); 6085 symbol_put(ips_link_to_i915_driver); 6086 } 6087 #endif 6088 } 6089 6090 void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 6091 { 6092 /* We only register the i915 ips part with intel-ips once everything is 6093 * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ 6094 spin_lock_irq(&mchdev_lock); 6095 i915_mch_dev = dev_priv; 6096 spin_unlock_irq(&mchdev_lock); 6097 6098 ips_ping_for_i915_load(); 6099 } 6100 6101 void intel_gpu_ips_teardown(void) 6102 { 6103 spin_lock_irq(&mchdev_lock); 6104 i915_mch_dev = NULL; 6105 spin_unlock_irq(&mchdev_lock); 6106 } 6107 6108 static void intel_init_emon(struct drm_device *dev) 6109 { 6110 struct drm_i915_private *dev_priv = dev->dev_private; 6111 u32 lcfuse; 6112 u8 pxw[16]; 6113 int i; 6114 6115 /* Disable to program */ 6116 I915_WRITE(ECR, 0); 6117 POSTING_READ(ECR); 6118 6119 /* Program energy weights for various events */ 6120 I915_WRITE(SDEW, 0x15040d00); 6121 I915_WRITE(CSIEW0, 0x007f0000); 6122 I915_WRITE(CSIEW1, 0x1e220004); 6123 I915_WRITE(CSIEW2, 0x04000004); 6124 6125 for (i = 0; i < 5; i++) 6126 I915_WRITE(PEW(i), 0); 6127 for (i = 0; i < 3; i++) 6128 I915_WRITE(DEW(i), 0); 6129 6130 /* Program P-state weights to account for frequency power adjustment */ 6131 for (i = 0; i < 16; i++) { 6132 u32 pxvidfreq = I915_READ(PXVFREQ(i)); 6133 unsigned long freq = intel_pxfreq(pxvidfreq); 6134 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 6135 PXVFREQ_PX_SHIFT; 6136 unsigned long val; 6137 6138 val = vid * vid; 6139 val *= (freq / 1000); 6140 val *= 255; 6141 val /= (127*127*900); 6142 if (val > 0xff) 6143 DRM_ERROR("bad pxval: %ld\n", val); 6144 pxw[i] = val; 6145 } 6146 /* Render standby states get 0 weight */ 6147 pxw[14] = 0; 6148 pxw[15] = 0; 6149 6150 for (i = 0; i < 4; i++) { 6151 u32 val = ((u32)pxw[i*4] << 24) | ((u32)pxw[(i*4)+1] << 16) | 6152 ((u32)pxw[(i*4)+2] << 8) | ((u32)pxw[(i*4)+3]); 6153 I915_WRITE(PXW(i), val); 6154 } 6155 6156 /* Adjust magic regs to magic values (more experimental results) */ 6157 I915_WRITE(OGW0, 0); 6158 I915_WRITE(OGW1, 0); 6159 I915_WRITE(EG0, 0x00007f00); 6160 I915_WRITE(EG1, 0x0000000e); 6161 I915_WRITE(EG2, 0x000e0000); 6162 I915_WRITE(EG3, 0x68000300); 6163 I915_WRITE(EG4, 0x42000000); 6164 I915_WRITE(EG5, 0x00140031); 6165 I915_WRITE(EG6, 0); 6166 I915_WRITE(EG7, 0); 6167 6168 for (i = 0; i < 8; i++) 6169 I915_WRITE(PXWL(i), 0); 6170 6171 /* Enable PMON + select events */ 6172 I915_WRITE(ECR, 0x80000019); 6173 6174 lcfuse = I915_READ(LCFUSE02); 6175 6176 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 6177 } 6178 6179 static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) 6180 { 6181 return !I915_READ(GEN8_RC6_CTX_INFO); 6182 } 6183 6184 static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) 6185 { 6186 if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) 6187 return; 6188 6189 if (i915_rc6_ctx_corrupted(i915)) { 6190 DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); 6191 i915->rps.ctx_corrupted = true; 6192 intel_runtime_pm_get(i915); 6193 } 6194 } 6195 6196 static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) 6197 { 6198 if (i915->rps.ctx_corrupted) { 6199 intel_runtime_pm_put(i915); 6200 i915->rps.ctx_corrupted = false; 6201 } 6202 } 6203 6204 /** 6205 * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA 6206 * @i915: i915 device 6207 * 6208 * Perform any steps needed to clean up the RC6 CTX WA before system suspend. 6209 */ 6210 void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) 6211 { 6212 if (i915->rps.ctx_corrupted) 6213 intel_runtime_pm_put(i915); 6214 } 6215 6216 /** 6217 * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA 6218 * @i915: i915 device 6219 * 6220 * Perform any steps needed to re-init the RC6 CTX WA after system resume. 
6221 */ 6222 void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) 6223 { 6224 if (!i915->rps.ctx_corrupted) 6225 return; 6226 6227 if (i915_rc6_ctx_corrupted(i915)) { 6228 intel_runtime_pm_get(i915); 6229 return; 6230 } 6231 6232 DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); 6233 i915->rps.ctx_corrupted = false; 6234 } 6235 6236 static void intel_disable_rc6(struct drm_device *dev); 6237 6238 /** 6239 * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption 6240 * @i915: i915 device 6241 * 6242 * Check if an RC6 CTX corruption has happened since the last check and if so 6243 * disable RC6 and runtime power management. 6244 * 6245 * Return false if no context corruption has happened since the last call of 6246 * this function, true otherwise. 6247 */ 6248 bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) 6249 { 6250 if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) 6251 return false; 6252 6253 if (i915->rps.ctx_corrupted) 6254 return false; 6255 6256 if (!i915_rc6_ctx_corrupted(i915)) 6257 return false; 6258 6259 DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); 6260 6261 intel_disable_rc6(i915->dev); 6262 i915->rps.ctx_corrupted = true; 6263 intel_runtime_pm_get_noresume(i915); 6264 6265 return true; 6266 } 6267 6268 void intel_init_gt_powersave(struct drm_device *dev) 6269 { 6270 i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); 6271 6272 i915_rc6_ctx_wa_init(to_i915(dev)); 6273 6274 if (IS_CHERRYVIEW(dev)) 6275 cherryview_init_gt_powersave(dev); 6276 else if (IS_VALLEYVIEW(dev)) 6277 valleyview_init_gt_powersave(dev); 6278 } 6279 6280 void intel_cleanup_gt_powersave(struct drm_device *dev) 6281 { 6282 if (IS_CHERRYVIEW(dev)) 6283 return; 6284 else if (IS_VALLEYVIEW(dev)) 6285 valleyview_cleanup_gt_powersave(dev); 6286 6287 i915_rc6_ctx_wa_cleanup(to_i915(dev)); 6288 } 6289 6290 static void gen6_suspend_rps(struct drm_device *dev) 6291 { 6292 struct drm_i915_private *dev_priv = dev->dev_private; 6293 6294 flush_delayed_work(&dev_priv->rps.delayed_resume_work); 6295 6296 gen6_disable_rps_interrupts(dev); 6297 } 6298 6299 /** 6300 * intel_suspend_gt_powersave - suspend PM work and helper threads 6301 * @dev: drm device 6302 * 6303 * We don't want to disable RC6 or other features here, we just want 6304 * to make sure any work we've queued has finished and won't bother 6305 * us while we're suspended. 
6306 */ 6307 void intel_suspend_gt_powersave(struct drm_device *dev) 6308 { 6309 struct drm_i915_private *dev_priv = dev->dev_private; 6310 6311 if (INTEL_INFO(dev)->gen < 6) 6312 return; 6313 6314 gen6_suspend_rps(dev); 6315 6316 /* Force GPU to min freq during suspend */ 6317 gen6_rps_idle(dev_priv); 6318 } 6319 6320 static void __intel_disable_rc6(struct drm_device *dev) 6321 { 6322 if (INTEL_INFO(dev)->gen >= 9) 6323 gen9_disable_rc6(dev); 6324 else if (IS_CHERRYVIEW(dev)) 6325 cherryview_disable_rc6(dev); 6326 else if (IS_VALLEYVIEW(dev)) 6327 valleyview_disable_rc6(dev); 6328 else 6329 gen6_disable_rc6(dev); 6330 } 6331 6332 static void intel_disable_rc6(struct drm_device *dev) 6333 { 6334 struct drm_i915_private *dev_priv = to_i915(dev); 6335 6336 mutex_lock(&dev_priv->rps.hw_lock); 6337 __intel_disable_rc6(dev); 6338 mutex_unlock(&dev_priv->rps.hw_lock); 6339 } 6340 6341 static void intel_disable_rps(struct drm_device *dev) 6342 { 6343 if (IS_CHERRYVIEW(dev) || IS_VALLEYVIEW(dev)) 6344 return; 6345 6346 if (INTEL_INFO(dev)->gen >= 9) 6347 gen9_disable_rps(dev); 6348 else 6349 gen6_disable_rps(dev); 6350 } 6351 6352 void intel_disable_gt_powersave(struct drm_device *dev) 6353 { 6354 struct drm_i915_private *dev_priv = dev->dev_private; 6355 6356 if (IS_IRONLAKE_M(dev)) { 6357 ironlake_disable_drps(dev); 6358 } else if (INTEL_INFO(dev)->gen >= 6) { 6359 intel_suspend_gt_powersave(dev); 6360 6361 mutex_lock(&dev_priv->rps.hw_lock); 6362 6363 __intel_disable_rc6(dev); 6364 intel_disable_rps(dev); 6365 6366 dev_priv->rps.enabled = false; 6367 6368 mutex_unlock(&dev_priv->rps.hw_lock); 6369 } 6370 } 6371 6372 static void intel_gen6_powersave_work(struct work_struct *work) 6373 { 6374 struct drm_i915_private *dev_priv = 6375 container_of(work, struct drm_i915_private, 6376 rps.delayed_resume_work.work); 6377 struct drm_device *dev = dev_priv->dev; 6378 6379 mutex_lock(&dev_priv->rps.hw_lock); 6380 6381 gen6_reset_rps_interrupts(dev); 6382 6383 if (IS_CHERRYVIEW(dev)) { 6384 cherryview_enable_rps(dev); 6385 } else if (IS_VALLEYVIEW(dev)) { 6386 valleyview_enable_rps(dev); 6387 } else if (INTEL_INFO(dev)->gen >= 9) { 6388 gen9_enable_rc6(dev); 6389 gen9_enable_rps(dev); 6390 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) 6391 __gen6_update_ring_freq(dev); 6392 } else if (IS_BROADWELL(dev)) { 6393 gen8_enable_rps(dev); 6394 __gen6_update_ring_freq(dev); 6395 } else { 6396 gen6_enable_rps(dev); 6397 __gen6_update_ring_freq(dev); 6398 } 6399 6400 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 6401 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 6402 6403 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 6404 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 6405 6406 dev_priv->rps.enabled = true; 6407 6408 gen6_enable_rps_interrupts(dev); 6409 6410 mutex_unlock(&dev_priv->rps.hw_lock); 6411 6412 intel_runtime_pm_put(dev_priv); 6413 } 6414 6415 void intel_enable_gt_powersave(struct drm_device *dev) 6416 { 6417 struct drm_i915_private *dev_priv = dev->dev_private; 6418 6419 /* Powersaving is controlled by the host when inside a VM */ 6420 if (intel_vgpu_active(dev)) 6421 return; 6422 6423 if (IS_IRONLAKE_M(dev)) { 6424 mutex_lock(&dev->struct_mutex); 6425 ironlake_enable_drps(dev); 6426 intel_init_emon(dev); 6427 mutex_unlock(&dev->struct_mutex); 6428 } else if (INTEL_INFO(dev)->gen >= 6) { 6429 /* 6430 * PCU communication is slow and this doesn't need to be 6431 * done at any specific time, so do this out of our fast path 6432 * to make resume and 
init faster. 6433 * 6434 * We depend on the HW RC6 power context save/restore 6435 * mechanism when entering D3 through runtime PM suspend. So 6436 * disable RPM until RPS/RC6 is properly setup. We can only 6437 * get here via the driver load/system resume/runtime resume 6438 * paths, so the _noresume version is enough (and in case of 6439 * runtime resume it's necessary). 6440 */ 6441 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, 6442 round_jiffies_up_relative(HZ))) 6443 intel_runtime_pm_get_noresume(dev_priv); 6444 } 6445 } 6446 6447 void intel_reset_gt_powersave(struct drm_device *dev) 6448 { 6449 struct drm_i915_private *dev_priv = dev->dev_private; 6450 6451 if (INTEL_INFO(dev)->gen < 6) 6452 return; 6453 6454 gen6_suspend_rps(dev); 6455 dev_priv->rps.enabled = false; 6456 } 6457 6458 static void ibx_init_clock_gating(struct drm_device *dev) 6459 { 6460 struct drm_i915_private *dev_priv = dev->dev_private; 6461 6462 /* 6463 * On Ibex Peak and Cougar Point, we need to disable clock 6464 * gating for the panel power sequencer or it will fail to 6465 * start up when no ports are active. 6466 */ 6467 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 6468 } 6469 6470 static void g4x_disable_trickle_feed(struct drm_device *dev) 6471 { 6472 struct drm_i915_private *dev_priv = dev->dev_private; 6473 enum pipe pipe; 6474 6475 for_each_pipe(dev_priv, pipe) { 6476 I915_WRITE(DSPCNTR(pipe), 6477 I915_READ(DSPCNTR(pipe)) | 6478 DISPPLANE_TRICKLE_FEED_DISABLE); 6479 6480 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 6481 POSTING_READ(DSPSURF(pipe)); 6482 } 6483 } 6484 6485 static void ilk_init_lp_watermarks(struct drm_device *dev) 6486 { 6487 struct drm_i915_private *dev_priv = dev->dev_private; 6488 6489 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 6490 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 6491 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 6492 6493 /* 6494 * Don't touch WM1S_LP_EN here. 6495 * Doing so could cause underruns. 6496 */ 6497 } 6498 6499 static void ironlake_init_clock_gating(struct drm_device *dev) 6500 { 6501 struct drm_i915_private *dev_priv = dev->dev_private; 6502 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6503 6504 /* 6505 * Required for FBC 6506 * WaFbcDisableDpfcClockGating:ilk 6507 */ 6508 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 6509 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 6510 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 6511 6512 I915_WRITE(PCH_3DCGDIS0, 6513 MARIUNIT_CLOCK_GATE_DISABLE | 6514 SVSMUNIT_CLOCK_GATE_DISABLE); 6515 I915_WRITE(PCH_3DCGDIS1, 6516 VFMUNIT_CLOCK_GATE_DISABLE); 6517 6518 /* 6519 * According to the spec the following bits should be set in 6520 * order to enable memory self-refresh 6521 * The bit 22/21 of 0x42004 6522 * The bit 5 of 0x42020 6523 * The bit 15 of 0x45000 6524 */ 6525 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6526 (I915_READ(ILK_DISPLAY_CHICKEN2) | 6527 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 6528 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 6529 I915_WRITE(DISP_ARB_CTL, 6530 (I915_READ(DISP_ARB_CTL) | 6531 DISP_FBC_WM_DIS)); 6532 6533 ilk_init_lp_watermarks(dev); 6534 6535 /* 6536 * Based on the document from hardware guys the following bits 6537 * should be set unconditionally in order to enable FBC. 6538 * The bit 22 of 0x42000 6539 * The bit 22 of 0x42004 6540 * The bit 7,8,9 of 0x42020. 
6541 */ 6542 if (IS_IRONLAKE_M(dev)) { 6543 /* WaFbcAsynchFlipDisableFbcQueue:ilk */ 6544 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6545 I915_READ(ILK_DISPLAY_CHICKEN1) | 6546 ILK_FBCQ_DIS); 6547 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6548 I915_READ(ILK_DISPLAY_CHICKEN2) | 6549 ILK_DPARB_GATE); 6550 } 6551 6552 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6553 6554 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6555 I915_READ(ILK_DISPLAY_CHICKEN2) | 6556 ILK_ELPIN_409_SELECT); 6557 I915_WRITE(_3D_CHICKEN2, 6558 _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6559 _3D_CHICKEN2_WM_READ_PIPELINED); 6560 6561 /* WaDisableRenderCachePipelinedFlush:ilk */ 6562 I915_WRITE(CACHE_MODE_0, 6563 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6564 6565 /* WaDisable_RenderCache_OperationalFlush:ilk */ 6566 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6567 6568 g4x_disable_trickle_feed(dev); 6569 6570 ibx_init_clock_gating(dev); 6571 } 6572 6573 static void cpt_init_clock_gating(struct drm_device *dev) 6574 { 6575 struct drm_i915_private *dev_priv = dev->dev_private; 6576 int pipe; 6577 uint32_t val; 6578 6579 /* 6580 * On Ibex Peak and Cougar Point, we need to disable clock 6581 * gating for the panel power sequencer or it will fail to 6582 * start up when no ports are active. 6583 */ 6584 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 6585 PCH_DPLUNIT_CLOCK_GATE_DISABLE | 6586 PCH_CPUNIT_CLOCK_GATE_DISABLE); 6587 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 6588 DPLS_EDP_PPS_FIX_DIS); 6589 /* The below fixes the weird display corruption, a few pixels shifted 6590 * downward, on (only) LVDS of some HP laptops with IVY. 6591 */ 6592 for_each_pipe(dev_priv, pipe) { 6593 val = I915_READ(TRANS_CHICKEN2(pipe)); 6594 val |= TRANS_CHICKEN2_TIMING_OVERRIDE; 6595 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6596 if (dev_priv->vbt.fdi_rx_polarity_inverted) 6597 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6598 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; 6599 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; 6600 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; 6601 I915_WRITE(TRANS_CHICKEN2(pipe), val); 6602 } 6603 /* WADP0ClockGatingDisable */ 6604 for_each_pipe(dev_priv, pipe) { 6605 I915_WRITE(TRANS_CHICKEN1(pipe), 6606 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6607 } 6608 } 6609 6610 static void gen6_check_mch_setup(struct drm_device *dev) 6611 { 6612 struct drm_i915_private *dev_priv = dev->dev_private; 6613 uint32_t tmp; 6614 6615 tmp = I915_READ(MCH_SSKPD); 6616 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) 6617 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", 6618 tmp); 6619 } 6620 6621 static void gen6_init_clock_gating(struct drm_device *dev) 6622 { 6623 struct drm_i915_private *dev_priv = dev->dev_private; 6624 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6625 6626 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6627 6628 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6629 I915_READ(ILK_DISPLAY_CHICKEN2) | 6630 ILK_ELPIN_409_SELECT); 6631 6632 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 6633 I915_WRITE(_3D_CHICKEN, 6634 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); 6635 6636 /* WaDisable_RenderCache_OperationalFlush:snb */ 6637 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6638 6639 /* 6640 * BSpec recommends 8x4 when MSAA is used, 6641 * however in practice 16x4 seems fastest.
6642 * 6643 * Note that PS/WM thread counts depend on the WIZ hashing 6644 * disable bit, which we don't touch here, but it's good 6645 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 6646 */ 6647 I915_WRITE(GEN6_GT_MODE, 6648 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6649 6650 ilk_init_lp_watermarks(dev); 6651 6652 I915_WRITE(CACHE_MODE_0, 6653 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 6654 6655 I915_WRITE(GEN6_UCGCTL1, 6656 I915_READ(GEN6_UCGCTL1) | 6657 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | 6658 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 6659 6660 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock 6661 * gating disable must be set. Failure to set it results in 6662 * flickering pixels due to Z write ordering failures after 6663 * some amount of runtime in the Mesa "fire" demo, and Unigine 6664 * Sanctuary and Tropics, and apparently anything else with 6665 * alpha test or pixel discard. 6666 * 6667 * According to the spec, bit 11 (RCCUNIT) must also be set, 6668 * but we didn't debug actual testcases to find it out. 6669 * 6670 * WaDisableRCCUnitClockGating:snb 6671 * WaDisableRCPBUnitClockGating:snb 6672 */ 6673 I915_WRITE(GEN6_UCGCTL2, 6674 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 6675 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 6676 6677 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 6678 I915_WRITE(_3D_CHICKEN3, 6679 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 6680 6681 /* 6682 * Bspec says: 6683 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 6684 * 3DSTATE_SF number of SF output attributes is more than 16." 6685 */ 6686 I915_WRITE(_3D_CHICKEN3, 6687 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 6688 6689 /* 6690 * According to the spec the following bits should be 6691 * set in order to enable memory self-refresh and fbc: 6692 * The bit21 and bit22 of 0x42000 6693 * The bit21 and bit22 of 0x42004 6694 * The bit5 and bit7 of 0x42020 6695 * The bit14 of 0x70180 6696 * The bit14 of 0x71180 6697 * 6698 * WaFbcAsynchFlipDisableFbcQueue:snb 6699 */ 6700 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6701 I915_READ(ILK_DISPLAY_CHICKEN1) | 6702 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 6703 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6704 I915_READ(ILK_DISPLAY_CHICKEN2) | 6705 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 6706 I915_WRITE(ILK_DSPCLK_GATE_D, 6707 I915_READ(ILK_DSPCLK_GATE_D) | 6708 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 6709 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 6710 6711 g4x_disable_trickle_feed(dev); 6712 6713 cpt_init_clock_gating(dev); 6714 6715 gen6_check_mch_setup(dev); 6716 } 6717 6718 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 6719 { 6720 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 6721 6722 /* 6723 * WaVSThreadDispatchOverride:ivb,vlv 6724 * 6725 * This actually overrides the dispatch 6726 * mode for all thread types. 6727 */ 6728 reg &= ~GEN7_FF_SCHED_MASK; 6729 reg |= GEN7_FF_TS_SCHED_HW; 6730 reg |= GEN7_FF_VS_SCHED_HW; 6731 reg |= GEN7_FF_DS_SCHED_HW; 6732 6733 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 6734 } 6735 6736 static void lpt_init_clock_gating(struct drm_device *dev) 6737 { 6738 struct drm_i915_private *dev_priv = dev->dev_private; 6739 6740 /* 6741 * TODO: this bit should only be enabled when really needed, then 6742 * disabled when not needed anymore in order to save power. 
6743 */ 6744 if (HAS_PCH_LPT_LP(dev)) 6745 I915_WRITE(SOUTH_DSPCLK_GATE_D, 6746 I915_READ(SOUTH_DSPCLK_GATE_D) | 6747 PCH_LP_PARTITION_LEVEL_DISABLE); 6748 6749 /* WADPOClockGatingDisable:hsw */ 6750 I915_WRITE(TRANS_CHICKEN1(PIPE_A), 6751 I915_READ(TRANS_CHICKEN1(PIPE_A)) | 6752 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6753 } 6754 6755 static void lpt_suspend_hw(struct drm_device *dev) 6756 { 6757 struct drm_i915_private *dev_priv = dev->dev_private; 6758 6759 if (HAS_PCH_LPT_LP(dev)) { 6760 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 6761 6762 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 6763 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 6764 } 6765 } 6766 6767 static void broadwell_init_clock_gating(struct drm_device *dev) 6768 { 6769 struct drm_i915_private *dev_priv = dev->dev_private; 6770 enum pipe pipe; 6771 uint32_t misccpctl; 6772 6773 ilk_init_lp_watermarks(dev); 6774 6775 /* WaSwitchSolVfFArbitrationPriority:bdw */ 6776 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6777 6778 /* WaPsrDPAMaskVBlankInSRD:bdw */ 6779 I915_WRITE(CHICKEN_PAR1_1, 6780 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 6781 6782 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 6783 for_each_pipe(dev_priv, pipe) { 6784 I915_WRITE(CHICKEN_PIPESL_1(pipe), 6785 I915_READ(CHICKEN_PIPESL_1(pipe)) | 6786 BDW_DPRS_MASK_VBLANK_SRD); 6787 } 6788 6789 /* WaVSRefCountFullforceMissDisable:bdw */ 6790 /* WaDSRefCountFullforceMissDisable:bdw */ 6791 I915_WRITE(GEN7_FF_THREAD_MODE, 6792 I915_READ(GEN7_FF_THREAD_MODE) & 6793 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 6794 6795 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 6796 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 6797 6798 /* WaDisableSDEUnitClockGating:bdw */ 6799 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 6800 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 6801 6802 /* 6803 * WaProgramL3SqcReg1Default:bdw 6804 * WaTempDisableDOPClkGating:bdw 6805 */ 6806 misccpctl = I915_READ(GEN7_MISCCPCTL); 6807 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 6808 I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); 6809 /* 6810 * Wait at least 100 clocks before re-enabling clock gating. See 6811 * the definition of L3SQCREG1 in BSpec. 6812 */ 6813 POSTING_READ(GEN8_L3SQCREG1); 6814 udelay(1); 6815 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 6816 6817 /* 6818 * WaGttCachingOffByDefault:bdw 6819 * GTT cache may not work with big pages, so if those 6820 * are ever enabled GTT cache may need to be disabled. 6821 */ 6822 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 6823 6824 lpt_init_clock_gating(dev); 6825 } 6826 6827 static void haswell_init_clock_gating(struct drm_device *dev) 6828 { 6829 struct drm_i915_private *dev_priv = dev->dev_private; 6830 6831 ilk_init_lp_watermarks(dev); 6832 6833 /* L3 caching of data atomics doesn't work -- disable it. 
*/ 6834 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 6835 I915_WRITE(HSW_ROW_CHICKEN3, 6836 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 6837 6838 /* This is required by WaCatErrorRejectionIssue:hsw */ 6839 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6840 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6841 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6842 6843 /* WaVSRefCountFullforceMissDisable:hsw */ 6844 I915_WRITE(GEN7_FF_THREAD_MODE, 6845 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 6846 6847 /* WaDisable_RenderCache_OperationalFlush:hsw */ 6848 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6849 6850 /* enable HiZ Raw Stall Optimization */ 6851 I915_WRITE(CACHE_MODE_0_GEN7, 6852 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6853 6854 /* WaDisable4x2SubspanOptimization:hsw */ 6855 I915_WRITE(CACHE_MODE_1, 6856 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6857 6858 /* 6859 * BSpec recommends 8x4 when MSAA is used, 6860 * however in practice 16x4 seems fastest. 6861 * 6862 * Note that PS/WM thread counts depend on the WIZ hashing 6863 * disable bit, which we don't touch here, but it's good 6864 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 6865 */ 6866 I915_WRITE(GEN7_GT_MODE, 6867 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6868 6869 /* WaSampleCChickenBitEnable:hsw */ 6870 I915_WRITE(HALF_SLICE_CHICKEN3, 6871 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); 6872 6873 /* WaSwitchSolVfFArbitrationPriority:hsw */ 6874 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6875 6876 /* WaRsPkgCStateDisplayPMReq:hsw */ 6877 I915_WRITE(CHICKEN_PAR1_1, 6878 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); 6879 6880 lpt_init_clock_gating(dev); 6881 } 6882 6883 static void ivybridge_init_clock_gating(struct drm_device *dev) 6884 { 6885 struct drm_i915_private *dev_priv = dev->dev_private; 6886 uint32_t snpcr; 6887 6888 ilk_init_lp_watermarks(dev); 6889 6890 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 6891 6892 /* WaDisableEarlyCull:ivb */ 6893 I915_WRITE(_3D_CHICKEN3, 6894 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 6895 6896 /* WaDisableBackToBackFlipFix:ivb */ 6897 I915_WRITE(IVB_CHICKEN3, 6898 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 6899 CHICKEN3_DGMG_DONE_FIX_DISABLE); 6900 6901 /* WaDisablePSDDualDispatchEnable:ivb */ 6902 if (IS_IVB_GT1(dev)) 6903 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 6904 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 6905 6906 /* WaDisable_RenderCache_OperationalFlush:ivb */ 6907 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6908 6909 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. 
*/ 6910 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 6911 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 6912 6913 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 6914 I915_WRITE(GEN7_L3CNTLREG1, 6915 GEN7_WA_FOR_GEN7_L3_CONTROL); 6916 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 6917 GEN7_WA_L3_CHICKEN_MODE); 6918 if (IS_IVB_GT1(dev)) 6919 I915_WRITE(GEN7_ROW_CHICKEN2, 6920 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6921 else { 6922 /* must write both registers */ 6923 I915_WRITE(GEN7_ROW_CHICKEN2, 6924 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6925 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 6926 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6927 } 6928 6929 /* WaForceL3Serialization:ivb */ 6930 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 6931 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 6932 6933 /* 6934 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 6935 * This implements the WaDisableRCZUnitClockGating:ivb workaround. 6936 */ 6937 I915_WRITE(GEN6_UCGCTL2, 6938 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 6939 6940 /* This is required by WaCatErrorRejectionIssue:ivb */ 6941 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6942 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6943 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6944 6945 g4x_disable_trickle_feed(dev); 6946 6947 gen7_setup_fixed_func_scheduler(dev_priv); 6948 6949 if (0) { /* causes HiZ corruption on ivb:gt1 */ 6950 /* enable HiZ Raw Stall Optimization */ 6951 I915_WRITE(CACHE_MODE_0_GEN7, 6952 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6953 } 6954 6955 /* WaDisable4x2SubspanOptimization:ivb */ 6956 I915_WRITE(CACHE_MODE_1, 6957 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6958 6959 /* 6960 * BSpec recommends 8x4 when MSAA is used, 6961 * however in practice 16x4 seems fastest. 6962 * 6963 * Note that PS/WM thread counts depend on the WIZ hashing 6964 * disable bit, which we don't touch here, but it's good 6965 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 6966 */ 6967 I915_WRITE(GEN7_GT_MODE, 6968 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6969 6970 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); 6971 snpcr &= ~GEN6_MBC_SNPCR_MASK; 6972 snpcr |= GEN6_MBC_SNPCR_MED; 6973 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); 6974 6975 if (!HAS_PCH_NOP(dev)) 6976 cpt_init_clock_gating(dev); 6977 6978 gen6_check_mch_setup(dev); 6979 } 6980 6981 static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) 6982 { 6983 u32 val; 6984 6985 /* 6986 * On driver load, a pipe may be active and driving a DSI display. 6987 * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck 6988 * (and never recovering) in this case. intel_dsi_post_disable() will 6989 * clear it when we turn off the display. 
6990 */ 6991 val = I915_READ(DSPCLK_GATE_D); 6992 val &= DPOUNIT_CLOCK_GATE_DISABLE; 6993 val |= VRHUNIT_CLOCK_GATE_DISABLE; 6994 I915_WRITE(DSPCLK_GATE_D, val); 6995 6996 /* 6997 * Disable trickle feed and enable pnd deadline calculation 6998 */ 6999 I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); 7000 I915_WRITE(CBR1_VLV, 0); 7001 } 7002 7003 static void valleyview_init_clock_gating(struct drm_device *dev) 7004 { 7005 struct drm_i915_private *dev_priv = dev->dev_private; 7006 7007 vlv_init_display_clock_gating(dev_priv); 7008 7009 /* WaDisableEarlyCull:vlv */ 7010 I915_WRITE(_3D_CHICKEN3, 7011 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 7012 7013 /* WaDisableBackToBackFlipFix:vlv */ 7014 I915_WRITE(IVB_CHICKEN3, 7015 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 7016 CHICKEN3_DGMG_DONE_FIX_DISABLE); 7017 7018 /* WaPsdDispatchEnable:vlv */ 7019 /* WaDisablePSDDualDispatchEnable:vlv */ 7020 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 7021 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | 7022 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 7023 7024 /* WaDisable_RenderCache_OperationalFlush:vlv */ 7025 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7026 7027 /* WaForceL3Serialization:vlv */ 7028 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 7029 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 7030 7031 /* WaDisableDopClockGating:vlv */ 7032 I915_WRITE(GEN7_ROW_CHICKEN2, 7033 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7034 7035 /* This is required by WaCatErrorRejectionIssue:vlv */ 7036 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7037 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7038 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7039 7040 gen7_setup_fixed_func_scheduler(dev_priv); 7041 7042 /* 7043 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 7044 * This implements the WaDisableRCZUnitClockGating:vlv workaround. 7045 */ 7046 I915_WRITE(GEN6_UCGCTL2, 7047 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 7048 7049 /* WaDisableL3Bank2xClockGate:vlv 7050 * Disabling L3 clock gating- MMIO 940c[25] = 1 7051 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ 7052 I915_WRITE(GEN7_UCGCTL4, 7053 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); 7054 7055 /* 7056 * BSpec says this must be set, even though 7057 * WaDisable4x2SubspanOptimization isn't listed for VLV. 7058 */ 7059 I915_WRITE(CACHE_MODE_1, 7060 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7061 7062 /* 7063 * BSpec recommends 8x4 when MSAA is used, 7064 * however in practice 16x4 seems fastest. 7065 * 7066 * Note that PS/WM thread counts depend on the WIZ hashing 7067 * disable bit, which we don't touch here, but it's good 7068 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 7069 */ 7070 I915_WRITE(GEN7_GT_MODE, 7071 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7072 7073 /* 7074 * WaIncreaseL3CreditsForVLVB0:vlv 7075 * This is the hardware default actually. 7076 */ 7077 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 7078 7079 /* 7080 * WaDisableVLVClockGating_VBIIssue:vlv 7081 * Disable clock gating on the GCFG unit to prevent a delay 7082 * in the reporting of vblank events.
7083 */ 7084 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); 7085 } 7086 7087 static void cherryview_init_clock_gating(struct drm_device *dev) 7088 { 7089 struct drm_i915_private *dev_priv = dev->dev_private; 7090 7091 vlv_init_display_clock_gating(dev_priv); 7092 7093 /* WaVSRefCountFullforceMissDisable:chv */ 7094 /* WaDSRefCountFullforceMissDisable:chv */ 7095 I915_WRITE(GEN7_FF_THREAD_MODE, 7096 I915_READ(GEN7_FF_THREAD_MODE) & 7097 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 7098 7099 /* WaDisableSemaphoreAndSyncFlipWait:chv */ 7100 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 7101 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 7102 7103 /* WaDisableCSUnitClockGating:chv */ 7104 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 7105 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 7106 7107 /* WaDisableSDEUnitClockGating:chv */ 7108 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7109 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7110 7111 /* 7112 * GTT cache may not work with big pages, so if those 7113 * are ever enabled GTT cache may need to be disabled. 7114 */ 7115 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 7116 } 7117 7118 static void g4x_init_clock_gating(struct drm_device *dev) 7119 { 7120 struct drm_i915_private *dev_priv = dev->dev_private; 7121 uint32_t dspclk_gate; 7122 7123 I915_WRITE(RENCLK_GATE_D1, 0); 7124 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | 7125 GS_UNIT_CLOCK_GATE_DISABLE | 7126 CL_UNIT_CLOCK_GATE_DISABLE); 7127 I915_WRITE(RAMCLK_GATE_D, 0); 7128 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | 7129 OVRUNIT_CLOCK_GATE_DISABLE | 7130 OVCUNIT_CLOCK_GATE_DISABLE; 7131 if (IS_GM45(dev)) 7132 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 7133 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 7134 7135 /* WaDisableRenderCachePipelinedFlush */ 7136 I915_WRITE(CACHE_MODE_0, 7137 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 7138 7139 /* WaDisable_RenderCache_OperationalFlush:g4x */ 7140 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7141 7142 g4x_disable_trickle_feed(dev); 7143 } 7144 7145 static void crestline_init_clock_gating(struct drm_device *dev) 7146 { 7147 struct drm_i915_private *dev_priv = dev->dev_private; 7148 7149 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); 7150 I915_WRITE(RENCLK_GATE_D2, 0); 7151 I915_WRITE(DSPCLK_GATE_D, 0); 7152 I915_WRITE(RAMCLK_GATE_D, 0); 7153 I915_WRITE16(DEUC, 0); 7154 I915_WRITE(MI_ARB_STATE, 7155 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7156 7157 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7158 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7159 } 7160 7161 static void broadwater_init_clock_gating(struct drm_device *dev) 7162 { 7163 struct drm_i915_private *dev_priv = dev->dev_private; 7164 7165 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | 7166 I965_RCC_CLOCK_GATE_DISABLE | 7167 I965_RCPB_CLOCK_GATE_DISABLE | 7168 I965_ISC_CLOCK_GATE_DISABLE | 7169 I965_FBC_CLOCK_GATE_DISABLE); 7170 I915_WRITE(RENCLK_GATE_D2, 0); 7171 I915_WRITE(MI_ARB_STATE, 7172 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7173 7174 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7175 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7176 } 7177 7178 static void gen3_init_clock_gating(struct drm_device *dev) 7179 { 7180 struct drm_i915_private *dev_priv = dev->dev_private; 7181 u32 dstate = I915_READ(D_STATE); 7182 7183 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | 7184 DSTATE_DOT_CLOCK_GATING; 7185 I915_WRITE(D_STATE, dstate); 7186 7187 if 
(IS_PINEVIEW(dev)) 7188 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); 7189 7190 /* IIR "flip pending" means done if this bit is set */ 7191 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); 7192 7193 /* interrupts should cause a wake up from C3 */ 7194 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); 7195 7196 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 7197 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 7198 7199 I915_WRITE(MI_ARB_STATE, 7200 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7201 } 7202 7203 static void i85x_init_clock_gating(struct drm_device *dev) 7204 { 7205 struct drm_i915_private *dev_priv = dev->dev_private; 7206 7207 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); 7208 7209 /* interrupts should cause a wake up from C3 */ 7210 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | 7211 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); 7212 7213 I915_WRITE(MEM_MODE, 7214 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); 7215 } 7216 7217 static void i830_init_clock_gating(struct drm_device *dev) 7218 { 7219 struct drm_i915_private *dev_priv = dev->dev_private; 7220 7221 I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); 7222 7223 I915_WRITE(MEM_MODE, 7224 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | 7225 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); 7226 } 7227 7228 void intel_init_clock_gating(struct drm_device *dev) 7229 { 7230 struct drm_i915_private *dev_priv = dev->dev_private; 7231 7232 if (dev_priv->display.init_clock_gating) 7233 dev_priv->display.init_clock_gating(dev); 7234 } 7235 7236 void intel_suspend_hw(struct drm_device *dev) 7237 { 7238 if (HAS_PCH_LPT(dev)) 7239 lpt_suspend_hw(dev); 7240 } 7241 7242 /* Set up chip specific power management-related functions */ 7243 void intel_init_pm(struct drm_device *dev) 7244 { 7245 struct drm_i915_private *dev_priv = dev->dev_private; 7246 7247 intel_fbc_init(dev_priv); 7248 7249 /* For cxsr */ 7250 if (IS_PINEVIEW(dev)) 7251 i915_pineview_get_mem_freq(dev); 7252 else if (IS_GEN5(dev)) 7253 i915_ironlake_get_mem_freq(dev); 7254 7255 /* For FIFO watermark updates */ 7256 if (INTEL_INFO(dev)->gen >= 9) { 7257 skl_setup_wm_latency(dev); 7258 7259 if (IS_BROXTON(dev)) 7260 dev_priv->display.init_clock_gating = 7261 bxt_init_clock_gating; 7262 dev_priv->display.update_wm = skl_update_wm; 7263 dev_priv->display.update_sprite_wm = skl_update_sprite_wm; 7264 } else if (HAS_PCH_SPLIT(dev)) { 7265 ilk_setup_wm_latency(dev); 7266 7267 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && 7268 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || 7269 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] && 7270 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { 7271 dev_priv->display.update_wm = ilk_update_wm; 7272 dev_priv->display.update_sprite_wm = ilk_update_sprite_wm; 7273 } else { 7274 DRM_DEBUG_KMS("Failed to read display plane latency. 
" 7275 "Disable CxSR\n"); 7276 } 7277 7278 if (IS_GEN5(dev)) 7279 dev_priv->display.init_clock_gating = ironlake_init_clock_gating; 7280 else if (IS_GEN6(dev)) 7281 dev_priv->display.init_clock_gating = gen6_init_clock_gating; 7282 else if (IS_IVYBRIDGE(dev)) 7283 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; 7284 else if (IS_HASWELL(dev)) 7285 dev_priv->display.init_clock_gating = haswell_init_clock_gating; 7286 else if (INTEL_INFO(dev)->gen == 8) 7287 dev_priv->display.init_clock_gating = broadwell_init_clock_gating; 7288 } else if (IS_CHERRYVIEW(dev)) { 7289 vlv_setup_wm_latency(dev); 7290 7291 dev_priv->display.update_wm = vlv_update_wm; 7292 dev_priv->display.init_clock_gating = 7293 cherryview_init_clock_gating; 7294 } else if (IS_VALLEYVIEW(dev)) { 7295 vlv_setup_wm_latency(dev); 7296 7297 dev_priv->display.update_wm = vlv_update_wm; 7298 dev_priv->display.init_clock_gating = 7299 valleyview_init_clock_gating; 7300 } else if (IS_PINEVIEW(dev)) { 7301 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev), 7302 dev_priv->is_ddr3, 7303 dev_priv->fsb_freq, 7304 dev_priv->mem_freq)) { 7305 DRM_INFO("failed to find known CxSR latency " 7306 "(found ddr%s fsb freq %d, mem freq %d), " 7307 "disabling CxSR\n", 7308 (dev_priv->is_ddr3 == 1) ? "3" : "2", 7309 dev_priv->fsb_freq, dev_priv->mem_freq); 7310 /* Disable CxSR and never update its watermark again */ 7311 intel_set_memory_cxsr(dev_priv, false); 7312 dev_priv->display.update_wm = NULL; 7313 } else 7314 dev_priv->display.update_wm = pineview_update_wm; 7315 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7316 } else if (IS_G4X(dev)) { 7317 dev_priv->display.update_wm = g4x_update_wm; 7318 dev_priv->display.init_clock_gating = g4x_init_clock_gating; 7319 } else if (IS_GEN4(dev)) { 7320 dev_priv->display.update_wm = i965_update_wm; 7321 if (IS_CRESTLINE(dev)) 7322 dev_priv->display.init_clock_gating = crestline_init_clock_gating; 7323 else if (IS_BROADWATER(dev)) 7324 dev_priv->display.init_clock_gating = broadwater_init_clock_gating; 7325 } else if (IS_GEN3(dev)) { 7326 dev_priv->display.update_wm = i9xx_update_wm; 7327 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 7328 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7329 } else if (IS_GEN2(dev)) { 7330 if (INTEL_INFO(dev)->num_pipes == 1) { 7331 dev_priv->display.update_wm = i845_update_wm; 7332 dev_priv->display.get_fifo_size = i845_get_fifo_size; 7333 } else { 7334 dev_priv->display.update_wm = i9xx_update_wm; 7335 dev_priv->display.get_fifo_size = i830_get_fifo_size; 7336 } 7337 7338 if (IS_I85X(dev) || IS_I865G(dev)) 7339 dev_priv->display.init_clock_gating = i85x_init_clock_gating; 7340 else 7341 dev_priv->display.init_clock_gating = i830_init_clock_gating; 7342 } else { 7343 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 7344 } 7345 } 7346 7347 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 7348 { 7349 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7350 7351 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7352 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); 7353 return -EAGAIN; 7354 } 7355 7356 I915_WRITE(GEN6_PCODE_DATA, *val); 7357 I915_WRITE(GEN6_PCODE_DATA1, 0); 7358 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7359 7360 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7361 500)) { 7362 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); 7363 return -ETIMEDOUT; 7364 } 7365 7366 *val = 

/*
 * GEN6+ PCU ("pcode") mailbox helpers.  Both helpers bail out with
 * -EAGAIN if a request is already pending (GEN6_PCODE_READY still set);
 * otherwise they write the payload to GEN6_PCODE_DATA, start the command
 * by writing GEN6_PCODE_READY | mbox to GEN6_PCODE_MAILBOX, and poll for
 * up to 500ms for the firmware to clear the READY bit.  A read then
 * fetches the reply from GEN6_PCODE_DATA.  Callers must hold
 * dev_priv->rps.hw_lock.
 */
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
{
	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE(GEN6_PCODE_DATA, *val);
	I915_WRITE(GEN6_PCODE_DATA1, 0);
	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
		     500)) {
		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	*val = I915_READ(GEN6_PCODE_DATA);
	I915_WRITE(GEN6_PCODE_DATA, 0);

	return 0;
}

int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
{
	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE(GEN6_PCODE_DATA, val);
	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
		     500)) {
		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	I915_WRITE(GEN6_PCODE_DATA, 0);

	return 0;
}

static int vlv_gpu_freq_div(unsigned int czclk_freq)
{
	switch (czclk_freq) {
	case 200:
		return 10;
	case 267:
		return 12;
	case 320:
	case 333:
		return 16;
	case 400:
		return 20;
	default:
		return -1;
	}
}

static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);

	div = vlv_gpu_freq_div(czclk_freq);
	if (div < 0)
		return div;

	return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
}

static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);

	mul = vlv_gpu_freq_div(czclk_freq);
	if (mul < 0)
		return mul;

	return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
}

static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);

	div = vlv_gpu_freq_div(czclk_freq) / 2;
	if (div < 0)
		return div;

	return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
}

static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);

	mul = vlv_gpu_freq_div(czclk_freq) / 2;
	if (mul < 0)
		return mul;

	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
}

int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv->dev))
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(dev_priv->dev))
		return chv_gpu_freq(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv->dev))
		return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv->dev))
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(dev_priv->dev))
		return chv_freq_opcode(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv->dev))
		return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}
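
/*
 * Worked example for the conversion helpers above (illustrative numbers,
 * not taken from any particular SKU): with a 320 MHz CZ clock,
 * vlv_gpu_freq_div() returns 16, so on Valleyview byt_gpu_freq() yields
 * 320 * (val + 6 - 0xbd) / 16 = 20 MHz * (val - 0xb7), with
 * byt_freq_opcode() as its inverse.  On Cherryview the divider is halved
 * to 8, so chv_gpu_freq() yields 320 * val / 16 / 2 = 10 MHz * val, and
 * chv_freq_opcode() rounds back to the even opcodes the hardware expects.
 * All other platforms simply scale by GT_FREQUENCY_MULTIPLIER (divided by
 * GEN9_FREQ_SCALER on gen9).
 */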

struct request_boost {
	struct work_struct work;
	struct drm_i915_gem_request *req;
};

static void __intel_rps_boost_work(struct work_struct *work)
{
	struct request_boost *boost = container_of(work, struct request_boost, work);
	struct drm_i915_gem_request *req = boost->req;

	if (!i915_gem_request_completed(req, true))
		gen6_rps_boost(to_i915(req->ring->dev), NULL,
			       req->emitted_jiffies);

	i915_gem_request_unreference__unlocked(req);
	kfree(boost);
}

void intel_queue_rps_boost_for_request(struct drm_device *dev,
				       struct drm_i915_gem_request *req)
{
	struct request_boost *boost;

	if (req == NULL || INTEL_INFO(dev)->gen < 6)
		return;

	if (i915_gem_request_completed(req, true))
		return;

	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
	if (boost == NULL)
		return;

	i915_gem_request_reference(req);
	boost->req = req;

	INIT_WORK(&boost->work, __intel_rps_boost_work);
	queue_work(to_i915(dev)->wq, &boost->work);
}

void intel_pm_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	mutex_init(&dev_priv->rps.hw_lock);
	spin_lock_init(&dev_priv->rps.client_lock);

	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
			  intel_gen6_powersave_work);
	INIT_LIST_HEAD(&dev_priv->rps.clients);
	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);

	dev_priv->pm.suspended = false;
}