xref: /dflybsd-src/sys/dev/drm/i915/intel_pm.c (revision 23832f75edc9855492226d612851679a00de2f9c)
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27 
28 #include "i915_drv.h"
29 #include "intel_drv.h"
30 #include <linux/module.h>
31 #include <machine/clock.h>
32 
33 #define FORCEWAKE_ACK_TIMEOUT_MS 2
34 
35 /* FBC, or Frame Buffer Compression, is a technique employed to compress the
36  * framebuffer contents in-memory, aiming at reducing the required bandwidth
37  * during in-memory transfers and, therefore, reducing power consumption.
38  *
39  * The benefits of FBC are mostly visible with solid backgrounds and
40  * variation-less patterns.
41  *
42  * FBC-related functionality can be enabled by means of the
43  * i915.i915_enable_fbc module parameter.
44  */
45 
46 static bool intel_crtc_active(struct drm_crtc *crtc)
47 {
48 	/* Be paranoid as we can arrive here with only partial
49 	 * state retrieved from the hardware during setup.
50 	 */
51 	return to_intel_crtc(crtc)->active && crtc->fb && crtc->mode.clock;
52 }
53 
54 static void i8xx_disable_fbc(struct drm_device *dev)
55 {
56 	struct drm_i915_private *dev_priv = dev->dev_private;
57 	u32 fbc_ctl;
58 
59 	/* Disable compression */
60 	fbc_ctl = I915_READ(FBC_CONTROL);
61 	if ((fbc_ctl & FBC_CTL_EN) == 0)
62 		return;
63 
64 	fbc_ctl &= ~FBC_CTL_EN;
65 	I915_WRITE(FBC_CONTROL, fbc_ctl);
66 
67 	/* Wait for compressing bit to clear */
68 	if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
69 		DRM_DEBUG_KMS("FBC idle timed out\n");
70 		return;
71 	}
72 
73 	DRM_DEBUG_KMS("disabled FBC\n");
74 }
75 
76 static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
77 {
78 	struct drm_device *dev = crtc->dev;
79 	struct drm_i915_private *dev_priv = dev->dev_private;
80 	struct drm_framebuffer *fb = crtc->fb;
81 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
82 	struct drm_i915_gem_object *obj = intel_fb->obj;
83 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
84 	int cfb_pitch;
85 	int plane, i;
86 	u32 fbc_ctl, fbc_ctl2;
87 
88 	cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
89 	if (fb->pitches[0] < cfb_pitch)
90 		cfb_pitch = fb->pitches[0];
91 
92 	/* FBC_CTL wants 64B units */
93 	cfb_pitch = (cfb_pitch / 64) - 1;
94 	plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
95 
96 	/* Clear old tags */
97 	for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
98 		I915_WRITE(FBC_TAG + (i * 4), 0);
99 
100 	/* Set it up... */
101 	fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
102 	fbc_ctl2 |= plane;
103 	I915_WRITE(FBC_CONTROL2, fbc_ctl2);
104 	I915_WRITE(FBC_FENCE_OFF, crtc->y);
105 
106 	/* enable it... */
107 	fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
108 	if (IS_I945GM(dev))
109 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
110 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
111 	fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
112 	fbc_ctl |= obj->fence_reg;
113 	I915_WRITE(FBC_CONTROL, fbc_ctl);
114 
115 	DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
116 		      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
117 }
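
/*
 * Worked example for the pitch encoding above (illustrative numbers, not
 * taken from real hardware state): a 1920-pixel-wide, 32bpp scanout has a
 * framebuffer pitch of 1920 * 4 = 7680 bytes.  If that is smaller than the
 * compressed-buffer pitch it becomes the limit, and the value programmed
 * into FBC_CONTROL is (7680 / 64) - 1 = 119, i.e. the pitch in 64-byte
 * units minus one, as the register expects.
 */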
118 
119 static bool i8xx_fbc_enabled(struct drm_device *dev)
120 {
121 	struct drm_i915_private *dev_priv = dev->dev_private;
122 
123 	return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
124 }
125 
126 static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
127 {
128 	struct drm_device *dev = crtc->dev;
129 	struct drm_i915_private *dev_priv = dev->dev_private;
130 	struct drm_framebuffer *fb = crtc->fb;
131 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
132 	struct drm_i915_gem_object *obj = intel_fb->obj;
133 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
134 	int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
135 	unsigned long stall_watermark = 200;
136 	u32 dpfc_ctl;
137 
138 	dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
139 	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
140 	I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
141 
142 	I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
143 		   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
144 		   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
145 	I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
146 
147 	/* enable it... */
148 	I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
149 
150 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
151 }
152 
153 static void g4x_disable_fbc(struct drm_device *dev)
154 {
155 	struct drm_i915_private *dev_priv = dev->dev_private;
156 	u32 dpfc_ctl;
157 
158 	/* Disable compression */
159 	dpfc_ctl = I915_READ(DPFC_CONTROL);
160 	if (dpfc_ctl & DPFC_CTL_EN) {
161 		dpfc_ctl &= ~DPFC_CTL_EN;
162 		I915_WRITE(DPFC_CONTROL, dpfc_ctl);
163 
164 		DRM_DEBUG_KMS("disabled FBC\n");
165 	}
166 }
167 
168 static bool g4x_fbc_enabled(struct drm_device *dev)
169 {
170 	struct drm_i915_private *dev_priv = dev->dev_private;
171 
172 	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
173 }
174 
175 static void sandybridge_blit_fbc_update(struct drm_device *dev)
176 {
177 	struct drm_i915_private *dev_priv = dev->dev_private;
178 	u32 blt_ecoskpd;
179 
180 	/* Make sure blitter notifies FBC of writes */
181 	gen6_gt_force_wake_get(dev_priv);
182 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
183 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
184 		GEN6_BLITTER_LOCK_SHIFT;
185 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
186 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
187 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
188 	blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
189 			 GEN6_BLITTER_LOCK_SHIFT);
190 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
191 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
192 	gen6_gt_force_wake_put(dev_priv);
193 }
194 
195 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
196 {
197 	struct drm_device *dev = crtc->dev;
198 	struct drm_i915_private *dev_priv = dev->dev_private;
199 	struct drm_framebuffer *fb = crtc->fb;
200 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
201 	struct drm_i915_gem_object *obj = intel_fb->obj;
202 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
203 	int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
204 	unsigned long stall_watermark = 200;
205 	u32 dpfc_ctl;
206 
207 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
208 	dpfc_ctl &= DPFC_RESERVED;
209 	dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X);
210 	/* Set persistent mode for front-buffer rendering, ala X. */
211 	dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE;
212 	dpfc_ctl |= (DPFC_CTL_FENCE_EN | obj->fence_reg);
213 	I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
214 
215 	I915_WRITE(ILK_DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
216 		   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
217 		   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
218 	I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
219 	I915_WRITE(ILK_FBC_RT_BASE, obj->gtt_offset | ILK_FBC_RT_VALID);
220 	/* enable it... */
221 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
222 
223 	if (IS_GEN6(dev)) {
224 		I915_WRITE(SNB_DPFC_CTL_SA,
225 			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
226 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
227 		sandybridge_blit_fbc_update(dev);
228 	}
229 
230 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
231 }
232 
233 static void ironlake_disable_fbc(struct drm_device *dev)
234 {
235 	struct drm_i915_private *dev_priv = dev->dev_private;
236 	u32 dpfc_ctl;
237 
238 	/* Disable compression */
239 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
240 	if (dpfc_ctl & DPFC_CTL_EN) {
241 		dpfc_ctl &= ~DPFC_CTL_EN;
242 		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);
243 
244 		if (IS_IVYBRIDGE(dev))
245 			/* WaFbcDisableDpfcClockGating:ivb */
246 			I915_WRITE(ILK_DSPCLK_GATE_D,
247 				   I915_READ(ILK_DSPCLK_GATE_D) &
248 				   ~ILK_DPFCUNIT_CLOCK_GATE_DISABLE);
249 
250 		if (IS_HASWELL(dev))
251 			/* WaFbcDisableDpfcClockGating:hsw */
252 			I915_WRITE(HSW_CLKGATE_DISABLE_PART_1,
253 				   I915_READ(HSW_CLKGATE_DISABLE_PART_1) &
254 				   ~HSW_DPFC_GATING_DISABLE);
255 
256 		DRM_DEBUG_KMS("disabled FBC\n");
257 	}
258 }
259 
260 static bool ironlake_fbc_enabled(struct drm_device *dev)
261 {
262 	struct drm_i915_private *dev_priv = dev->dev_private;
263 
264 	return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
265 }
266 
267 static void gen7_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
268 {
269 	struct drm_device *dev = crtc->dev;
270 	struct drm_i915_private *dev_priv = dev->dev_private;
271 	struct drm_framebuffer *fb = crtc->fb;
272 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
273 	struct drm_i915_gem_object *obj = intel_fb->obj;
274 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
275 
276 	I915_WRITE(IVB_FBC_RT_BASE, obj->gtt_offset);
277 
278 	I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
279 		   IVB_DPFC_CTL_FENCE_EN |
280 		   intel_crtc->plane << IVB_DPFC_CTL_PLANE_SHIFT);
281 
282 	if (IS_IVYBRIDGE(dev)) {
283 		/* WaFbcAsynchFlipDisableFbcQueue:ivb */
284 		I915_WRITE(ILK_DISPLAY_CHICKEN1, ILK_FBCQ_DIS);
285 		/* WaFbcDisableDpfcClockGating:ivb */
286 		I915_WRITE(ILK_DSPCLK_GATE_D,
287 			   I915_READ(ILK_DSPCLK_GATE_D) |
288 			   ILK_DPFCUNIT_CLOCK_GATE_DISABLE);
289 	} else {
290 		/* WaFbcAsynchFlipDisableFbcQueue:hsw */
291 		I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe),
292 			   HSW_BYPASS_FBC_QUEUE);
293 		/* WaFbcDisableDpfcClockGating:hsw */
294 		I915_WRITE(HSW_CLKGATE_DISABLE_PART_1,
295 			   I915_READ(HSW_CLKGATE_DISABLE_PART_1) |
296 			   HSW_DPFC_GATING_DISABLE);
297 	}
298 
299 	I915_WRITE(SNB_DPFC_CTL_SA,
300 		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
301 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
302 
303 	sandybridge_blit_fbc_update(dev);
304 
305 	DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane);
306 }
307 
308 bool intel_fbc_enabled(struct drm_device *dev)
309 {
310 	struct drm_i915_private *dev_priv = dev->dev_private;
311 
312 	if (!dev_priv->display.fbc_enabled)
313 		return false;
314 
315 	return dev_priv->display.fbc_enabled(dev);
316 }
317 
318 static void intel_fbc_work_fn(struct work_struct *__work)
319 {
320 	struct intel_fbc_work *work =
321 		container_of(to_delayed_work(__work),
322 			     struct intel_fbc_work, work);
323 	struct drm_device *dev = work->crtc->dev;
324 	struct drm_i915_private *dev_priv = dev->dev_private;
325 
326 	mutex_lock(&dev->struct_mutex);
327 	if (work == dev_priv->fbc_work) {
328 		/* Double check that we haven't switched fb without cancelling
329 		 * the prior work.
330 		 */
331 		if (work->crtc->fb == work->fb) {
332 			dev_priv->display.enable_fbc(work->crtc,
333 						     work->interval);
334 
335 			dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane;
336 			dev_priv->cfb_fb = work->crtc->fb->base.id;
337 			dev_priv->cfb_y = work->crtc->y;
338 		}
339 
340 		dev_priv->fbc_work = NULL;
341 	}
342 	mutex_unlock(&dev->struct_mutex);
343 
344 	kfree(work);
345 }
346 
347 static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
348 {
349 	if (dev_priv->fbc_work == NULL)
350 		return;
351 
352 	DRM_DEBUG_KMS("cancelling pending FBC enable\n");
353 
354 	/* Synchronisation is provided by struct_mutex and checking of
355 	 * dev_priv->fbc_work, so we can perform the cancellation
356 	 * entirely asynchronously.
357 	 */
358 	if (cancel_delayed_work(&dev_priv->fbc_work->work))
359 		/* work was cancelled before it ran, clean up */
360 		kfree(dev_priv->fbc_work);
361 
362 	/* Mark the work as no longer wanted so that if it does
363 	 * wake up (because the work was already running and waiting
364 	 * for our mutex), it will discover that it is no longer
365 	 * necessary to run.
366 	 */
367 	dev_priv->fbc_work = NULL;
368 }
369 
370 void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
371 {
372 	struct intel_fbc_work *work;
373 	struct drm_device *dev = crtc->dev;
374 	struct drm_i915_private *dev_priv = dev->dev_private;
375 
376 	if (!dev_priv->display.enable_fbc)
377 		return;
378 
379 	intel_cancel_fbc_work(dev_priv);
380 
381 	work = kzalloc(sizeof *work, GFP_KERNEL);
382 	if (work == NULL) {
383 		dev_priv->display.enable_fbc(crtc, interval);
384 		return;
385 	}
386 
387 	work->crtc = crtc;
388 	work->fb = crtc->fb;
389 	work->interval = interval;
390 	INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
391 
392 	dev_priv->fbc_work = work;
393 
394 	DRM_DEBUG_KMS("scheduling delayed FBC enable\n");
395 
396 	/* Delay the actual enabling to let pageflipping cease and the
397 	 * display to settle before starting the compression. Note that
398 	 * this delay also serves a second purpose: it allows for a
399 	 * vblank to pass after disabling the FBC before we attempt
400 	 * to modify the control registers.
401 	 *
402 	 * A more complicated solution would involve tracking vblanks
403 	 * following the termination of the page-flipping sequence
404 	 * and indeed performing the enable as a co-routine and not
405 	 * waiting synchronously upon the vblank.
406 	 */
407 	schedule_delayed_work(&work->work, msecs_to_jiffies(50));
408 }
409 
410 void intel_disable_fbc(struct drm_device *dev)
411 {
412 	struct drm_i915_private *dev_priv = dev->dev_private;
413 
414 	intel_cancel_fbc_work(dev_priv);
415 
416 	if (!dev_priv->display.disable_fbc)
417 		return;
418 
419 	dev_priv->display.disable_fbc(dev);
420 	dev_priv->cfb_plane = -1;
421 }
422 
423 /**
424  * intel_update_fbc - enable/disable FBC as needed
425  * @dev: the drm_device
426  *
427  * Set up the framebuffer compression hardware at mode set time.  We
428  * enable it if possible:
429  *   - plane A only (on pre-965)
430  *   - no pixel multiply/line duplication
431  *   - no alpha buffer discard
432  *   - no dual wide
433  *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
434  *
435  * We can't assume that any compression will take place (worst case),
436  * so the compressed buffer has to be the same size as the uncompressed
437  * one.  It also must reside (along with the line length buffer) in
438  * stolen memory.
439  *
440  * We need to enable/disable FBC on a global basis.
441  */
442 void intel_update_fbc(struct drm_device *dev)
443 {
444 	struct drm_i915_private *dev_priv = dev->dev_private;
445 	struct drm_crtc *crtc = NULL, *tmp_crtc;
446 	struct intel_crtc *intel_crtc;
447 	struct drm_framebuffer *fb;
448 	struct intel_framebuffer *intel_fb;
449 	struct drm_i915_gem_object *obj;
450 	int enable_fbc;
451 	unsigned int max_hdisplay, max_vdisplay;
452 
453 	if (!i915_powersave)
454 		return;
455 
456 	if (!I915_HAS_FBC(dev))
457 		return;
458 
459 	/*
460 	 * If FBC is already on, we just have to verify that we can
461 	 * keep it that way...
462 	 * Need to disable if:
463 	 *   - more than one pipe is active
464 	 *   - changing FBC params (stride, fence, mode)
465 	 *   - new fb is too large to fit in compressed buffer
466 	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
467 	 */
468 	list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
469 		if (intel_crtc_active(tmp_crtc) &&
470 		    !to_intel_crtc(tmp_crtc)->primary_disabled) {
471 			if (crtc) {
472 				DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
473 				dev_priv->no_fbc_reason = FBC_MULTIPLE_PIPES;
474 				goto out_disable;
475 			}
476 			crtc = tmp_crtc;
477 		}
478 	}
479 
480 	if (!crtc || crtc->fb == NULL) {
481 		DRM_DEBUG_KMS("no output, disabling\n");
482 		dev_priv->no_fbc_reason = FBC_NO_OUTPUT;
483 		goto out_disable;
484 	}
485 
486 	intel_crtc = to_intel_crtc(crtc);
487 	fb = crtc->fb;
488 	intel_fb = to_intel_framebuffer(fb);
489 	obj = intel_fb->obj;
490 
491 	enable_fbc = i915_enable_fbc;
492 	if (enable_fbc < 0) {
493 		DRM_DEBUG_KMS("fbc set to per-chip default\n");
494 		enable_fbc = 1;
495 		if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
496 			enable_fbc = 0;
497 	}
498 	if (!enable_fbc) {
499 		DRM_DEBUG_KMS("fbc disabled per module param\n");
500 		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
501 		goto out_disable;
502 	}
503 	if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
504 	    (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
505 		DRM_DEBUG_KMS("mode incompatible with compression, "
506 			      "disabling\n");
507 		dev_priv->no_fbc_reason = FBC_UNSUPPORTED_MODE;
508 		goto out_disable;
509 	}
510 
511 	if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
512 		max_hdisplay = 4096;
513 		max_vdisplay = 2048;
514 	} else {
515 		max_hdisplay = 2048;
516 		max_vdisplay = 1536;
517 	}
518 	if ((crtc->mode.hdisplay > max_hdisplay) ||
519 	    (crtc->mode.vdisplay > max_vdisplay)) {
520 		DRM_DEBUG_KMS("mode too large for compression, disabling\n");
521 		dev_priv->no_fbc_reason = FBC_MODE_TOO_LARGE;
522 		goto out_disable;
523 	}
524 	if ((IS_I915GM(dev) || IS_I945GM(dev) || IS_HASWELL(dev)) &&
525 	    intel_crtc->plane != 0) {
526 		DRM_DEBUG_KMS("plane not 0, disabling compression\n");
527 		dev_priv->no_fbc_reason = FBC_BAD_PLANE;
528 		goto out_disable;
529 	}
530 
531 	/* The use of a CPU fence is mandatory in order to detect writes
532 	 * by the CPU to the scanout and trigger updates to the FBC.
533 	 */
534 	if (obj->tiling_mode != I915_TILING_X ||
535 	    obj->fence_reg == I915_FENCE_REG_NONE) {
536 		DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
537 		dev_priv->no_fbc_reason = FBC_NOT_TILED;
538 		goto out_disable;
539 	}
540 
541 #ifdef DDB
542 	/* If the kernel debugger is active, always disable compression */
543 	if (db_active)
544 		goto out_disable;
545 #endif
546 
547 	if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
548 		DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
549 		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
550 		goto out_disable;
551 	}
552 
553 	/* If the scanout has not changed, don't modify the FBC settings.
554 	 * Note that we make the fundamental assumption that the fb->obj
555 	 * cannot be unpinned (and have its GTT offset and fence revoked)
556 	 * without first being decoupled from the scanout and FBC disabled.
557 	 */
558 	if (dev_priv->cfb_plane == intel_crtc->plane &&
559 	    dev_priv->cfb_fb == fb->base.id &&
560 	    dev_priv->cfb_y == crtc->y)
561 		return;
562 
563 	if (intel_fbc_enabled(dev)) {
564 		/* We update FBC along two paths, after changing fb/crtc
565 		 * configuration (modeswitching) and after page-flipping
566 		 * finishes. For the latter, we know that not only did
567 		 * we disable the FBC at the start of the page-flip
568 		 * sequence, but also more than one vblank has passed.
569 		 *
570 		 * For the former case of modeswitching, it is possible
571 		 * to switch between two FBC valid configurations
572 		 * instantaneously so we do need to disable the FBC
573 		 * before we can modify its control registers. We also
574 		 * have to wait for the next vblank for that to take
575 		 * effect. However, since we delay enabling FBC we can
576 		 * assume that a vblank has passed since disabling and
577 		 * that we can safely alter the registers in the deferred
578 		 * callback.
579 		 *
580 		 * In the scenario that we go from a valid to invalid
581 		 * and then back to valid FBC configuration we have
582 		 * no strict enforcement that a vblank occurred since
583 		 * disabling the FBC. However, along all current pipe
584 		 * disabling paths we do need to wait for a vblank at
585 		 * some point. And we wait before enabling FBC anyway.
586 		 */
587 		DRM_DEBUG_KMS("disabling active FBC for update\n");
588 		intel_disable_fbc(dev);
589 	}
590 
591 	intel_enable_fbc(crtc, 500);
592 	return;
593 
594 out_disable:
595 	/* Multiple disables should be harmless */
596 	if (intel_fbc_enabled(dev)) {
597 		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
598 		intel_disable_fbc(dev);
599 	}
600 	i915_gem_stolen_cleanup_compression(dev);
601 }
602 
603 static void i915_pineview_get_mem_freq(struct drm_device *dev)
604 {
605 	drm_i915_private_t *dev_priv = dev->dev_private;
606 	u32 tmp;
607 
608 	tmp = I915_READ(CLKCFG);
609 
610 	switch (tmp & CLKCFG_FSB_MASK) {
611 	case CLKCFG_FSB_533:
612 		dev_priv->fsb_freq = 533; /* 133*4 */
613 		break;
614 	case CLKCFG_FSB_800:
615 		dev_priv->fsb_freq = 800; /* 200*4 */
616 		break;
617 	case CLKCFG_FSB_667:
618 		dev_priv->fsb_freq =  667; /* 167*4 */
619 		break;
620 	case CLKCFG_FSB_400:
621 		dev_priv->fsb_freq = 400; /* 100*4 */
622 		break;
623 	}
624 
625 	switch (tmp & CLKCFG_MEM_MASK) {
626 	case CLKCFG_MEM_533:
627 		dev_priv->mem_freq = 533;
628 		break;
629 	case CLKCFG_MEM_667:
630 		dev_priv->mem_freq = 667;
631 		break;
632 	case CLKCFG_MEM_800:
633 		dev_priv->mem_freq = 800;
634 		break;
635 	}
636 
637 	/* detect pineview DDR3 setting */
638 	tmp = I915_READ(CSHRDDR3CTL);
639 	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
640 }
641 
642 static void i915_ironlake_get_mem_freq(struct drm_device *dev)
643 {
644 	drm_i915_private_t *dev_priv = dev->dev_private;
645 	u16 ddrpll, csipll;
646 
647 	ddrpll = I915_READ16(DDRMPLL1);
648 	csipll = I915_READ16(CSIPLL0);
649 
650 	switch (ddrpll & 0xff) {
651 	case 0xc:
652 		dev_priv->mem_freq = 800;
653 		break;
654 	case 0x10:
655 		dev_priv->mem_freq = 1066;
656 		break;
657 	case 0x14:
658 		dev_priv->mem_freq = 1333;
659 		break;
660 	case 0x18:
661 		dev_priv->mem_freq = 1600;
662 		break;
663 	default:
664 		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
665 				 ddrpll & 0xff);
666 		dev_priv->mem_freq = 0;
667 		break;
668 	}
669 
670 	dev_priv->ips.r_t = dev_priv->mem_freq;
671 
672 	switch (csipll & 0x3ff) {
673 	case 0x00c:
674 		dev_priv->fsb_freq = 3200;
675 		break;
676 	case 0x00e:
677 		dev_priv->fsb_freq = 3733;
678 		break;
679 	case 0x010:
680 		dev_priv->fsb_freq = 4266;
681 		break;
682 	case 0x012:
683 		dev_priv->fsb_freq = 4800;
684 		break;
685 	case 0x014:
686 		dev_priv->fsb_freq = 5333;
687 		break;
688 	case 0x016:
689 		dev_priv->fsb_freq = 5866;
690 		break;
691 	case 0x018:
692 		dev_priv->fsb_freq = 6400;
693 		break;
694 	default:
695 		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
696 				 csipll & 0x3ff);
697 		dev_priv->fsb_freq = 0;
698 		break;
699 	}
700 
701 	if (dev_priv->fsb_freq == 3200) {
702 		dev_priv->ips.c_m = 0;
703 	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
704 		dev_priv->ips.c_m = 1;
705 	} else {
706 		dev_priv->ips.c_m = 2;
707 	}
708 }
709 
710 static const struct cxsr_latency cxsr_latency_table[] = {
711 	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
712 	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
713 	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
714 	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
715 	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
716 
717 	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
718 	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
719 	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
720 	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
721 	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
722 
723 	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
724 	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
725 	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
726 	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
727 	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
728 
729 	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
730 	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
731 	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
732 	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
733 	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
734 
735 	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
736 	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
737 	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
738 	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
739 	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
740 
741 	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
742 	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
743 	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
744 	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
745 	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
746 };
747 
748 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
749 							 int is_ddr3,
750 							 int fsb,
751 							 int mem)
752 {
753 	const struct cxsr_latency *latency;
754 	int i;
755 
756 	if (fsb == 0 || mem == 0)
757 		return NULL;
758 
759 	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
760 		latency = &cxsr_latency_table[i];
761 		if (is_desktop == latency->is_desktop &&
762 		    is_ddr3 == latency->is_ddr3 &&
763 		    fsb == latency->fsb_freq && mem == latency->mem_freq)
764 			return latency;
765 	}
766 
767 	DRM_DEBUG_KMS("Unknown FSB/MEM found, disabling CxSR\n");
768 
769 	return NULL;
770 }
771 
772 static void pineview_disable_cxsr(struct drm_device *dev)
773 {
774 	struct drm_i915_private *dev_priv = dev->dev_private;
775 
776 	/* deactivate cxsr */
777 	I915_WRITE(DSPFW3, I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN);
778 }
779 
780 /*
781  * Latency for FIFO fetches is dependent on several factors:
782  *   - memory configuration (speed, channels)
783  *   - chipset
784  *   - current MCH state
785  * It can be fairly high in some situations, so here we assume a fairly
786  * pessimal value.  It's a tradeoff between extra memory fetches (if we
787  * set this value too high, the FIFO will fetch frequently to stay full)
788  * and power consumption (set it too low to save power and we might see
789  * FIFO underruns and display "flicker").
790  *
791  * A value of 5us seems to be a good balance; safe for very low end
792  * platforms but not overly aggressive on lower latency configs.
793  */
794 static const int latency_ns = 5000;
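
/*
 * To make the tradeoff concrete (illustrative figures, not a measured
 * configuration): at a 148,500 kHz pixel clock with 4 bytes per pixel the
 * display drains roughly 148.5 * 4 = 594 bytes per microsecond, so assuming
 * 5 usec of latency means about 5 * 594 = 2970 bytes must still be queued
 * in the FIFO when a fetch is issued.  intel_calculate_wm() below performs
 * this arithmetic in cacheline-sized units.
 */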
795 
796 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
797 {
798 	struct drm_i915_private *dev_priv = dev->dev_private;
799 	uint32_t dsparb = I915_READ(DSPARB);
800 	int size;
801 
802 	size = dsparb & 0x7f;
803 	if (plane)
804 		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
805 
806 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
807 		      plane ? "B" : "A", size);
808 
809 	return size;
810 }
811 
812 static int i85x_get_fifo_size(struct drm_device *dev, int plane)
813 {
814 	struct drm_i915_private *dev_priv = dev->dev_private;
815 	uint32_t dsparb = I915_READ(DSPARB);
816 	int size;
817 
818 	size = dsparb & 0x1ff;
819 	if (plane)
820 		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
821 	size >>= 1; /* Convert to cachelines */
822 
823 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
824 		      plane ? "B" : "A", size);
825 
826 	return size;
827 }
828 
829 static int i845_get_fifo_size(struct drm_device *dev, int plane)
830 {
831 	struct drm_i915_private *dev_priv = dev->dev_private;
832 	uint32_t dsparb = I915_READ(DSPARB);
833 	int size;
834 
835 	size = dsparb & 0x7f;
836 	size >>= 2; /* Convert to cachelines */
837 
838 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
839 		      plane ? "B" : "A",
840 		      size);
841 
842 	return size;
843 }
844 
845 static int i830_get_fifo_size(struct drm_device *dev, int plane)
846 {
847 	struct drm_i915_private *dev_priv = dev->dev_private;
848 	uint32_t dsparb = I915_READ(DSPARB);
849 	int size;
850 
851 	size = dsparb & 0x7f;
852 	size >>= 1; /* Convert to cachelines */
853 
854 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
855 		      plane ? "B" : "A", size);
856 
857 	return size;
858 }
859 
860 /* Pineview has different values for various configs */
861 static const struct intel_watermark_params pineview_display_wm = {
862 	PINEVIEW_DISPLAY_FIFO,
863 	PINEVIEW_MAX_WM,
864 	PINEVIEW_DFT_WM,
865 	PINEVIEW_GUARD_WM,
866 	PINEVIEW_FIFO_LINE_SIZE
867 };
868 static const struct intel_watermark_params pineview_display_hplloff_wm = {
869 	PINEVIEW_DISPLAY_FIFO,
870 	PINEVIEW_MAX_WM,
871 	PINEVIEW_DFT_HPLLOFF_WM,
872 	PINEVIEW_GUARD_WM,
873 	PINEVIEW_FIFO_LINE_SIZE
874 };
875 static const struct intel_watermark_params pineview_cursor_wm = {
876 	PINEVIEW_CURSOR_FIFO,
877 	PINEVIEW_CURSOR_MAX_WM,
878 	PINEVIEW_CURSOR_DFT_WM,
879 	PINEVIEW_CURSOR_GUARD_WM,
880 	PINEVIEW_FIFO_LINE_SIZE,
881 };
882 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
883 	PINEVIEW_CURSOR_FIFO,
884 	PINEVIEW_CURSOR_MAX_WM,
885 	PINEVIEW_CURSOR_DFT_WM,
886 	PINEVIEW_CURSOR_GUARD_WM,
887 	PINEVIEW_FIFO_LINE_SIZE
888 };
889 static const struct intel_watermark_params g4x_wm_info = {
890 	G4X_FIFO_SIZE,
891 	G4X_MAX_WM,
892 	G4X_MAX_WM,
893 	2,
894 	G4X_FIFO_LINE_SIZE,
895 };
896 static const struct intel_watermark_params g4x_cursor_wm_info = {
897 	I965_CURSOR_FIFO,
898 	I965_CURSOR_MAX_WM,
899 	I965_CURSOR_DFT_WM,
900 	2,
901 	G4X_FIFO_LINE_SIZE,
902 };
903 static const struct intel_watermark_params valleyview_wm_info = {
904 	VALLEYVIEW_FIFO_SIZE,
905 	VALLEYVIEW_MAX_WM,
906 	VALLEYVIEW_MAX_WM,
907 	2,
908 	G4X_FIFO_LINE_SIZE,
909 };
910 static const struct intel_watermark_params valleyview_cursor_wm_info = {
911 	I965_CURSOR_FIFO,
912 	VALLEYVIEW_CURSOR_MAX_WM,
913 	I965_CURSOR_DFT_WM,
914 	2,
915 	G4X_FIFO_LINE_SIZE,
916 };
917 static const struct intel_watermark_params i965_cursor_wm_info = {
918 	I965_CURSOR_FIFO,
919 	I965_CURSOR_MAX_WM,
920 	I965_CURSOR_DFT_WM,
921 	2,
922 	I915_FIFO_LINE_SIZE,
923 };
924 static const struct intel_watermark_params i945_wm_info = {
925 	I945_FIFO_SIZE,
926 	I915_MAX_WM,
927 	1,
928 	2,
929 	I915_FIFO_LINE_SIZE
930 };
931 static const struct intel_watermark_params i915_wm_info = {
932 	I915_FIFO_SIZE,
933 	I915_MAX_WM,
934 	1,
935 	2,
936 	I915_FIFO_LINE_SIZE
937 };
938 static const struct intel_watermark_params i855_wm_info = {
939 	I855GM_FIFO_SIZE,
940 	I915_MAX_WM,
941 	1,
942 	2,
943 	I830_FIFO_LINE_SIZE
944 };
945 static const struct intel_watermark_params i830_wm_info = {
946 	I830_FIFO_SIZE,
947 	I915_MAX_WM,
948 	1,
949 	2,
950 	I830_FIFO_LINE_SIZE
951 };
952 
953 static const struct intel_watermark_params ironlake_display_wm_info = {
954 	ILK_DISPLAY_FIFO,
955 	ILK_DISPLAY_MAXWM,
956 	ILK_DISPLAY_DFTWM,
957 	2,
958 	ILK_FIFO_LINE_SIZE
959 };
960 static const struct intel_watermark_params ironlake_cursor_wm_info = {
961 	ILK_CURSOR_FIFO,
962 	ILK_CURSOR_MAXWM,
963 	ILK_CURSOR_DFTWM,
964 	2,
965 	ILK_FIFO_LINE_SIZE
966 };
967 static const struct intel_watermark_params ironlake_display_srwm_info = {
968 	ILK_DISPLAY_SR_FIFO,
969 	ILK_DISPLAY_MAX_SRWM,
970 	ILK_DISPLAY_DFT_SRWM,
971 	2,
972 	ILK_FIFO_LINE_SIZE
973 };
974 static const struct intel_watermark_params ironlake_cursor_srwm_info = {
975 	ILK_CURSOR_SR_FIFO,
976 	ILK_CURSOR_MAX_SRWM,
977 	ILK_CURSOR_DFT_SRWM,
978 	2,
979 	ILK_FIFO_LINE_SIZE
980 };
981 
982 static const struct intel_watermark_params sandybridge_display_wm_info = {
983 	SNB_DISPLAY_FIFO,
984 	SNB_DISPLAY_MAXWM,
985 	SNB_DISPLAY_DFTWM,
986 	2,
987 	SNB_FIFO_LINE_SIZE
988 };
989 static const struct intel_watermark_params sandybridge_cursor_wm_info = {
990 	SNB_CURSOR_FIFO,
991 	SNB_CURSOR_MAXWM,
992 	SNB_CURSOR_DFTWM,
993 	2,
994 	SNB_FIFO_LINE_SIZE
995 };
996 static const struct intel_watermark_params sandybridge_display_srwm_info = {
997 	SNB_DISPLAY_SR_FIFO,
998 	SNB_DISPLAY_MAX_SRWM,
999 	SNB_DISPLAY_DFT_SRWM,
1000 	2,
1001 	SNB_FIFO_LINE_SIZE
1002 };
1003 static const struct intel_watermark_params sandybridge_cursor_srwm_info = {
1004 	SNB_CURSOR_SR_FIFO,
1005 	SNB_CURSOR_MAX_SRWM,
1006 	SNB_CURSOR_DFT_SRWM,
1007 	2,
1008 	SNB_FIFO_LINE_SIZE
1009 };
1010 
1011 
1012 /**
1013  * intel_calculate_wm - calculate watermark level
1014  * @clock_in_khz: pixel clock
1015  * @wm: chip FIFO params
1016  * @pixel_size: display pixel size
1017  * @latency_ns: memory latency for the platform
1018  *
1019  * Calculate the watermark level (the level at which the display plane will
1020  * start fetching from memory again).  Each chip has a different display
1021  * FIFO size and allocation, so the caller needs to figure that out and pass
1022  * in the correct intel_watermark_params structure.
1023  *
1024  * As the pixel clock runs, the FIFO will be drained at a rate that depends
1025  * on the pixel size.  When it reaches the watermark level, it'll start
1026  * fetching FIFO-line-sized chunks from memory until the FIFO fills
1027  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
1028  * will occur, and a display engine hang could result.
1029  */
1030 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
1031 					const struct intel_watermark_params *wm,
1032 					int fifo_size,
1033 					int pixel_size,
1034 					unsigned long latency_ns)
1035 {
1036 	long entries_required, wm_size;
1037 
1038 	/*
1039 	 * Note: we need to make sure we don't overflow for various clock &
1040 	 * latency values.
1041 	 * clocks go from a few thousand to several hundred thousand.
1042 	 * latency is usually a few thousand
1043 	 */
1044 	entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
1045 		1000;
1046 	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
1047 
1048 	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
1049 
1050 	wm_size = fifo_size - (entries_required + wm->guard_size);
1051 
1052 	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
1053 
1054 	/* Don't promote wm_size to unsigned... */
1055 	if (wm_size > (long)wm->max_wm)
1056 		wm_size = wm->max_wm;
1057 	if (wm_size <= 0)
1058 		wm_size = wm->default_wm;
1059 	return wm_size;
1060 }
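
/*
 * Worked example of the calculation above, using assumed (not measured)
 * parameters: clock_in_khz = 148500, pixel_size = 4, latency_ns = 5000 and
 * a 64-byte cacheline.  entries_required = (148500 / 1000) * 4 * 5000 / 1000
 * = 2960 bytes, which rounds up to 47 cachelines.  With a hypothetical
 * 96-entry FIFO and a guard size of 2, the watermark would be
 * 96 - (47 + 2) = 47, subject to the max_wm/default_wm clamping above.
 */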
1061 
1062 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
1063 {
1064 	struct drm_crtc *crtc, *enabled = NULL;
1065 
1066 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
1067 		if (intel_crtc_active(crtc)) {
1068 			if (enabled)
1069 				return NULL;
1070 			enabled = crtc;
1071 		}
1072 	}
1073 
1074 	return enabled;
1075 }
1076 
1077 static void pineview_update_wm(struct drm_device *dev)
1078 {
1079 	struct drm_i915_private *dev_priv = dev->dev_private;
1080 	struct drm_crtc *crtc;
1081 	const struct cxsr_latency *latency;
1082 	u32 reg;
1083 	unsigned long wm;
1084 
1085 	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
1086 					 dev_priv->fsb_freq, dev_priv->mem_freq);
1087 	if (!latency) {
1088 		DRM_DEBUG_KMS("Unknown FSB/MEM found, disabling CxSR\n");
1089 		pineview_disable_cxsr(dev);
1090 		return;
1091 	}
1092 
1093 	crtc = single_enabled_crtc(dev);
1094 	if (crtc) {
1095 		int clock = crtc->mode.clock;
1096 		int pixel_size = crtc->fb->bits_per_pixel / 8;
1097 
1098 		/* Display SR */
1099 		wm = intel_calculate_wm(clock, &pineview_display_wm,
1100 					pineview_display_wm.fifo_size,
1101 					pixel_size, latency->display_sr);
1102 		reg = I915_READ(DSPFW1);
1103 		reg &= ~DSPFW_SR_MASK;
1104 		reg |= wm << DSPFW_SR_SHIFT;
1105 		I915_WRITE(DSPFW1, reg);
1106 		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
1107 
1108 		/* cursor SR */
1109 		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
1110 					pineview_display_wm.fifo_size,
1111 					pixel_size, latency->cursor_sr);
1112 		reg = I915_READ(DSPFW3);
1113 		reg &= ~DSPFW_CURSOR_SR_MASK;
1114 		reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
1115 		I915_WRITE(DSPFW3, reg);
1116 
1117 		/* Display HPLL off SR */
1118 		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
1119 					pineview_display_hplloff_wm.fifo_size,
1120 					pixel_size, latency->display_hpll_disable);
1121 		reg = I915_READ(DSPFW3);
1122 		reg &= ~DSPFW_HPLL_SR_MASK;
1123 		reg |= wm & DSPFW_HPLL_SR_MASK;
1124 		I915_WRITE(DSPFW3, reg);
1125 
1126 		/* cursor HPLL off SR */
1127 		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
1128 					pineview_display_hplloff_wm.fifo_size,
1129 					pixel_size, latency->cursor_hpll_disable);
1130 		reg = I915_READ(DSPFW3);
1131 		reg &= ~DSPFW_HPLL_CURSOR_MASK;
1132 		reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
1133 		I915_WRITE(DSPFW3, reg);
1134 		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
1135 
1136 		/* activate cxsr */
1137 		I915_WRITE(DSPFW3,
1138 			   I915_READ(DSPFW3) | PINEVIEW_SELF_REFRESH_EN);
1139 		DRM_DEBUG_KMS("Self-refresh is enabled\n");
1140 	} else {
1141 		pineview_disable_cxsr(dev);
1142 		DRM_DEBUG_KMS("Self-refresh is disabled\n");
1143 	}
1144 }
1145 
1146 static bool g4x_compute_wm0(struct drm_device *dev,
1147 			    int plane,
1148 			    const struct intel_watermark_params *display,
1149 			    int display_latency_ns,
1150 			    const struct intel_watermark_params *cursor,
1151 			    int cursor_latency_ns,
1152 			    int *plane_wm,
1153 			    int *cursor_wm)
1154 {
1155 	struct drm_crtc *crtc;
1156 	int htotal, hdisplay, clock, pixel_size;
1157 	int line_time_us, line_count;
1158 	int entries, tlb_miss;
1159 
1160 	crtc = intel_get_crtc_for_plane(dev, plane);
1161 	if (!intel_crtc_active(crtc)) {
1162 		*cursor_wm = cursor->guard_size;
1163 		*plane_wm = display->guard_size;
1164 		return false;
1165 	}
1166 
1167 	htotal = crtc->mode.htotal;
1168 	hdisplay = crtc->mode.hdisplay;
1169 	clock = crtc->mode.clock;
1170 	pixel_size = crtc->fb->bits_per_pixel / 8;
1171 
1172 	/* Use the small buffer method to calculate plane watermark */
1173 	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
1174 	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
1175 	if (tlb_miss > 0)
1176 		entries += tlb_miss;
1177 	entries = DIV_ROUND_UP(entries, display->cacheline_size);
1178 	*plane_wm = entries + display->guard_size;
1179 	if (*plane_wm > (int)display->max_wm)
1180 		*plane_wm = display->max_wm;
1181 
1182 	/* Use the large buffer method to calculate cursor watermark */
1183 	line_time_us = ((htotal * 1000) / clock);
1184 	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
1185 	entries = line_count * 64 * pixel_size;
1186 	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
1187 	if (tlb_miss > 0)
1188 		entries += tlb_miss;
1189 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1190 	*cursor_wm = entries + cursor->guard_size;
1191 	if (*cursor_wm > (int)cursor->max_wm)
1192 		*cursor_wm = (int)cursor->max_wm;
1193 
1194 	return true;
1195 }
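
/*
 * Rough example of the two methods above, with assumed numbers: at a
 * 148,500 kHz clock, 4 bytes per pixel and 400 ns of latency, the small
 * buffer method gives (148500 * 4 / 1000) * 400 / 1000 = 237 bytes of
 * plane data in flight.  For the cursor, an htotal of 2200 pixels yields
 * line_time_us = (2200 * 1000) / 148500 = 14, so line_count =
 * (400 / 14 + 1000) / 1000 = 1 line, i.e. 1 * 64 * 4 = 256 bytes before
 * the TLB-miss adjustment and cacheline rounding.
 */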
1196 
1197 /*
1198  * Check the wm result.
1199  *
1200  * If any calculated watermark value is larger than the maximum value that
1201  * can be programmed into the associated watermark register, that watermark
1202  * must be disabled.
1203  */
1204 static bool g4x_check_srwm(struct drm_device *dev,
1205 			   int display_wm, int cursor_wm,
1206 			   const struct intel_watermark_params *display,
1207 			   const struct intel_watermark_params *cursor)
1208 {
1209 	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
1210 		      display_wm, cursor_wm);
1211 
1212 	if (display_wm > display->max_wm) {
1213 		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
1214 			      display_wm, display->max_wm);
1215 		return false;
1216 	}
1217 
1218 	if (cursor_wm > cursor->max_wm) {
1219 		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
1220 			      cursor_wm, cursor->max_wm);
1221 		return false;
1222 	}
1223 
1224 	if (!(display_wm || cursor_wm)) {
1225 		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
1226 		return false;
1227 	}
1228 
1229 	return true;
1230 }
1231 
1232 static bool g4x_compute_srwm(struct drm_device *dev,
1233 			     int plane,
1234 			     int latency_ns,
1235 			     const struct intel_watermark_params *display,
1236 			     const struct intel_watermark_params *cursor,
1237 			     int *display_wm, int *cursor_wm)
1238 {
1239 	struct drm_crtc *crtc;
1240 	int hdisplay, htotal, pixel_size, clock;
1241 	unsigned long line_time_us;
1242 	int line_count, line_size;
1243 	int small, large;
1244 	int entries;
1245 
1246 	if (!latency_ns) {
1247 		*display_wm = *cursor_wm = 0;
1248 		return false;
1249 	}
1250 
1251 	crtc = intel_get_crtc_for_plane(dev, plane);
1252 	hdisplay = crtc->mode.hdisplay;
1253 	htotal = crtc->mode.htotal;
1254 	clock = crtc->mode.clock;
1255 	pixel_size = crtc->fb->bits_per_pixel / 8;
1256 
1257 	line_time_us = (htotal * 1000) / clock;
1258 	line_count = (latency_ns / line_time_us + 1000) / 1000;
1259 	line_size = hdisplay * pixel_size;
1260 
1261 	/* Use the minimum of the small and large buffer method for primary */
1262 	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
1263 	large = line_count * line_size;
1264 
1265 	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
1266 	*display_wm = entries + display->guard_size;
1267 
1268 	/* calculate the self-refresh watermark for display cursor */
1269 	entries = line_count * pixel_size * 64;
1270 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1271 	*cursor_wm = entries + cursor->guard_size;
1272 
1273 	return g4x_check_srwm(dev,
1274 			      *display_wm, *cursor_wm,
1275 			      display, cursor);
1276 }
1277 
1278 static bool vlv_compute_drain_latency(struct drm_device *dev,
1279 				     int plane,
1280 				     int *plane_prec_mult,
1281 				     int *plane_dl,
1282 				     int *cursor_prec_mult,
1283 				     int *cursor_dl)
1284 {
1285 	struct drm_crtc *crtc;
1286 	int clock, pixel_size;
1287 	int entries;
1288 
1289 	crtc = intel_get_crtc_for_plane(dev, plane);
1290 	if (!intel_crtc_active(crtc))
1291 		return false;
1292 
1293 	clock = crtc->mode.clock;	/* VESA DOT Clock */
1294 	pixel_size = crtc->fb->bits_per_pixel / 8;	/* BPP */
1295 
1296 	entries = (clock / 1000) * pixel_size;
1297 	*plane_prec_mult = (entries > 256) ?
1298 		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
1299 	*plane_dl = (64 * (*plane_prec_mult) * 4) / ((clock / 1000) *
1300 						     pixel_size);
1301 
1302 	entries = (clock / 1000) * 4;	/* BPP is always 4 for cursor */
1303 	*cursor_prec_mult = (entries > 256) ?
1304 		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
1305 	*cursor_dl = (64 * (*cursor_prec_mult) * 4) / ((clock / 1000) * 4);
1306 
1307 	return true;
1308 }
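
/*
 * Example of the drain latency math above, using assumed values: a
 * 148,500 kHz dot clock at 4 bytes per pixel gives entries = 148 * 4 =
 * 592 > 256, so the 32x precision multiplier is selected and
 * plane_dl = (64 * 32 * 4) / (148 * 4) = 8192 / 592 = 13.  The cursor is
 * computed the same way with its pixel size fixed at 4 bytes.
 */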
1309 
1310 /*
1311  * Update drain latency registers of memory arbiter
1312  *
1313  * Valleyview SoC has a new memory arbiter and needs drain latency registers
1314  * to be programmed. Each plane has a drain latency multiplier and a drain
1315  * latency value.
1316  */
1317 
1318 static void vlv_update_drain_latency(struct drm_device *dev)
1319 {
1320 	struct drm_i915_private *dev_priv = dev->dev_private;
1321 	int planea_prec, planea_dl, planeb_prec, planeb_dl;
1322 	int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl;
1323 	int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is
1324 							either 16 or 32 */
1325 
1326 	/* For plane A, Cursor A */
1327 	if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl,
1328 				      &cursor_prec_mult, &cursora_dl)) {
1329 		cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1330 			DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_16;
1331 		planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1332 			DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_16;
1333 
1334 		I915_WRITE(VLV_DDL1, cursora_prec |
1335 				(cursora_dl << DDL_CURSORA_SHIFT) |
1336 				planea_prec | planea_dl);
1337 	}
1338 
1339 	/* For plane B, Cursor B */
1340 	if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl,
1341 				      &cursor_prec_mult, &cursorb_dl)) {
1342 		cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1343 			DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_16;
1344 		planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1345 			DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_16;
1346 
1347 		I915_WRITE(VLV_DDL2, cursorb_prec |
1348 				(cursorb_dl << DDL_CURSORB_SHIFT) |
1349 				planeb_prec | planeb_dl);
1350 	}
1351 }
1352 
1353 #define single_plane_enabled(mask) is_power_of_2(mask)
1354 
1355 static void valleyview_update_wm(struct drm_device *dev)
1356 {
1357 	static const int sr_latency_ns = 12000;
1358 	struct drm_i915_private *dev_priv = dev->dev_private;
1359 	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1360 	int plane_sr, cursor_sr;
1361 	int ignore_plane_sr, ignore_cursor_sr;
1362 	unsigned int enabled = 0;
1363 
1364 	vlv_update_drain_latency(dev);
1365 
1366 	if (g4x_compute_wm0(dev, PIPE_A,
1367 			    &valleyview_wm_info, latency_ns,
1368 			    &valleyview_cursor_wm_info, latency_ns,
1369 			    &planea_wm, &cursora_wm))
1370 		enabled |= 1 << PIPE_A;
1371 
1372 	if (g4x_compute_wm0(dev, PIPE_B,
1373 			    &valleyview_wm_info, latency_ns,
1374 			    &valleyview_cursor_wm_info, latency_ns,
1375 			    &planeb_wm, &cursorb_wm))
1376 		enabled |= 1 << PIPE_B;
1377 
1378 	if (single_plane_enabled(enabled) &&
1379 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1380 			     sr_latency_ns,
1381 			     &valleyview_wm_info,
1382 			     &valleyview_cursor_wm_info,
1383 			     &plane_sr, &ignore_cursor_sr) &&
1384 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1385 			     2*sr_latency_ns,
1386 			     &valleyview_wm_info,
1387 			     &valleyview_cursor_wm_info,
1388 			     &ignore_plane_sr, &cursor_sr)) {
1389 		I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
1390 	} else {
1391 		I915_WRITE(FW_BLC_SELF_VLV,
1392 			   I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN);
1393 		plane_sr = cursor_sr = 0;
1394 	}
1395 
1396 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1397 		      planea_wm, cursora_wm,
1398 		      planeb_wm, cursorb_wm,
1399 		      plane_sr, cursor_sr);
1400 
1401 	I915_WRITE(DSPFW1,
1402 		   (plane_sr << DSPFW_SR_SHIFT) |
1403 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1404 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
1405 		   planea_wm);
1406 	I915_WRITE(DSPFW2,
1407 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1408 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
1409 	I915_WRITE(DSPFW3,
1410 		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
1411 		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1412 }
1413 
1414 static void g4x_update_wm(struct drm_device *dev)
1415 {
1416 	static const int sr_latency_ns = 12000;
1417 	struct drm_i915_private *dev_priv = dev->dev_private;
1418 	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1419 	int plane_sr, cursor_sr;
1420 	unsigned int enabled = 0;
1421 
1422 	if (g4x_compute_wm0(dev, PIPE_A,
1423 			    &g4x_wm_info, latency_ns,
1424 			    &g4x_cursor_wm_info, latency_ns,
1425 			    &planea_wm, &cursora_wm))
1426 		enabled |= 1 << PIPE_A;
1427 
1428 	if (g4x_compute_wm0(dev, PIPE_B,
1429 			    &g4x_wm_info, latency_ns,
1430 			    &g4x_cursor_wm_info, latency_ns,
1431 			    &planeb_wm, &cursorb_wm))
1432 		enabled |= 1 << PIPE_B;
1433 
1434 	if (single_plane_enabled(enabled) &&
1435 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1436 			     sr_latency_ns,
1437 			     &g4x_wm_info,
1438 			     &g4x_cursor_wm_info,
1439 			     &plane_sr, &cursor_sr)) {
1440 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1441 	} else {
1442 		I915_WRITE(FW_BLC_SELF,
1443 			   I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN);
1444 		plane_sr = cursor_sr = 0;
1445 	}
1446 
1447 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1448 		      planea_wm, cursora_wm,
1449 		      planeb_wm, cursorb_wm,
1450 		      plane_sr, cursor_sr);
1451 
1452 	I915_WRITE(DSPFW1,
1453 		   (plane_sr << DSPFW_SR_SHIFT) |
1454 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1455 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
1456 		   planea_wm);
1457 	I915_WRITE(DSPFW2,
1458 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1459 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
1460 	/* HPLL off in SR has some issues on G4x... disable it */
1461 	I915_WRITE(DSPFW3,
1462 		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1463 		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1464 }
1465 
1466 static void i965_update_wm(struct drm_device *dev)
1467 {
1468 	struct drm_i915_private *dev_priv = dev->dev_private;
1469 	struct drm_crtc *crtc;
1470 	int srwm = 1;
1471 	int cursor_sr = 16;
1472 
1473 	/* Calc sr entries for one plane configs */
1474 	crtc = single_enabled_crtc(dev);
1475 	if (crtc) {
1476 		/* self-refresh has much higher latency */
1477 		static const int sr_latency_ns = 12000;
1478 		int clock = crtc->mode.clock;
1479 		int htotal = crtc->mode.htotal;
1480 		int hdisplay = crtc->mode.hdisplay;
1481 		int pixel_size = crtc->fb->bits_per_pixel / 8;
1482 		unsigned long line_time_us;
1483 		int entries;
1484 
1485 		line_time_us = ((htotal * 1000) / clock);
1486 
1487 		/* Use ns/us then divide to preserve precision */
1488 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1489 			pixel_size * hdisplay;
1490 		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1491 		srwm = I965_FIFO_SIZE - entries;
1492 		if (srwm < 0)
1493 			srwm = 1;
1494 		srwm &= 0x1ff;
1495 		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1496 			      entries, srwm);
1497 
1498 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1499 			pixel_size * 64;
1500 		entries = DIV_ROUND_UP(entries,
1501 					  i965_cursor_wm_info.cacheline_size);
1502 		cursor_sr = i965_cursor_wm_info.fifo_size -
1503 			(entries + i965_cursor_wm_info.guard_size);
1504 
1505 		if (cursor_sr > i965_cursor_wm_info.max_wm)
1506 			cursor_sr = i965_cursor_wm_info.max_wm;
1507 
1508 		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1509 			      "cursor %d\n", srwm, cursor_sr);
1510 
1511 		if (IS_CRESTLINE(dev))
1512 			I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1513 	} else {
1514 		/* Turn off self refresh if both pipes are enabled */
1515 		if (IS_CRESTLINE(dev))
1516 			I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
1517 				   & ~FW_BLC_SELF_EN);
1518 	}
1519 
1520 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1521 		      srwm);
1522 
1523 	/* 965 has limitations... */
1524 	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
1525 		   (8 << 16) | (8 << 8) | (8 << 0));
1526 	I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
1527 	/* update cursor SR watermark */
1528 	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1529 }
1530 
1531 static void i9xx_update_wm(struct drm_device *dev)
1532 {
1533 	struct drm_i915_private *dev_priv = dev->dev_private;
1534 	const struct intel_watermark_params *wm_info;
1535 	uint32_t fwater_lo;
1536 	uint32_t fwater_hi;
1537 	int cwm, srwm = 1;
1538 	int fifo_size;
1539 	int planea_wm, planeb_wm;
1540 	struct drm_crtc *crtc, *enabled = NULL;
1541 
1542 	if (IS_I945GM(dev))
1543 		wm_info = &i945_wm_info;
1544 	else if (!IS_GEN2(dev))
1545 		wm_info = &i915_wm_info;
1546 	else
1547 		wm_info = &i855_wm_info;
1548 
1549 	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1550 	crtc = intel_get_crtc_for_plane(dev, 0);
1551 	if (intel_crtc_active(crtc)) {
1552 		int cpp = crtc->fb->bits_per_pixel / 8;
1553 		if (IS_GEN2(dev))
1554 			cpp = 4;
1555 
1556 		planea_wm = intel_calculate_wm(crtc->mode.clock,
1557 					       wm_info, fifo_size, cpp,
1558 					       latency_ns);
1559 		enabled = crtc;
1560 	} else
1561 		planea_wm = fifo_size - wm_info->guard_size;
1562 
1563 	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1564 	crtc = intel_get_crtc_for_plane(dev, 1);
1565 	if (intel_crtc_active(crtc)) {
1566 		int cpp = crtc->fb->bits_per_pixel / 8;
1567 		if (IS_GEN2(dev))
1568 			cpp = 4;
1569 
1570 		planeb_wm = intel_calculate_wm(crtc->mode.clock,
1571 					       wm_info, fifo_size, cpp,
1572 					       latency_ns);
1573 		if (enabled == NULL)
1574 			enabled = crtc;
1575 		else
1576 			enabled = NULL;
1577 	} else
1578 		planeb_wm = fifo_size - wm_info->guard_size;
1579 
1580 	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1581 
1582 	/*
1583 	 * Overlay gets an aggressive default since video jitter is bad.
1584 	 */
1585 	cwm = 2;
1586 
1587 	/* Play safe and disable self-refresh before adjusting watermarks. */
1588 	if (IS_I945G(dev) || IS_I945GM(dev))
1589 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN_MASK | 0);
1590 	else if (IS_I915GM(dev))
1591 		I915_WRITE(INSTPM, I915_READ(INSTPM) & ~INSTPM_SELF_EN);
1592 
1593 	/* Calc sr entries for one plane configs */
1594 	if (HAS_FW_BLC(dev) && enabled) {
1595 		/* self-refresh has much higher latency */
1596 		static const int sr_latency_ns = 6000;
1597 		int clock = enabled->mode.clock;
1598 		int htotal = enabled->mode.htotal;
1599 		int hdisplay = enabled->mode.hdisplay;
1600 		int pixel_size = enabled->fb->bits_per_pixel / 8;
1601 		unsigned long line_time_us;
1602 		int entries;
1603 
1604 		line_time_us = (htotal * 1000) / clock;
1605 
1606 		/* Use ns/us then divide to preserve precision */
1607 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1608 			pixel_size * hdisplay;
1609 		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1610 		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1611 		srwm = wm_info->fifo_size - entries;
1612 		if (srwm < 0)
1613 			srwm = 1;
1614 
1615 		if (IS_I945G(dev) || IS_I945GM(dev))
1616 			I915_WRITE(FW_BLC_SELF,
1617 				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1618 		else if (IS_I915GM(dev))
1619 			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1620 	}
1621 
1622 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1623 		      planea_wm, planeb_wm, cwm, srwm);
1624 
1625 	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1626 	fwater_hi = (cwm & 0x1f);
1627 
1628 	/* Set request length to 8 cachelines per fetch */
1629 	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1630 	fwater_hi = fwater_hi | (1 << 8);
1631 
1632 	I915_WRITE(FW_BLC, fwater_lo);
1633 	I915_WRITE(FW_BLC2, fwater_hi);
1634 
1635 	if (HAS_FW_BLC(dev)) {
1636 		if (enabled) {
1637 			if (IS_I945G(dev) || IS_I945GM(dev))
1638 				I915_WRITE(FW_BLC_SELF,
1639 					   FW_BLC_SELF_EN_MASK | FW_BLC_SELF_EN);
1640 			else if (IS_I915GM(dev))
1641 				I915_WRITE(INSTPM, I915_READ(INSTPM) | INSTPM_SELF_EN);
1642 			DRM_DEBUG_KMS("memory self refresh enabled\n");
1643 		} else
1644 			DRM_DEBUG_KMS("memory self refresh disabled\n");
1645 	}
1646 }
1647 
1648 static void i830_update_wm(struct drm_device *dev)
1649 {
1650 	struct drm_i915_private *dev_priv = dev->dev_private;
1651 	struct drm_crtc *crtc;
1652 	uint32_t fwater_lo;
1653 	int planea_wm;
1654 
1655 	crtc = single_enabled_crtc(dev);
1656 	if (crtc == NULL)
1657 		return;
1658 
1659 	planea_wm = intel_calculate_wm(crtc->mode.clock, &i830_wm_info,
1660 				       dev_priv->display.get_fifo_size(dev, 0),
1661 				       4, latency_ns);
1662 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1663 	fwater_lo |= (3<<8) | planea_wm;
1664 
1665 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1666 
1667 	I915_WRITE(FW_BLC, fwater_lo);
1668 }
1669 
1670 #define ILK_LP0_PLANE_LATENCY		700
1671 #define ILK_LP0_CURSOR_LATENCY		1300
1672 
1673 /*
1674  * Check the wm result.
1675  *
1676  * If any calculated watermark value is larger than the maximum value that
1677  * can be programmed into the associated watermark register, that watermark
1678  * must be disabled.
1679  */
1680 static bool ironlake_check_srwm(struct drm_device *dev, int level,
1681 				int fbc_wm, int display_wm, int cursor_wm,
1682 				const struct intel_watermark_params *display,
1683 				const struct intel_watermark_params *cursor)
1684 {
1685 	struct drm_i915_private *dev_priv = dev->dev_private;
1686 
1687 	DRM_DEBUG_KMS("watermark %d: display plane %d, fbc lines %d,"
1688 		      " cursor %d\n", level, display_wm, fbc_wm, cursor_wm);
1689 
1690 	if (fbc_wm > SNB_FBC_MAX_SRWM) {
1691 		DRM_DEBUG_KMS("fbc watermark(%d) is too large(%d), disabling wm%d+\n",
1692 			      fbc_wm, SNB_FBC_MAX_SRWM, level);
1693 
1694 		/* FBC has its own way to disable the FBC WM */
1695 		I915_WRITE(DISP_ARB_CTL,
1696 			   I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS);
1697 		return false;
1698 	} else if (INTEL_INFO(dev)->gen >= 6) {
1699 		/* enable FBC WM (except on ILK, where it must remain off) */
1700 		I915_WRITE(DISP_ARB_CTL,
1701 			   I915_READ(DISP_ARB_CTL) & ~DISP_FBC_WM_DIS);
1702 	}
1703 
1704 	if (display_wm > display->max_wm) {
1705 		DRM_DEBUG_KMS("display watermark(%d) is too large(%d), disabling wm%d+\n",
1706 			      display_wm, (int)display->max_wm, level);
1707 		return false;
1708 	}
1709 
1710 	if (cursor_wm > cursor->max_wm) {
1711 		DRM_DEBUG_KMS("cursor watermark(%d) is too large(%d), disabling wm%d+\n",
1712 			      cursor_wm, (int)cursor->max_wm, level);
1713 		return false;
1714 	}
1715 
1716 	if (!(fbc_wm || display_wm || cursor_wm)) {
1717 		DRM_DEBUG_KMS("latency %d is 0, disabling wm%d+\n", level, level);
1718 		return false;
1719 	}
1720 
1721 	return true;
1722 }
1723 
1724 /*
1725  * Compute the watermark values for WM[1-3].
1726  */
1727 static bool ironlake_compute_srwm(struct drm_device *dev, int level, int plane,
1728 				  int latency_ns,
1729 				  const struct intel_watermark_params *display,
1730 				  const struct intel_watermark_params *cursor,
1731 				  int *fbc_wm, int *display_wm, int *cursor_wm)
1732 {
1733 	struct drm_crtc *crtc;
1734 	unsigned long line_time_us;
1735 	int hdisplay, htotal, pixel_size, clock;
1736 	int line_count, line_size;
1737 	int small, large;
1738 	int entries;
1739 
1740 	if (!latency_ns) {
1741 		*fbc_wm = *display_wm = *cursor_wm = 0;
1742 		return false;
1743 	}
1744 
1745 	crtc = intel_get_crtc_for_plane(dev, plane);
1746 	hdisplay = crtc->mode.hdisplay;
1747 	htotal = crtc->mode.htotal;
1748 	clock = crtc->mode.clock;
1749 	pixel_size = crtc->fb->bits_per_pixel / 8;
1750 
1751 	line_time_us = (htotal * 1000) / clock;
1752 	line_count = (latency_ns / line_time_us + 1000) / 1000;
1753 	line_size = hdisplay * pixel_size;
1754 
1755 	/* Use the minimum of the small and large buffer method for primary */
1756 	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
1757 	large = line_count * line_size;
1758 
1759 	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
1760 	*display_wm = entries + display->guard_size;
1761 
1762 	/*
1763 	 * Spec says:
1764 	 * FBC WM = ((Final Primary WM * 64) / number of bytes per line) + 2
1765 	 */
1766 	*fbc_wm = DIV_ROUND_UP(*display_wm * 64, line_size) + 2;
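	/*
	 * Worked example with illustrative numbers: a 1920-wide plane at
	 * 4 bytes per pixel has line_size = 7680, so a *display_wm of 109
	 * gives *fbc_wm = DIV_ROUND_UP(109 * 64, 7680) + 2 = 1 + 2 = 3 lines.
	 */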
1767 
1768 	/* calculate the self-refresh watermark for display cursor */
1769 	entries = line_count * pixel_size * 64;
1770 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1771 	*cursor_wm = entries + cursor->guard_size;
1772 
1773 	return ironlake_check_srwm(dev, level,
1774 				   *fbc_wm, *display_wm, *cursor_wm,
1775 				   display, cursor);
1776 }
1777 
1778 static void ironlake_update_wm(struct drm_device *dev)
1779 {
1780 	struct drm_i915_private *dev_priv = dev->dev_private;
1781 	int fbc_wm, plane_wm, cursor_wm;
1782 	unsigned int enabled;
1783 
1784 	enabled = 0;
1785 	if (g4x_compute_wm0(dev, PIPE_A,
1786 			    &ironlake_display_wm_info,
1787 			    ILK_LP0_PLANE_LATENCY,
1788 			    &ironlake_cursor_wm_info,
1789 			    ILK_LP0_CURSOR_LATENCY,
1790 			    &plane_wm, &cursor_wm)) {
1791 		I915_WRITE(WM0_PIPEA_ILK,
1792 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
1793 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
1794 			      " plane %d, " "cursor: %d\n",
1795 			      plane_wm, cursor_wm);
1796 		enabled |= 1 << PIPE_A;
1797 	}
1798 
1799 	if (g4x_compute_wm0(dev, PIPE_B,
1800 			    &ironlake_display_wm_info,
1801 			    ILK_LP0_PLANE_LATENCY,
1802 			    &ironlake_cursor_wm_info,
1803 			    ILK_LP0_CURSOR_LATENCY,
1804 			    &plane_wm, &cursor_wm)) {
1805 		I915_WRITE(WM0_PIPEB_ILK,
1806 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
1807 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
1808 			      " plane %d, cursor: %d\n",
1809 			      plane_wm, cursor_wm);
1810 		enabled |= 1 << PIPE_B;
1811 	}
1812 
1813 	/*
1814 	 * Calculate and update the self-refresh watermark only when one
1815 	 * display plane is used.
1816 	 */
1817 	I915_WRITE(WM3_LP_ILK, 0);
1818 	I915_WRITE(WM2_LP_ILK, 0);
1819 	I915_WRITE(WM1_LP_ILK, 0);
1820 
1821 	if (!single_plane_enabled(enabled))
1822 		return;
1823 	enabled = ffs(enabled) - 1;
1824 
1825 	/* WM1 */
1826 	if (!ironlake_compute_srwm(dev, 1, enabled,
1827 				   ILK_READ_WM1_LATENCY() * 500,
1828 				   &ironlake_display_srwm_info,
1829 				   &ironlake_cursor_srwm_info,
1830 				   &fbc_wm, &plane_wm, &cursor_wm))
1831 		return;
1832 
1833 	I915_WRITE(WM1_LP_ILK,
1834 		   WM1_LP_SR_EN |
1835 		   (ILK_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
1836 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
1837 		   (plane_wm << WM1_LP_SR_SHIFT) |
1838 		   cursor_wm);
1839 
1840 	/* WM2 */
1841 	if (!ironlake_compute_srwm(dev, 2, enabled,
1842 				   ILK_READ_WM2_LATENCY() * 500,
1843 				   &ironlake_display_srwm_info,
1844 				   &ironlake_cursor_srwm_info,
1845 				   &fbc_wm, &plane_wm, &cursor_wm))
1846 		return;
1847 
1848 	I915_WRITE(WM2_LP_ILK,
1849 		   WM2_LP_EN |
1850 		   (ILK_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
1851 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
1852 		   (plane_wm << WM1_LP_SR_SHIFT) |
1853 		   cursor_wm);
1854 
1855 	/*
1856 	 * WM3 is unsupported on ILK, probably because we don't have latency
1857 	 * data for that power state
1858 	 */
1859 }
1860 
1861 static void sandybridge_update_wm(struct drm_device *dev)
1862 {
1863 	struct drm_i915_private *dev_priv = dev->dev_private;
1864 	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
1865 	u32 val;
1866 	int fbc_wm, plane_wm, cursor_wm;
1867 	unsigned int enabled;
1868 
1869 	enabled = 0;
1870 	if (g4x_compute_wm0(dev, PIPE_A,
1871 			    &sandybridge_display_wm_info, latency,
1872 			    &sandybridge_cursor_wm_info, latency,
1873 			    &plane_wm, &cursor_wm)) {
1874 		val = I915_READ(WM0_PIPEA_ILK);
1875 		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1876 		I915_WRITE(WM0_PIPEA_ILK, val |
1877 			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1878 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
1879 			      " plane %d, " "cursor: %d\n",
1880 			      plane_wm, cursor_wm);
1881 		enabled |= 1 << PIPE_A;
1882 	}
1883 
1884 	if (g4x_compute_wm0(dev, PIPE_B,
1885 			    &sandybridge_display_wm_info, latency,
1886 			    &sandybridge_cursor_wm_info, latency,
1887 			    &plane_wm, &cursor_wm)) {
1888 		val = I915_READ(WM0_PIPEB_ILK);
1889 		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1890 		I915_WRITE(WM0_PIPEB_ILK, val |
1891 			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1892 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
1893 			      " plane %d, cursor: %d\n",
1894 			      plane_wm, cursor_wm);
1895 		enabled |= 1 << PIPE_B;
1896 	}
1897 
1898 	/*
1899 	 * Calculate and update the self-refresh watermark only when one
1900 	 * display plane is used.
1901 	 *
1902 	 * SNB supports 3 levels of watermarks.
1903 	 *
1904 	 * WM1/WM2/WM3 watermarks have to be enabled in ascending order
1905 	 * and disabled in descending order.
1906 	 *
1907 	 */
1908 	I915_WRITE(WM3_LP_ILK, 0);
1909 	I915_WRITE(WM2_LP_ILK, 0);
1910 	I915_WRITE(WM1_LP_ILK, 0);
1911 
1912 	if (!single_plane_enabled(enabled) ||
1913 	    dev_priv->sprite_scaling_enabled)
1914 		return;
1915 	enabled = ffs(enabled) - 1;
1916 
1917 	/* WM1 */
1918 	if (!ironlake_compute_srwm(dev, 1, enabled,
1919 				   SNB_READ_WM1_LATENCY() * 500,
1920 				   &sandybridge_display_srwm_info,
1921 				   &sandybridge_cursor_srwm_info,
1922 				   &fbc_wm, &plane_wm, &cursor_wm))
1923 		return;
1924 
1925 	I915_WRITE(WM1_LP_ILK,
1926 		   WM1_LP_SR_EN |
1927 		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
1928 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
1929 		   (plane_wm << WM1_LP_SR_SHIFT) |
1930 		   cursor_wm);
1931 
1932 	/* WM2 */
1933 	if (!ironlake_compute_srwm(dev, 2, enabled,
1934 				   SNB_READ_WM2_LATENCY() * 500,
1935 				   &sandybridge_display_srwm_info,
1936 				   &sandybridge_cursor_srwm_info,
1937 				   &fbc_wm, &plane_wm, &cursor_wm))
1938 		return;
1939 
1940 	I915_WRITE(WM2_LP_ILK,
1941 		   WM2_LP_EN |
1942 		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
1943 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
1944 		   (plane_wm << WM1_LP_SR_SHIFT) |
1945 		   cursor_wm);
1946 
1947 	/* WM3 */
1948 	if (!ironlake_compute_srwm(dev, 3, enabled,
1949 				   SNB_READ_WM3_LATENCY() * 500,
1950 				   &sandybridge_display_srwm_info,
1951 				   &sandybridge_cursor_srwm_info,
1952 				   &fbc_wm, &plane_wm, &cursor_wm))
1953 		return;
1954 
1955 	I915_WRITE(WM3_LP_ILK,
1956 		   WM3_LP_EN |
1957 		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
1958 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
1959 		   (plane_wm << WM1_LP_SR_SHIFT) |
1960 		   cursor_wm);
1961 }
1962 
1963 static void ivybridge_update_wm(struct drm_device *dev)
1964 {
1965 	struct drm_i915_private *dev_priv = dev->dev_private;
1966 	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
1967 	u32 val;
1968 	int fbc_wm, plane_wm, cursor_wm;
1969 	int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
1970 	unsigned int enabled;
1971 
1972 	enabled = 0;
1973 	if (g4x_compute_wm0(dev, PIPE_A,
1974 			    &sandybridge_display_wm_info, latency,
1975 			    &sandybridge_cursor_wm_info, latency,
1976 			    &plane_wm, &cursor_wm)) {
1977 		val = I915_READ(WM0_PIPEA_ILK);
1978 		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1979 		I915_WRITE(WM0_PIPEA_ILK, val |
1980 			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1981 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
1982 			      " plane %d, " "cursor: %d\n",
1983 			      plane_wm, cursor_wm);
1984 		enabled |= 1 << PIPE_A;
1985 	}
1986 
1987 	if (g4x_compute_wm0(dev, PIPE_B,
1988 			    &sandybridge_display_wm_info, latency,
1989 			    &sandybridge_cursor_wm_info, latency,
1990 			    &plane_wm, &cursor_wm)) {
1991 		val = I915_READ(WM0_PIPEB_ILK);
1992 		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
1993 		I915_WRITE(WM0_PIPEB_ILK, val |
1994 			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
1995 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
1996 			      " plane %d, cursor: %d\n",
1997 			      plane_wm, cursor_wm);
1998 		enabled |= 1 << PIPE_B;
1999 	}
2000 
2001 	if (g4x_compute_wm0(dev, PIPE_C,
2002 			    &sandybridge_display_wm_info, latency,
2003 			    &sandybridge_cursor_wm_info, latency,
2004 			    &plane_wm, &cursor_wm)) {
2005 		val = I915_READ(WM0_PIPEC_IVB);
2006 		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
2007 		I915_WRITE(WM0_PIPEC_IVB, val |
2008 			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
2009 		DRM_DEBUG_KMS("FIFO watermarks For pipe C -"
2010 			      " plane %d, cursor: %d\n",
2011 			      plane_wm, cursor_wm);
2012 		enabled |= 1 << PIPE_C;
2013 	}
2014 
2015 	/*
2016 	 * Calculate and update the self-refresh watermark only when one
2017 	 * display plane is used.
2018 	 *
2019 	 * SNB supports 3 levels of watermarks.
2020 	 *
2021 	 * WM1/WM2/WM3 watermarks have to be enabled in ascending order
2022 	 * and disabled in descending order.
2023 	 *
2024 	 */
2025 	I915_WRITE(WM3_LP_ILK, 0);
2026 	I915_WRITE(WM2_LP_ILK, 0);
2027 	I915_WRITE(WM1_LP_ILK, 0);
2028 
2029 	if (!single_plane_enabled(enabled) ||
2030 	    dev_priv->sprite_scaling_enabled)
2031 		return;
2032 	enabled = ffs(enabled) - 1;
2033 
2034 	/* WM1 */
2035 	if (!ironlake_compute_srwm(dev, 1, enabled,
2036 				   SNB_READ_WM1_LATENCY() * 500,
2037 				   &sandybridge_display_srwm_info,
2038 				   &sandybridge_cursor_srwm_info,
2039 				   &fbc_wm, &plane_wm, &cursor_wm))
2040 		return;
2041 
2042 	I915_WRITE(WM1_LP_ILK,
2043 		   WM1_LP_SR_EN |
2044 		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
2045 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
2046 		   (plane_wm << WM1_LP_SR_SHIFT) |
2047 		   cursor_wm);
2048 
2049 	/* WM2 */
2050 	if (!ironlake_compute_srwm(dev, 2, enabled,
2051 				   SNB_READ_WM2_LATENCY() * 500,
2052 				   &sandybridge_display_srwm_info,
2053 				   &sandybridge_cursor_srwm_info,
2054 				   &fbc_wm, &plane_wm, &cursor_wm))
2055 		return;
2056 
2057 	I915_WRITE(WM2_LP_ILK,
2058 		   WM2_LP_EN |
2059 		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
2060 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
2061 		   (plane_wm << WM1_LP_SR_SHIFT) |
2062 		   cursor_wm);
2063 
2064 	/* WM3, note we have to correct the cursor latency */
2065 	if (!ironlake_compute_srwm(dev, 3, enabled,
2066 				   SNB_READ_WM3_LATENCY() * 500,
2067 				   &sandybridge_display_srwm_info,
2068 				   &sandybridge_cursor_srwm_info,
2069 				   &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
2070 	    !ironlake_compute_srwm(dev, 3, enabled,
2071 				   2 * SNB_READ_WM3_LATENCY() * 500,
2072 				   &sandybridge_display_srwm_info,
2073 				   &sandybridge_cursor_srwm_info,
2074 				   &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
2075 		return;
2076 
2077 	I915_WRITE(WM3_LP_ILK,
2078 		   WM3_LP_EN |
2079 		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
2080 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
2081 		   (plane_wm << WM1_LP_SR_SHIFT) |
2082 		   cursor_wm);
2083 }
2084 
2085 static uint32_t hsw_wm_get_pixel_rate(struct drm_device *dev,
2086 				      struct drm_crtc *crtc)
2087 {
2088 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2089 	uint32_t pixel_rate, pfit_size;
2090 
2091 	pixel_rate = intel_crtc->config.adjusted_mode.clock;
2092 
2093 	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
2094 	 * adjust the pixel_rate here. */
2095 
2096 	pfit_size = intel_crtc->config.pch_pfit.size;
2097 	if (pfit_size) {
2098 		uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
2099 
2100 		pipe_w = intel_crtc->config.requested_mode.hdisplay;
2101 		pipe_h = intel_crtc->config.requested_mode.vdisplay;
2102 		pfit_w = (pfit_size >> 16) & 0xFFFF;
2103 		pfit_h = pfit_size & 0xFFFF;
2104 		if (pipe_w < pfit_w)
2105 			pipe_w = pfit_w;
2106 		if (pipe_h < pfit_h)
2107 			pipe_h = pfit_h;
2108 
2109 		pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
2110 				     pfit_w * pfit_h);
2111 	}
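	/*
	 * Illustrative example (hypothetical mode): downscaling a 1920x1080
	 * source into a 1280x720 panel fitter window with an adjusted clock of
	 * 148500 kHz gives 148500 * (1920 * 1080) / (1280 * 720) = 334125,
	 * i.e. a 2.25x higher effective pixel rate.  Upscaling leaves the rate
	 * unchanged because pipe_w/pipe_h are clamped up to the pfit size.
	 */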
2112 
2113 	return pixel_rate;
2114 }
2115 
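/*
 * hsw_wm_method1 is the "pixel rate * bytes per pixel * latency" form of the
 * watermark, expressed in 64-byte units plus the usual 2 guard entries.
 * Worked example with illustrative numbers: pixel_rate = 148500,
 * bytes_per_pixel = 4 and latency = 20 give 148500 * 4 * 20 = 11880000 and
 * DIV_ROUND_UP_ULL(11880000, 64 * 10000) + 2 = 19 + 2 = 21.
 */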
2116 static uint32_t hsw_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
2117 			       uint32_t latency)
2118 {
2119 	uint64_t ret;
2120 
2121 	ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
2122 	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
2123 
2124 	return ret;
2125 }
2126 
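/*
 * hsw_wm_method2 is the line-based form: it derives how many lines the
 * latency covers and converts that to 64-byte units plus 2 guard entries.
 * Worked example with illustrative numbers: latency = 20,
 * pixel_rate = 148500 and pipe_htotal = 2200 give
 * (20 * 148500) / (2200 * 10000) = 0, hence 1 line; with 1920 horizontal
 * pixels at 4 bytes per pixel that is 7680 bytes, and
 * DIV_ROUND_UP(7680, 64) + 2 = 122.
 */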
2127 static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
2128 			       uint32_t horiz_pixels, uint8_t bytes_per_pixel,
2129 			       uint32_t latency)
2130 {
2131 	uint32_t ret;
2132 
2133 	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
2134 	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
2135 	ret = DIV_ROUND_UP(ret, 64) + 2;
2136 	return ret;
2137 }
2138 
2139 static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2140 			   uint8_t bytes_per_pixel)
2141 {
2142 	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
2143 }
2144 
2145 struct hsw_pipe_wm_parameters {
2146 	bool active;
2147 	bool sprite_enabled;
2148 	uint8_t pri_bytes_per_pixel;
2149 	uint8_t spr_bytes_per_pixel;
2150 	uint8_t cur_bytes_per_pixel;
2151 	uint32_t pri_horiz_pixels;
2152 	uint32_t spr_horiz_pixels;
2153 	uint32_t cur_horiz_pixels;
2154 	uint32_t pipe_htotal;
2155 	uint32_t pixel_rate;
2156 };
2157 
2158 struct hsw_wm_maximums {
2159 	uint16_t pri;
2160 	uint16_t spr;
2161 	uint16_t cur;
2162 	uint16_t fbc;
2163 };
2164 
2165 struct hsw_lp_wm_result {
2166 	bool enable;
2167 	bool fbc_enable;
2168 	uint32_t pri_val;
2169 	uint32_t spr_val;
2170 	uint32_t cur_val;
2171 	uint32_t fbc_val;
2172 };
2173 
2174 struct hsw_wm_values {
2175 	uint32_t wm_pipe[3];
2176 	uint32_t wm_lp[3];
2177 	uint32_t wm_lp_spr[3];
2178 	uint32_t wm_linetime[3];
2179 	bool enable_fbc_wm;
2180 };
2181 
2182 enum hsw_data_buf_partitioning {
2183 	HSW_DATA_BUF_PART_1_2,
2184 	HSW_DATA_BUF_PART_5_6,
2185 };
2186 
2187 /* For both WM_PIPE and WM_LP. */
2188 static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
2189 				   uint32_t mem_value,
2190 				   bool is_lp)
2191 {
2192 	uint32_t method1, method2;
2193 
2194 	/* TODO: for now, assume the primary plane is always enabled. */
2195 	if (!params->active)
2196 		return 0;
2197 
2198 	method1 = hsw_wm_method1(params->pixel_rate,
2199 				 params->pri_bytes_per_pixel,
2200 				 mem_value);
2201 
2202 	if (!is_lp)
2203 		return method1;
2204 
2205 	method2 = hsw_wm_method2(params->pixel_rate,
2206 				 params->pipe_htotal,
2207 				 params->pri_horiz_pixels,
2208 				 params->pri_bytes_per_pixel,
2209 				 mem_value);
2210 
2211 	return min(method1, method2);
2212 }
2213 
2214 /* For both WM_PIPE and WM_LP. */
2215 static uint32_t hsw_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
2216 				   uint32_t mem_value)
2217 {
2218 	uint32_t method1, method2;
2219 
2220 	if (!params->active || !params->sprite_enabled)
2221 		return 0;
2222 
2223 	method1 = hsw_wm_method1(params->pixel_rate,
2224 				 params->spr_bytes_per_pixel,
2225 				 mem_value);
2226 	method2 = hsw_wm_method2(params->pixel_rate,
2227 				 params->pipe_htotal,
2228 				 params->spr_horiz_pixels,
2229 				 params->spr_bytes_per_pixel,
2230 				 mem_value);
2231 	return min(method1, method2);
2232 }
2233 
2234 /* For both WM_PIPE and WM_LP. */
2235 static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
2236 				   uint32_t mem_value)
2237 {
2238 	if (!params->active)
2239 		return 0;
2240 
2241 	return hsw_wm_method2(params->pixel_rate,
2242 			      params->pipe_htotal,
2243 			      params->cur_horiz_pixels,
2244 			      params->cur_bytes_per_pixel,
2245 			      mem_value);
2246 }
2247 
2248 /* Only for WM_LP. */
2249 static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
2250 				   uint32_t pri_val,
2251 				   uint32_t mem_value)
2252 {
2253 	if (!params->active)
2254 		return 0;
2255 
2256 	return hsw_wm_fbc(pri_val,
2257 			  params->pri_horiz_pixels,
2258 			  params->pri_bytes_per_pixel);
2259 }
2260 
2261 static bool hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums *max,
2262 			      struct hsw_pipe_wm_parameters *params,
2263 			      struct hsw_lp_wm_result *result)
2264 {
2265 	enum i915_pipe pipe;
2266 	uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
2267 
2268 	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
2269 		struct hsw_pipe_wm_parameters *p = &params[pipe];
2270 
2271 		pri_val[pipe] = hsw_compute_pri_wm(p, mem_value, true);
2272 		spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
2273 		cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
2274 		fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
2275 	}
2276 
2277 	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
2278 	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
2279 	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
2280 	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
2281 
2282 	if (result->fbc_val > max->fbc) {
2283 		result->fbc_enable = false;
2284 		result->fbc_val = 0;
2285 	} else {
2286 		result->fbc_enable = true;
2287 	}
2288 
2289 	result->enable = result->pri_val <= max->pri &&
2290 			 result->spr_val <= max->spr &&
2291 			 result->cur_val <= max->cur;
2292 	return result->enable;
2293 }
2294 
2295 static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
2296 				    uint32_t mem_value, enum i915_pipe pipe,
2297 				    struct hsw_pipe_wm_parameters *params)
2298 {
2299 	uint32_t pri_val, cur_val, spr_val;
2300 
2301 	pri_val = hsw_compute_pri_wm(params, mem_value, false);
2302 	spr_val = hsw_compute_spr_wm(params, mem_value);
2303 	cur_val = hsw_compute_cur_wm(params, mem_value);
2304 
2305 	WARN(pri_val > 127,
2306 	     "Primary WM error, mode not supported for pipe %c\n",
2307 	     pipe_name(pipe));
2308 	WARN(spr_val > 127,
2309 	     "Sprite WM error, mode not supported for pipe %c\n",
2310 	     pipe_name(pipe));
2311 	WARN(cur_val > 63,
2312 	     "Cursor WM error, mode not supported for pipe %c\n",
2313 	     pipe_name(pipe));
2314 
2315 	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
2316 	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
2317 	       cur_val;
2318 }
2319 
2320 static uint32_t
2321 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2322 {
2323 	struct drm_i915_private *dev_priv = dev->dev_private;
2324 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2325 	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
2326 	u32 linetime, ips_linetime;
2327 
2328 	if (!intel_crtc_active(crtc))
2329 		return 0;
2330 
2331 	/* The watermarks are computed based on how long it takes to fill a
2332 	 * single row at the given clock rate, multiplied by 8.
2333 	 */
2334 	linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8, mode->clock);
2335 	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
2336 					 intel_ddi_get_cdclk_freq(dev_priv));
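	/*
	 * Worked example with an illustrative mode: htotal = 2200 and
	 * clock = 148500 give DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119,
	 * i.e. the roughly 14.8us line time expressed in 1/8 us units.
	 */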
2337 
2338 	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2339 	       PIPE_WM_LINETIME_TIME(linetime);
2340 }
2341 
2342 static void hsw_compute_wm_parameters(struct drm_device *dev,
2343 				      struct hsw_pipe_wm_parameters *params,
2344 				      uint32_t *wm,
2345 				      struct hsw_wm_maximums *lp_max_1_2,
2346 				      struct hsw_wm_maximums *lp_max_5_6)
2347 {
2348 	struct drm_i915_private *dev_priv = dev->dev_private;
2349 	struct drm_crtc *crtc;
2350 	struct drm_plane *plane;
2351 	uint64_t sskpd = I915_READ64(MCH_SSKPD);
2352 	enum i915_pipe pipe;
2353 	int pipes_active = 0, sprites_enabled = 0;
2354 
2355 	if ((sskpd >> 56) & 0xFF)
2356 		wm[0] = (sskpd >> 56) & 0xFF;
2357 	else
2358 		wm[0] = sskpd & 0xF;
2359 	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
2360 	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
2361 	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
2362 	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
2363 
2364 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2365 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2366 		struct hsw_pipe_wm_parameters *p;
2367 
2368 		pipe = intel_crtc->pipe;
2369 		p = &params[pipe];
2370 
2371 		p->active = intel_crtc_active(crtc);
2372 		if (!p->active)
2373 			continue;
2374 
2375 		pipes_active++;
2376 
2377 		p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
2378 		p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
2379 		p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
2380 		p->cur_bytes_per_pixel = 4;
2381 		p->pri_horiz_pixels =
2382 			intel_crtc->config.requested_mode.hdisplay;
2383 		p->cur_horiz_pixels = 64;
2384 	}
2385 
2386 	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2387 		struct intel_plane *intel_plane = to_intel_plane(plane);
2388 		struct hsw_pipe_wm_parameters *p;
2389 
2390 		pipe = intel_plane->pipe;
2391 		p = &params[pipe];
2392 
2393 		p->sprite_enabled = intel_plane->wm.enable;
2394 		p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
2395 		p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
2396 
2397 		if (p->sprite_enabled)
2398 			sprites_enabled++;
2399 	}
2400 
2401 	if (pipes_active > 1) {
2402 		lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
2403 		lp_max_1_2->spr = lp_max_5_6->spr = 128;
2404 		lp_max_1_2->cur = lp_max_5_6->cur = 64;
2405 	} else {
2406 		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
2407 		lp_max_5_6->pri = sprites_enabled ? 128 : 768;
2408 		lp_max_1_2->spr = 384;
2409 		lp_max_5_6->spr = 640;
2410 		lp_max_1_2->cur = lp_max_5_6->cur = 255;
2411 	}
2412 	lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
2413 }
2414 
2415 static void hsw_compute_wm_results(struct drm_device *dev,
2416 				   struct hsw_pipe_wm_parameters *params,
2417 				   uint32_t *wm,
2418 				   struct hsw_wm_maximums *lp_maximums,
2419 				   struct hsw_wm_values *results)
2420 {
2421 	struct drm_i915_private *dev_priv = dev->dev_private;
2422 	struct drm_crtc *crtc;
2423 	struct hsw_lp_wm_result lp_results[4] = {};
2424 	enum i915_pipe pipe;
2425 	int level, max_level, wm_lp;
2426 
2427 	for (level = 1; level <= 4; level++)
2428 		if (!hsw_compute_lp_wm(wm[level], lp_maximums, params,
2429 				       &lp_results[level - 1]))
2430 			break;
2431 	max_level = level - 1;
2432 
2433 	/* The spec says it is preferred to disable FBC WMs instead of disabling
2434 	memset(results, 0, sizeof(*results));
2435 
2436 	/* The spec says it is preferred to disable FBC WMs instead of disabling
2437 	 * a WM level, so compute enable_fbc_wm after zeroing the results. */
2438 	results->enable_fbc_wm = true;
2439 	for (level = 1; level <= max_level; level++) {
2440 		if (!lp_results[level - 1].fbc_enable) {
2441 			results->enable_fbc_wm = false;
2442 			break;
2443 		}
2444 	}
2445 		const struct hsw_lp_wm_result *r;
2446 
2447 		level = (max_level == 4 && wm_lp > 1) ? wm_lp + 1 : wm_lp;
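		/*
		 * When all four LP levels were computed (max_level == 4), the
		 * WM_LP2/WM_LP3 registers take the two highest levels, so
		 * wm_lp 1,2,3 maps to levels 1,3,4; otherwise the mapping is
		 * the identity.
		 */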
2448 		if (level > max_level)
2449 			break;
2450 
2451 		r = &lp_results[level - 1];
2452 		results->wm_lp[wm_lp - 1] = HSW_WM_LP_VAL(level * 2,
2453 							  r->fbc_val,
2454 							  r->pri_val,
2455 							  r->cur_val);
2456 		results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2457 	}
2458 
2459 	for_each_pipe(pipe)
2460 		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
2461 							     pipe,
2462 							     &params[pipe]);
2463 
2464 	for_each_pipe(pipe) {
2465 		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
2466 		results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
2467 	}
2468 }
2469 
2470 /* Find the result with the highest watermark level enabled. If both results
2471  * have the same level, prefer the one with FBC WMs enabled, then prefer r1. */
2472 static struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
2473 					   struct hsw_wm_values *r2)
2474 {
2475 	int i, val_r1 = 0, val_r2 = 0;
2476 
2477 	for (i = 0; i < 3; i++) {
2478 		if (r1->wm_lp[i] & WM3_LP_EN)
2479 			val_r1 = r1->wm_lp[i] & WM1_LP_LATENCY_MASK;
2480 		if (r2->wm_lp[i] & WM3_LP_EN)
2481 			val_r2 = r2->wm_lp[i] & WM1_LP_LATENCY_MASK;
2482 	}
2483 
2484 	if (val_r1 == val_r2) {
2485 		if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
2486 			return r2;
2487 		else
2488 			return r1;
2489 	} else if (val_r1 > val_r2) {
2490 		return r1;
2491 	} else {
2492 		return r2;
2493 	}
2494 }
2495 
2496 /*
2497  * The spec says we shouldn't write when we don't need to, because every write
2498  * causes WMs to be re-evaluated, expending some power.
2499  */
2500 static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
2501 				struct hsw_wm_values *results,
2502 				enum hsw_data_buf_partitioning partitioning)
2503 {
2504 	struct hsw_wm_values previous;
2505 	uint32_t val;
2506 	enum hsw_data_buf_partitioning prev_partitioning;
2507 	bool prev_enable_fbc_wm;
2508 
2509 	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
2510 	previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
2511 	previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
2512 	previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
2513 	previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
2514 	previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
2515 	previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
2516 	previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
2517 	previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
2518 	previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
2519 	previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
2520 	previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
2521 
2522 	prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
2523 			    HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
2524 
2525 	prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
2526 
2527 	if (memcmp(results->wm_pipe, previous.wm_pipe,
2528 		   sizeof(results->wm_pipe)) == 0 &&
2529 	    memcmp(results->wm_lp, previous.wm_lp,
2530 		   sizeof(results->wm_lp)) == 0 &&
2531 	    memcmp(results->wm_lp_spr, previous.wm_lp_spr,
2532 		   sizeof(results->wm_lp_spr)) == 0 &&
2533 	    memcmp(results->wm_linetime, previous.wm_linetime,
2534 		   sizeof(results->wm_linetime)) == 0 &&
2535 	    partitioning == prev_partitioning &&
2536 	    results->enable_fbc_wm == prev_enable_fbc_wm)
2537 		return;
2538 
2539 	if (previous.wm_lp[2] != 0)
2540 		I915_WRITE(WM3_LP_ILK, 0);
2541 	if (previous.wm_lp[1] != 0)
2542 		I915_WRITE(WM2_LP_ILK, 0);
2543 	if (previous.wm_lp[0] != 0)
2544 		I915_WRITE(WM1_LP_ILK, 0);
2545 
2546 	if (previous.wm_pipe[0] != results->wm_pipe[0])
2547 		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2548 	if (previous.wm_pipe[1] != results->wm_pipe[1])
2549 		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2550 	if (previous.wm_pipe[2] != results->wm_pipe[2])
2551 		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2552 
2553 	if (previous.wm_linetime[0] != results->wm_linetime[0])
2554 		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2555 	if (previous.wm_linetime[1] != results->wm_linetime[1])
2556 		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2557 	if (previous.wm_linetime[2] != results->wm_linetime[2])
2558 		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2559 
2560 	if (prev_partitioning != partitioning) {
2561 		val = I915_READ(WM_MISC);
2562 		if (partitioning == HSW_DATA_BUF_PART_1_2)
2563 			val &= ~WM_MISC_DATA_PARTITION_5_6;
2564 		else
2565 			val |= WM_MISC_DATA_PARTITION_5_6;
2566 		I915_WRITE(WM_MISC, val);
2567 	}
2568 
2569 	if (prev_enable_fbc_wm != results->enable_fbc_wm) {
2570 		val = I915_READ(DISP_ARB_CTL);
2571 		if (results->enable_fbc_wm)
2572 			val &= ~DISP_FBC_WM_DIS;
2573 		else
2574 			val |= DISP_FBC_WM_DIS;
2575 		I915_WRITE(DISP_ARB_CTL, val);
2576 	}
2577 
2578 	if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
2579 		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2580 	if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
2581 		I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2582 	if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
2583 		I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2584 
2585 	if (results->wm_lp[0] != 0)
2586 		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2587 	if (results->wm_lp[1] != 0)
2588 		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2589 	if (results->wm_lp[2] != 0)
2590 		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2591 }
2592 
2593 static void haswell_update_wm(struct drm_device *dev)
2594 {
2595 	struct drm_i915_private *dev_priv = dev->dev_private;
2596 	struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
2597 	struct hsw_pipe_wm_parameters params[3];
2598 	struct hsw_wm_values results_1_2, results_5_6, *best_results;
2599 	uint32_t wm[5];
2600 	enum hsw_data_buf_partitioning partitioning;
2601 
2602 	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
2603 
2604 	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
2605 	if (lp_max_1_2.pri != lp_max_5_6.pri) {
2606 		hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
2607 				       &results_5_6);
2608 		best_results = hsw_find_best_result(&results_1_2, &results_5_6);
2609 	} else {
2610 		best_results = &results_1_2;
2611 	}
2612 
2613 	partitioning = (best_results == &results_1_2) ?
2614 		       HSW_DATA_BUF_PART_1_2 : HSW_DATA_BUF_PART_5_6;
2615 
2616 	hsw_write_wm_values(dev_priv, best_results, partitioning);
2617 }
2618 
2619 static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
2620 				     uint32_t sprite_width, int pixel_size,
2621 				     bool enable)
2622 {
2623 	struct drm_plane *plane;
2624 
2625 	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2626 		struct intel_plane *intel_plane = to_intel_plane(plane);
2627 
2628 		if (intel_plane->pipe == pipe) {
2629 			intel_plane->wm.enable = enable;
2630 			intel_plane->wm.horiz_pixels = sprite_width + 1;
2631 			intel_plane->wm.bytes_per_pixel = pixel_size;
2632 			break;
2633 		}
2634 	}
2635 
2636 	haswell_update_wm(dev);
2637 }
2638 
2639 static bool
2640 sandybridge_compute_sprite_wm(struct drm_device *dev, int plane,
2641 			      uint32_t sprite_width, int pixel_size,
2642 			      const struct intel_watermark_params *display,
2643 			      int display_latency_ns, int *sprite_wm)
2644 {
2645 	struct drm_crtc *crtc;
2646 	int clock;
2647 	int entries, tlb_miss;
2648 
2649 	crtc = intel_get_crtc_for_plane(dev, plane);
2650 	if (!intel_crtc_active(crtc)) {
2651 		*sprite_wm = display->guard_size;
2652 		return false;
2653 	}
2654 
2655 	clock = crtc->mode.clock;
2656 
2657 	/* Use the small buffer method to calculate the sprite watermark */
2658 	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
2659 	tlb_miss = display->fifo_size*display->cacheline_size -
2660 		sprite_width * 8;
2661 	if (tlb_miss > 0)
2662 		entries += tlb_miss;
2663 	entries = DIV_ROUND_UP(entries, display->cacheline_size);
2664 	*sprite_wm = entries + display->guard_size;
2665 	if (*sprite_wm > (int)display->max_wm)
2666 		*sprite_wm = display->max_wm;
2667 
2668 	return true;
2669 }
2670 
2671 static bool
2672 sandybridge_compute_sprite_srwm(struct drm_device *dev, int plane,
2673 				uint32_t sprite_width, int pixel_size,
2674 				const struct intel_watermark_params *display,
2675 				int latency_ns, int *sprite_wm)
2676 {
2677 	struct drm_crtc *crtc;
2678 	unsigned long line_time_us;
2679 	int clock;
2680 	int line_count, line_size;
2681 	int small, large;
2682 	int entries;
2683 
2684 	if (!latency_ns) {
2685 		*sprite_wm = 0;
2686 		return false;
2687 	}
2688 
2689 	crtc = intel_get_crtc_for_plane(dev, plane);
2690 	clock = crtc->mode.clock;
2691 	if (!clock) {
2692 		*sprite_wm = 0;
2693 		return false;
2694 	}
2695 
2696 	line_time_us = (sprite_width * 1000) / clock;
2697 	if (!line_time_us) {
2698 		*sprite_wm = 0;
2699 		return false;
2700 	}
2701 
2702 	line_count = (latency_ns / line_time_us + 1000) / 1000;
2703 	line_size = sprite_width * pixel_size;
2704 
2705 	/* Use the minimum of the small and large buffer methods for the sprite */
2706 	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
2707 	large = line_count * line_size;
2708 
2709 	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
2710 	*sprite_wm = entries + display->guard_size;
2711 
2712 	return *sprite_wm > 0x3ff ? false : true;
2713 }
2714 
2715 static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
2716 					 uint32_t sprite_width, int pixel_size,
2717 					 bool enable)
2718 {
2719 	struct drm_i915_private *dev_priv = dev->dev_private;
2720 	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
2721 	u32 val;
2722 	int sprite_wm, reg;
2723 	int ret;
2724 
2725 	if (!enable)
2726 		return;
2727 
2728 	switch (pipe) {
2729 	case 0:
2730 		reg = WM0_PIPEA_ILK;
2731 		break;
2732 	case 1:
2733 		reg = WM0_PIPEB_ILK;
2734 		break;
2735 	case 2:
2736 		reg = WM0_PIPEC_IVB;
2737 		break;
2738 	default:
2739 		return; /* bad pipe */
2740 	}
2741 
2742 	ret = sandybridge_compute_sprite_wm(dev, pipe, sprite_width, pixel_size,
2743 					    &sandybridge_display_wm_info,
2744 					    latency, &sprite_wm);
2745 	if (!ret) {
2746 		DRM_DEBUG_KMS("failed to compute sprite wm for pipe %c\n",
2747 			      pipe_name(pipe));
2748 		return;
2749 	}
2750 
2751 	val = I915_READ(reg);
2752 	val &= ~WM0_PIPE_SPRITE_MASK;
2753 	I915_WRITE(reg, val | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
2754 	DRM_DEBUG_KMS("sprite watermarks For pipe %c - %d\n", pipe_name(pipe), sprite_wm);
2755 
2756 
2757 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
2758 					      pixel_size,
2759 					      &sandybridge_display_srwm_info,
2760 					      SNB_READ_WM1_LATENCY() * 500,
2761 					      &sprite_wm);
2762 	if (!ret) {
2763 		DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
2764 			      pipe_name(pipe));
2765 		return;
2766 	}
2767 	I915_WRITE(WM1S_LP_ILK, sprite_wm);
2768 
2769 	/* Only IVB has two more LP watermarks for sprite */
2770 	if (!IS_IVYBRIDGE(dev))
2771 		return;
2772 
2773 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
2774 					      pixel_size,
2775 					      &sandybridge_display_srwm_info,
2776 					      SNB_READ_WM2_LATENCY() * 500,
2777 					      &sprite_wm);
2778 	if (!ret) {
2779 		DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
2780 			      pipe_name(pipe));
2781 		return;
2782 	}
2783 	I915_WRITE(WM2S_LP_IVB, sprite_wm);
2784 
2785 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
2786 					      pixel_size,
2787 					      &sandybridge_display_srwm_info,
2788 					      SNB_READ_WM3_LATENCY() * 500,
2789 					      &sprite_wm);
2790 	if (!ret) {
2791 		DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
2792 			      pipe_name(pipe));
2793 		return;
2794 	}
2795 	I915_WRITE(WM3S_LP_IVB, sprite_wm);
2796 }
2797 
2798 /**
2799  * intel_update_watermarks - update FIFO watermark values based on current modes
2800  *
2801  * Calculate watermark values for the various WM regs based on current mode
2802  * and plane configuration.
2803  *
2804  * There are several cases to deal with here:
2805  *   - normal (i.e. non-self-refresh)
2806  *   - self-refresh (SR) mode
2807  *   - lines are large relative to FIFO size (buffer can hold up to 2)
2808  *   - lines are small relative to FIFO size (buffer can hold more than 2
2809  *     lines), so need to account for TLB latency
2810  *
2811  *   The normal calculation is:
2812  *     watermark = dotclock * bytes per pixel * latency
2813  *   where latency is platform & configuration dependent (we assume pessimal
2814  *   values here).
2815  *
2816  *   The SR calculation is:
2817  *     watermark = (trunc(latency/line time)+1) * surface width *
2818  *       bytes per pixel
2819  *   where
2820  *     line time = htotal / dotclock
2821  *     surface width = hdisplay for normal plane and 64 for cursor
2822  *   and latency is assumed to be high, as above.
2823  *
2824  * The final value programmed to the register should always be rounded up,
2825  * and include an extra 2 entries to account for clock crossings.
2826  *
2827  * We don't use the sprite, so we can ignore that.  And on Crestline we have
2828  * to set the non-SR watermarks to 8.
2829  */
2830 void intel_update_watermarks(struct drm_device *dev)
2831 {
2832 	struct drm_i915_private *dev_priv = dev->dev_private;
2833 
2834 	if (dev_priv->display.update_wm)
2835 		dev_priv->display.update_wm(dev);
2836 }
2837 
2838 void intel_update_sprite_watermarks(struct drm_device *dev, int pipe,
2839 				    uint32_t sprite_width, int pixel_size,
2840 				    bool enable)
2841 {
2842 	struct drm_i915_private *dev_priv = dev->dev_private;
2843 
2844 	if (dev_priv->display.update_sprite_wm)
2845 		dev_priv->display.update_sprite_wm(dev, pipe, sprite_width,
2846 						   pixel_size, enable);
2847 }
2848 
2849 static struct drm_i915_gem_object *
2850 intel_alloc_context_page(struct drm_device *dev)
2851 {
2852 	struct drm_i915_gem_object *ctx;
2853 	int ret;
2854 
2855 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
2856 
2857 	ctx = i915_gem_alloc_object(dev, 4096);
2858 	if (!ctx) {
2859 		DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
2860 		return NULL;
2861 	}
2862 
2863 	ret = i915_gem_object_pin(ctx, 4096, true, false);
2864 	if (ret) {
2865 		DRM_ERROR("failed to pin power context: %d\n", ret);
2866 		goto err_unref;
2867 	}
2868 
2869 	ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
2870 	if (ret) {
2871 		DRM_ERROR("failed to set-domain on power context: %d\n", ret);
2872 		goto err_unpin;
2873 	}
2874 
2875 	return ctx;
2876 
2877 err_unpin:
2878 	i915_gem_object_unpin(ctx);
2879 err_unref:
2880 	drm_gem_object_unreference(&ctx->base);
2881 	return NULL;
2882 }
2883 
2884 /**
2885  * Lock protecting IPS related data structures
2886  */
2887 struct lock mchdev_lock;
2888 LOCK_SYSINIT(mchdev, &mchdev_lock, "mchdev", LK_CANRECURSE);
2889 
2890 /* Global for IPS driver to get at the current i915 device. Protected by
2891  * mchdev_lock. */
2892 static struct drm_i915_private *i915_mch_dev;
2893 
2894 bool ironlake_set_drps(struct drm_device *dev, u8 val)
2895 {
2896 	struct drm_i915_private *dev_priv = dev->dev_private;
2897 	u16 rgvswctl;
2898 
2899 	rgvswctl = I915_READ16(MEMSWCTL);
2900 	if (rgvswctl & MEMCTL_CMD_STS) {
2901 		DRM_DEBUG("gpu busy, RCS change rejected\n");
2902 		return false; /* still busy with another command */
2903 	}
2904 
2905 	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
2906 		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
2907 	I915_WRITE16(MEMSWCTL, rgvswctl);
2908 	POSTING_READ16(MEMSWCTL);
2909 
2910 	rgvswctl |= MEMCTL_CMD_STS;
2911 	I915_WRITE16(MEMSWCTL, rgvswctl);
2912 
2913 	return true;
2914 }
2915 
2916 static void ironlake_enable_drps(struct drm_device *dev)
2917 {
2918 	struct drm_i915_private *dev_priv = dev->dev_private;
2919 	u32 rgvmodectl = I915_READ(MEMMODECTL);
2920 	u8 fmax, fmin, fstart, vstart;
2921 
2922 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
2923 
2924 	/* Enable temp reporting */
2925 	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
2926 	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
2927 
2928 	/* 100ms RC evaluation intervals */
2929 	I915_WRITE(RCUPEI, 100000);
2930 	I915_WRITE(RCDNEI, 100000);
2931 
2932 	/* Set max/min thresholds to 90ms and 80ms respectively */
2933 	I915_WRITE(RCBMAXAVG, 90000);
2934 	I915_WRITE(RCBMINAVG, 80000);
2935 
2936 	I915_WRITE(MEMIHYST, 1);
2937 
2938 	/* Set up min, max, and cur for interrupt handling */
2939 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
2940 	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
2941 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
2942 		MEMMODE_FSTART_SHIFT;
2943 
2944 	vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
2945 		PXVFREQ_PX_SHIFT;
2946 
2947 	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
2948 	dev_priv->ips.fstart = fstart;
2949 
2950 	dev_priv->ips.max_delay = fstart;
2951 	dev_priv->ips.min_delay = fmin;
2952 	dev_priv->ips.cur_delay = fstart;
2953 
2954 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
2955 			 fmax, fmin, fstart);
2956 
2957 	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
2958 
2959 	/*
2960 	 * Interrupts will be enabled in ironlake_irq_postinstall
2961 	 */
2962 
2963 	I915_WRITE(VIDSTART, vstart);
2964 	POSTING_READ(VIDSTART);
2965 
2966 	rgvmodectl |= MEMMODE_SWMODE_EN;
2967 	I915_WRITE(MEMMODECTL, rgvmodectl);
2968 
2969 	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
2970 		DRM_ERROR("stuck trying to change perf mode\n");
2971 	mdelay(1);
2972 
2973 	ironlake_set_drps(dev, fstart);
2974 
2975 	dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
2976 		I915_READ(0x112e0);
2977 	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
2978 	dev_priv->ips.last_count2 = I915_READ(0x112f4);
2979 	getrawmonotonic(&dev_priv->ips.last_time2);
2980 
2981 	lockmgr(&mchdev_lock, LK_RELEASE);
2982 }
2983 
2984 static void ironlake_disable_drps(struct drm_device *dev)
2985 {
2986 	struct drm_i915_private *dev_priv = dev->dev_private;
2987 	u16 rgvswctl;
2988 
2989 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
2990 
2991 	rgvswctl = I915_READ16(MEMSWCTL);
2992 
2993 	/* Ack interrupts, disable EFC interrupt */
2994 	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
2995 	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
2996 	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
2997 	I915_WRITE(DEIIR, DE_PCU_EVENT);
2998 	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
2999 
3000 	/* Go back to the starting frequency */
3001 	ironlake_set_drps(dev, dev_priv->ips.fstart);
3002 	mdelay(1);
3003 	rgvswctl |= MEMCTL_CMD_STS;
3004 	I915_WRITE(MEMSWCTL, rgvswctl);
3005 	mdelay(1);
3006 
3007 	lockmgr(&mchdev_lock, LK_RELEASE);
3008 }
3009 
3010 /* There's a funny hw issue where the hw returns all 0 when reading from
3011  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
3012  * ourselves, instead of doing a rmw cycle (which might result in us clearing
3013  * all limits and the gpu stuck at whatever frequency it is currently at).
3014  */
3015 static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 *val)
3016 {
3017 	u32 limits;
3018 
3019 	limits = 0;
3020 
3021 	if (*val >= dev_priv->rps.max_delay)
3022 		*val = dev_priv->rps.max_delay;
3023 	limits |= dev_priv->rps.max_delay << 24;
3024 
3025 	/* Only set the down limit when we've reached the lowest level to avoid
3026 	 * getting more interrupts, otherwise leave this clear. This prevents a
3027 	 * race in the hw when coming out of rc6: There's a tiny window where
3028 	 * the hw runs at the minimal clock before selecting the desired
3029 	 * frequency, if the down threshold expires in that window we will not
3030 	 * receive a down interrupt. */
3031 	if (*val <= dev_priv->rps.min_delay) {
3032 		*val = dev_priv->rps.min_delay;
3033 		limits |= dev_priv->rps.min_delay << 16;
3034 	}
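	/*
	 * The returned value packs the soft maximum into bits 31:24 and, only
	 * once the floor has been reached, the minimum into bits 23:16.  For
	 * example (illustrative delay in 50MHz units), max_delay = 0x16 with a
	 * request above the minimum yields limits = 0x16000000.
	 */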
3035 
3036 	return limits;
3037 }
3038 
3039 void gen6_set_rps(struct drm_device *dev, u8 val)
3040 {
3041 	struct drm_i915_private *dev_priv = dev->dev_private;
3042 	u32 limits = gen6_rps_limits(dev_priv, &val);
3043 
3044 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3045 	WARN_ON(val > dev_priv->rps.max_delay);
3046 	WARN_ON(val < dev_priv->rps.min_delay);
3047 
3048 	if (val == dev_priv->rps.cur_delay)
3049 		return;
3050 
3051 	if (IS_HASWELL(dev))
3052 		I915_WRITE(GEN6_RPNSWREQ,
3053 			   HSW_FREQUENCY(val));
3054 	else
3055 		I915_WRITE(GEN6_RPNSWREQ,
3056 			   GEN6_FREQUENCY(val) |
3057 			   GEN6_OFFSET(0) |
3058 			   GEN6_AGGRESSIVE_TURBO);
3059 
3060 	/* Make sure we continue to get interrupts
3061 	 * until we hit the minimum or maximum frequencies.
3062 	 */
3063 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
3064 
3065 	POSTING_READ(GEN6_RPNSWREQ);
3066 
3067 	dev_priv->rps.cur_delay = val;
3068 
3069 	trace_intel_gpu_freq_change(val * 50);
3070 }
3071 
3072 /*
3073  * Wait until the previous freq change has completed,
3074  * or the timeout elapsed, and then update our notion
3075  * of the current GPU frequency.
3076  */
3077 static void vlv_update_rps_cur_delay(struct drm_i915_private *dev_priv)
3078 {
3079 	unsigned long timeout = jiffies + msecs_to_jiffies(10);
3080 	u32 pval;
3081 
3082 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3083 
3084 	do {
3085 		pval = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
3086 		if (time_after(jiffies, timeout)) {
3087 			DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
3088 			break;
3089 		}
3090 		udelay(10);
3091 	} while (pval & 1);
3092 
3093 	pval >>= 8;
3094 
3095 	if (pval != dev_priv->rps.cur_delay)
3096 		DRM_DEBUG_DRIVER("Punit overrode GPU freq: %d MHz (%u) requested, but got %d Mhz (%u)\n",
3097 				 vlv_gpu_freq(dev_priv->mem_freq, dev_priv->rps.cur_delay),
3098 				 dev_priv->rps.cur_delay,
3099 				 vlv_gpu_freq(dev_priv->mem_freq, pval), pval);
3100 
3101 	dev_priv->rps.cur_delay = pval;
3102 }
3103 
3104 void valleyview_set_rps(struct drm_device *dev, u8 val)
3105 {
3106 	struct drm_i915_private *dev_priv = dev->dev_private;
3107 
3108 	gen6_rps_limits(dev_priv, &val);
3109 
3110 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3111 	WARN_ON(val > dev_priv->rps.max_delay);
3112 	WARN_ON(val < dev_priv->rps.min_delay);
3113 
3114 	vlv_update_rps_cur_delay(dev_priv);
3115 
3116 	DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n",
3117 			 vlv_gpu_freq(dev_priv->mem_freq,
3118 				      dev_priv->rps.cur_delay),
3119 			 dev_priv->rps.cur_delay,
3120 			 vlv_gpu_freq(dev_priv->mem_freq, val), val);
3121 
3122 	if (val == dev_priv->rps.cur_delay)
3123 		return;
3124 
3125 	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
3126 
3127 	dev_priv->rps.cur_delay = val;
3128 
3129 	trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
3130 }
3131 
3132 
3133 static void gen6_disable_rps(struct drm_device *dev)
3134 {
3135 	struct drm_i915_private *dev_priv = dev->dev_private;
3136 
3137 	I915_WRITE(GEN6_RC_CONTROL, 0);
3138 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
3139 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
3140 	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
3141 	/* Completely masking the PM interrupts here doesn't race with the rps
3142 	 * work item unmasking them again, because that path uses a different
3143 	 * register (PMIMR) to mask PM interrupts. The only risk is leaving
3144 	 * stale bits in PMIIR and PMIMR, which gen6_enable_rps will clean up. */
3145 
3146 	lockmgr(&dev_priv->rps.lock, LK_EXCLUSIVE);
3147 	dev_priv->rps.pm_iir = 0;
3148 	lockmgr(&dev_priv->rps.lock, LK_RELEASE);
3149 
3150 	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3151 }
3152 
3153 static void valleyview_disable_rps(struct drm_device *dev)
3154 {
3155 	struct drm_i915_private *dev_priv = dev->dev_private;
3156 
3157 	I915_WRITE(GEN6_RC_CONTROL, 0);
3158 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
3159 	I915_WRITE(GEN6_PMIER, 0);
3160 	/* Completely masking the PM interrupts here doesn't race with the rps
3161 	 * work item unmasking them again, because that path uses a different
3162 	 * register (PMIMR) to mask PM interrupts. The only risk is leaving
3163 	 * stale bits in PMIIR and PMIMR, which gen6_enable_rps will clean up. */
3164 
3165 	lockmgr(&dev_priv->rps.lock, LK_EXCLUSIVE);
3166 	dev_priv->rps.pm_iir = 0;
3167 	lockmgr(&dev_priv->rps.lock, LK_RELEASE);
3168 
3169 	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
3170 
3171 	if (dev_priv->vlv_pctx) {
3172 		drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
3173 		dev_priv->vlv_pctx = NULL;
3174 	}
3175 }
3176 
3177 int intel_enable_rc6(const struct drm_device *dev)
3178 {
3179 	/* Respect the kernel parameter if it is set */
3180 	if (i915_enable_rc6 >= 0)
3181 		return i915_enable_rc6;
3182 
3183 	/* Disable RC6 on Ironlake */
3184 	if (INTEL_INFO(dev)->gen == 5)
3185 		return 0;
3186 
3187 	if (IS_HASWELL(dev)) {
3188 		DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
3189 		return INTEL_RC6_ENABLE;
3190 	}
3191 
3192 	/* snb/ivb have more than one rc6 state. */
3193 	if (INTEL_INFO(dev)->gen == 6) {
3194 		DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
3195 		return INTEL_RC6_ENABLE;
3196 	}
3197 
3198 	DRM_DEBUG_DRIVER("RC6 and deep RC6 enabled\n");
3199 	return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
3200 }
3201 
3202 static void gen6_enable_rps(struct drm_device *dev)
3203 {
3204 	struct drm_i915_private *dev_priv = dev->dev_private;
3205 	struct intel_ring_buffer *ring;
3206 	u32 rp_state_cap;
3207 	u32 gt_perf_status;
3208 	u32 rc6vids, pcu_mbox, rc6_mask = 0;
3209 	u32 gtfifodbg;
3210 	int rc6_mode;
3211 	int i, ret;
3212 
3213 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3214 
3215 	/* Here begins a magic sequence of register writes to enable
3216 	 * auto-downclocking.
3217 	 *
3218 	 * Perhaps there might be some value in exposing these to
3219 	 * userspace...
3220 	 */
3221 	I915_WRITE(GEN6_RC_STATE, 0);
3222 
3223 	/* Clear the DBG now so we don't confuse earlier errors */
3224 	if ((gtfifodbg = I915_READ(GTFIFODBG))) {
3225 		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
3226 		I915_WRITE(GTFIFODBG, gtfifodbg);
3227 	}
3228 
3229 	gen6_gt_force_wake_get(dev_priv);
3230 
3231 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3232 	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
3233 
3234 	/* In units of 50MHz */
3235 	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
3236 	dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
3237 	dev_priv->rps.cur_delay = 0;
3238 
3239 	/* disable the counters and set deterministic thresholds */
3240 	I915_WRITE(GEN6_RC_CONTROL, 0);
3241 
3242 	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
3243 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
3244 	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
3245 	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
3246 	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
3247 
3248 	for_each_ring(ring, dev_priv, i)
3249 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3250 
3251 	I915_WRITE(GEN6_RC_SLEEP, 0);
3252 	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
3253 	I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
3254 	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
3255 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
3256 
3257 	/* Check if we are enabling RC6 */
3258 	rc6_mode = intel_enable_rc6(dev_priv->dev);
3259 	if (rc6_mode & INTEL_RC6_ENABLE)
3260 		rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
3261 
3262 	/* We don't use those on Haswell */
3263 	if (!IS_HASWELL(dev)) {
3264 		if (rc6_mode & INTEL_RC6p_ENABLE)
3265 			rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
3266 
3267 		if (rc6_mode & INTEL_RC6pp_ENABLE)
3268 			rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
3269 	}
3270 
3271 	DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
3272 			(rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
3273 			(rc6_mask & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
3274 			(rc6_mask & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
3275 
3276 	I915_WRITE(GEN6_RC_CONTROL,
3277 		   rc6_mask |
3278 		   GEN6_RC_CTL_EI_MODE(1) |
3279 		   GEN6_RC_CTL_HW_ENABLE);
3280 
3281 	if (IS_HASWELL(dev)) {
3282 		I915_WRITE(GEN6_RPNSWREQ,
3283 			   HSW_FREQUENCY(10));
3284 		I915_WRITE(GEN6_RC_VIDEO_FREQ,
3285 			   HSW_FREQUENCY(12));
3286 	} else {
3287 		I915_WRITE(GEN6_RPNSWREQ,
3288 			   GEN6_FREQUENCY(10) |
3289 			   GEN6_OFFSET(0) |
3290 			   GEN6_AGGRESSIVE_TURBO);
3291 		I915_WRITE(GEN6_RC_VIDEO_FREQ,
3292 			   GEN6_FREQUENCY(12));
3293 	}
3294 
3295 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
3296 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
3297 		   dev_priv->rps.max_delay << 24 |
3298 		   dev_priv->rps.min_delay << 16);
3299 
3300 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
3301 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
3302 	I915_WRITE(GEN6_RP_UP_EI, 66000);
3303 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
3304 
3305 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3306 	I915_WRITE(GEN6_RP_CONTROL,
3307 		   GEN6_RP_MEDIA_TURBO |
3308 		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
3309 		   GEN6_RP_MEDIA_IS_GFX |
3310 		   GEN6_RP_ENABLE |
3311 		   GEN6_RP_UP_BUSY_AVG |
3312 		   (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
3313 
3314 	ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
3315 	if (!ret) {
3316 		pcu_mbox = 0;
3317 		ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
3318 		if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
3319 			DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
3320 					 (dev_priv->rps.max_delay & 0xff) * 50,
3321 					 (pcu_mbox & 0xff) * 50);
3322 			dev_priv->rps.hw_max = pcu_mbox & 0xff;
3323 		}
3324 	} else {
3325 		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
3326 	}
3327 
3328 	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
3329 
3330 	/* requires MSI enabled */
3331 	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) | GEN6_PM_RPS_EVENTS);
3332 	lockmgr(&dev_priv->rps.lock, LK_EXCLUSIVE);
3333 	/* FIXME: Our interrupt enabling sequence is bonghits.
3334 	 * dev_priv->rps.pm_iir really should be 0 here. */
3335 	dev_priv->rps.pm_iir = 0;
3336 	I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS);
3337 	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3338 	lockmgr(&dev_priv->rps.lock, LK_RELEASE);
3339 	/* unmask all PM interrupts */
3340 	I915_WRITE(GEN6_PMINTRMSK, 0);
3341 
3342 	rc6vids = 0;
3343 	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
3344 	if (IS_GEN6(dev) && ret) {
3345 		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
3346 	} else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
3347 		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
3348 			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
3349 		rc6vids &= 0xffff00;
3350 		rc6vids |= GEN6_ENCODE_RC6_VID(450);
3351 		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
3352 		if (ret)
3353 			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
3354 	}
3355 
3356 	gen6_gt_force_wake_put(dev_priv);
3357 }
3358 
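/*
 * Program the PCU's ring/IA frequency preference table.  For each GPU
 * frequency between the current max and min RPS levels we pick a ring
 * frequency (and, before Haswell, a CPU/IA frequency, since those parts
 * share a clock domain) and hand the triple to the PCU mailbox.  The
 * min_freq/scaling_factor constants below appear to be empirically chosen.
 */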
3359 static void gen6_update_ring_freq(struct drm_device *dev)
3360 {
3361 	struct drm_i915_private *dev_priv = dev->dev_private;
3362 	int min_freq = 15;
3363 	unsigned int gpu_freq;
3364 	unsigned int max_ia_freq, min_ring_freq;
3365 	int scaling_factor = 180;
3366 
3367 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3368 
3369 #if 0
3370 	max_ia_freq = cpufreq_quick_get_max(0);
3371 	/*
3372 	 * Default to measured freq if none found, PCU will ensure we don't go
3373 	 * over
3374 	 */
3375 	if (!max_ia_freq)
3376 		max_ia_freq = tsc_khz;
3377 #else
3378 	max_ia_freq = tsc_frequency / 1000;
3379 #endif
3380 
3381 	/* Convert from kHz to MHz */
3382 	max_ia_freq /= 1000;
3383 
3384 	min_ring_freq = I915_READ(MCHBAR_MIRROR_BASE_SNB + DCLK);
3385 	/* convert DDR frequency from units of 133.3MHz to bandwidth */
3386 	min_ring_freq = (2 * 4 * min_ring_freq + 2) / 3;
3387 
3388 	/*
3389 	 * For each potential GPU frequency, load a ring frequency we'd like
3390 	 * to use for memory access.  We do this by specifying the IA frequency
3391 	 * the PCU should use as a reference to determine the ring frequency.
3392 	 */
3393 	for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
3394 	     gpu_freq--) {
3395 		int diff = dev_priv->rps.max_delay - gpu_freq;
3396 		unsigned int ia_freq = 0, ring_freq = 0;
3397 
3398 		if (IS_HASWELL(dev)) {
3399 			ring_freq = (gpu_freq * 5 + 3) / 4;
3400 			ring_freq = max(min_ring_freq, ring_freq);
3401 			/* leave ia_freq as the default, chosen by cpufreq */
3402 		} else {
3403 			/* On older processors, there is no separate ring
3404 			 * clock domain, so in order to boost the bandwidth
3405 			 * of the ring, we need to upclock the CPU (ia_freq).
3406 			 *
3407 			 * For GPU frequencies less than 750MHz,
3408 			 * just use the lowest ring freq.
3409 			 */
3410 			if (gpu_freq < min_freq)
3411 				ia_freq = 800;
3412 			else
3413 				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
3414 			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
3415 		}
3416 
3417 		sandybridge_pcode_write(dev_priv,
3418 					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
3419 					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
3420 					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
3421 					gpu_freq);
3422 	}
3423 }
3424 
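/*
 * Read the maximum RPS level (RP0) for ValleyView from the graphics
 * frequency fuse on the NC sideband, clamped to a ceiling of 0xea.
 */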
3425 int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
3426 {
3427 	u32 val, rp0;
3428 
3429 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
3430 
3431 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
3432 	/* Clamp to max */
3433 	rp0 = min_t(u32, rp0, 0xea);
3434 
3435 	return rp0;
3436 }
3437 
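/*
 * Read the "efficient" RPS level (RPe) for ValleyView; the value is split
 * across the Fmax/Vmin low and high fuse registers, so stitch it together.
 */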
3438 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
3439 {
3440 	u32 val, rpe;
3441 
3442 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
3443 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
3444 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
3445 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
3446 
3447 	return rpe;
3448 }
3449 
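/* The minimum RPS level (RPn) comes straight from the Punit LFM register. */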
3450 int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
3451 {
3452 	return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
3453 }
3454 
3455 static void vlv_rps_timer_work(struct work_struct *work)
3456 {
3457 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
3458 						    rps.vlv_work.work);
3459 
3460 	/*
3461 	 * Timer fired, we must be idle.  Drop to min voltage state.
3462 	 * Note: we use RPe here since it should match the
3463 	 * Vmin we were shooting for.  That should give us better
3464 	 * perf when we come back out of RC6 than if we used the
3465 	 * min freq available.
3466 	 */
3467 	mutex_lock(&dev_priv->rps.hw_lock);
3468 	if (dev_priv->rps.cur_delay > dev_priv->rps.rpe_delay)
3469 		valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
3470 	mutex_unlock(&dev_priv->rps.hw_lock);
3471 }
3472 
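/*
 * Set up the Gunit power context (PCBR).  If the BIOS already programmed
 * VLV_PCBR, wrap the preallocated range of stolen memory; otherwise carve
 * a 24KiB buffer out of stolen memory and point PCBR at it.
 */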
3473 static void valleyview_setup_pctx(struct drm_device *dev)
3474 {
3475 	struct drm_i915_private *dev_priv = dev->dev_private;
3476 	struct drm_i915_gem_object *pctx;
3477 	unsigned long pctx_paddr;
3478 	u32 pcbr;
3479 	int pctx_size = 24*1024;
3480 
3481 	pcbr = I915_READ(VLV_PCBR);
3482 	if (pcbr) {
3483 		/* BIOS set it up already, grab the pre-alloc'd space */
3484 		int pcbr_offset;
3485 
3486 		pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
3487 		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
3488 								      pcbr_offset,
3489 								      -1,
3490 								      pctx_size);
3491 		goto out;
3492 	}
3493 
3494 	/*
3495 	 * From the Gunit register HAS:
3496 	 * The Gfx driver is expected to program this register and ensure
3497 	 * proper allocation within Gfx stolen memory.  For example, this
3498 	 * register should be programmed such that the PCBR range does not
3499 	 * overlap with other ranges, such as the frame buffer, protected
3500 	 * memory, or any other relevant ranges.
3501 	 */
3502 	pctx = i915_gem_object_create_stolen(dev, pctx_size);
3503 	if (!pctx) {
3504 		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
3505 		return;
3506 	}
3507 
3508 	pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
3509 	I915_WRITE(VLV_PCBR, pctx_paddr);
3510 
3511 out:
3512 	dev_priv->vlv_pctx = pctx;
3513 }
3514 
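/*
 * Bring up RPS and RC6 on ValleyView: set up the power context, program the
 * RP thresholds and RC6 control, read the fused max/efficient/min levels,
 * start out at the efficient (RPe) level and unmask the PM interrupts.
 */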
3515 static void valleyview_enable_rps(struct drm_device *dev)
3516 {
3517 	struct drm_i915_private *dev_priv = dev->dev_private;
3518 	struct intel_ring_buffer *ring;
3519 	u32 gtfifodbg, val;
3520 	int i;
3521 
3522 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3523 
3524 	if ((gtfifodbg = I915_READ(GTFIFODBG))) {
3525 		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
3526 		I915_WRITE(GTFIFODBG, gtfifodbg);
3527 	}
3528 
3529 	valleyview_setup_pctx(dev);
3530 
3531 	gen6_gt_force_wake_get(dev_priv);
3532 
3533 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
3534 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
3535 	I915_WRITE(GEN6_RP_UP_EI, 66000);
3536 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
3537 
3538 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3539 
3540 	I915_WRITE(GEN6_RP_CONTROL,
3541 		   GEN6_RP_MEDIA_TURBO |
3542 		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
3543 		   GEN6_RP_MEDIA_IS_GFX |
3544 		   GEN6_RP_ENABLE |
3545 		   GEN6_RP_UP_BUSY_AVG |
3546 		   GEN6_RP_DOWN_IDLE_CONT);
3547 
3548 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
3549 	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
3550 	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
3551 
3552 	for_each_ring(ring, dev_priv, i)
3553 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3554 
3555 	I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350);
3556 
3557 	/* allows RC6 residency counter to work */
3558 	I915_WRITE(0x138104, _MASKED_BIT_ENABLE(0x3));
3559 	I915_WRITE(GEN6_RC_CONTROL,
3560 		   GEN7_RC_CTL_TO_MODE);
3561 
3562 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
3563 	switch ((val >> 6) & 3) {
3564 	case 0:
3565 	case 1:
3566 		dev_priv->mem_freq = 800;
3567 		break;
3568 	case 2:
3569 		dev_priv->mem_freq = 1066;
3570 		break;
3571 	case 3:
3572 		dev_priv->mem_freq = 1333;
3573 		break;
3574 	}
3575 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
3576 
3577 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
3578 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
3579 
3580 	dev_priv->rps.cur_delay = (val >> 8) & 0xff;
3581 	DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
3582 			 vlv_gpu_freq(dev_priv->mem_freq,
3583 				      dev_priv->rps.cur_delay),
3584 			 dev_priv->rps.cur_delay);
3585 
3586 	dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
3587 	dev_priv->rps.hw_max = dev_priv->rps.max_delay;
3588 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
3589 			 vlv_gpu_freq(dev_priv->mem_freq,
3590 				      dev_priv->rps.max_delay),
3591 			 dev_priv->rps.max_delay);
3592 
3593 	dev_priv->rps.rpe_delay = valleyview_rps_rpe_freq(dev_priv);
3594 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
3595 			 vlv_gpu_freq(dev_priv->mem_freq,
3596 				      dev_priv->rps.rpe_delay),
3597 			 dev_priv->rps.rpe_delay);
3598 
3599 	dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv);
3600 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
3601 			 vlv_gpu_freq(dev_priv->mem_freq,
3602 				      dev_priv->rps.min_delay),
3603 			 dev_priv->rps.min_delay);
3604 
3605 	DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
3606 			 vlv_gpu_freq(dev_priv->mem_freq,
3607 				      dev_priv->rps.rpe_delay),
3608 			 dev_priv->rps.rpe_delay);
3609 
3610 	INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);
3611 
3612 	valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
3613 
3614 	/* requires MSI enabled */
3615 	I915_WRITE(GEN6_PMIER, GEN6_PM_RPS_EVENTS);
3616 	lockmgr(&dev_priv->rps.lock, LK_EXCLUSIVE);
3617 	WARN_ON(dev_priv->rps.pm_iir != 0);
3618 	I915_WRITE(GEN6_PMIMR, 0);
3619 	lockmgr(&dev_priv->rps.lock, LK_RELEASE);
3620 	/* enable all PM interrupts */
3621 	I915_WRITE(GEN6_PMINTRMSK, 0);
3622 
3623 	gen6_gt_force_wake_put(dev_priv);
3624 }
3625 
3626 void ironlake_teardown_rc6(struct drm_device *dev)
3627 {
3628 	struct drm_i915_private *dev_priv = dev->dev_private;
3629 
3630 	if (dev_priv->ips.renderctx) {
3631 		i915_gem_object_unpin(dev_priv->ips.renderctx);
3632 		drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
3633 		dev_priv->ips.renderctx = NULL;
3634 	}
3635 
3636 	if (dev_priv->ips.pwrctx) {
3637 		i915_gem_object_unpin(dev_priv->ips.pwrctx);
3638 		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
3639 		dev_priv->ips.pwrctx = NULL;
3640 	}
3641 }
3642 
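/*
 * If a power context is installed (PWRCTXA non-zero), force the GPU out of
 * RC6 via RCX_SW_EXIT, wait for the render standby state machine to report
 * "on", then clear the power context address and restore RSTDBYCTL.
 */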
3643 static void ironlake_disable_rc6(struct drm_device *dev)
3644 {
3645 	struct drm_i915_private *dev_priv = dev->dev_private;
3646 
3647 	if (I915_READ(PWRCTXA)) {
3648 		/* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
3649 		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
3650 		wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
3651 			 50);
3652 
3653 		I915_WRITE(PWRCTXA, 0);
3654 		POSTING_READ(PWRCTXA);
3655 
3656 		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
3657 		POSTING_READ(RSTDBYCTL);
3658 	}
3659 }
3660 
3661 static int ironlake_setup_rc6(struct drm_device *dev)
3662 {
3663 	struct drm_i915_private *dev_priv = dev->dev_private;
3664 
3665 	if (dev_priv->ips.renderctx == NULL)
3666 		dev_priv->ips.renderctx = intel_alloc_context_page(dev);
3667 	if (!dev_priv->ips.renderctx)
3668 		return -ENOMEM;
3669 
3670 	if (dev_priv->ips.pwrctx == NULL)
3671 		dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
3672 	if (!dev_priv->ips.pwrctx) {
3673 		ironlake_teardown_rc6(dev);
3674 		return -ENOMEM;
3675 	}
3676 
3677 	return 0;
3678 }
3679 
3680 static void ironlake_enable_rc6(struct drm_device *dev)
3681 {
3682 	struct drm_i915_private *dev_priv = dev->dev_private;
3683 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
3684 	bool was_interruptible;
3685 	int ret;
3686 
3687 	/* rc6 disabled by default due to repeated reports of hanging during
3688 	 * boot and resume.
3689 	 */
3690 	if (!intel_enable_rc6(dev))
3691 		return;
3692 
3693 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3694 
3695 	ret = ironlake_setup_rc6(dev);
3696 	if (ret)
3697 		return;
3698 
3699 	was_interruptible = dev_priv->mm.interruptible;
3700 	dev_priv->mm.interruptible = false;
3701 
3702 	/*
3703 	 * GPU can automatically power down the render unit if given a page
3704 	 * to save state.
3705 	 */
3706 	ret = intel_ring_begin(ring, 6);
3707 	if (ret) {
3708 		ironlake_teardown_rc6(dev);
3709 		dev_priv->mm.interruptible = was_interruptible;
3710 		return;
3711 	}
3712 
3713 	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
3714 	intel_ring_emit(ring, MI_SET_CONTEXT);
3715 	intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
3716 			MI_MM_SPACE_GTT |
3717 			MI_SAVE_EXT_STATE_EN |
3718 			MI_RESTORE_EXT_STATE_EN |
3719 			MI_RESTORE_INHIBIT);
3720 	intel_ring_emit(ring, MI_SUSPEND_FLUSH);
3721 	intel_ring_emit(ring, MI_NOOP);
3722 	intel_ring_emit(ring, MI_FLUSH);
3723 	intel_ring_advance(ring);
3724 
3725 	/*
3726 	 * Wait for the command parser to advance past MI_SET_CONTEXT.  The HW
3727 	 * does an implicit flush; combined with the MI_FLUSH above, it should
3728 	 * be safe to assume that renderctx is valid.
3729 	 */
3730 	ret = intel_ring_idle(ring);
3731 	dev_priv->mm.interruptible = was_interruptible;
3732 	if (ret) {
3733 		DRM_ERROR("failed to enable ironlake power savings\n");
3734 		ironlake_teardown_rc6(dev);
3735 		return;
3736 	}
3737 
3738 	I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
3739 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
3740 }
3741 
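/*
 * Decode a PXVFREQ fuse value into a frequency: a divider and pre/post
 * dividers are packed into the register and applied to what looks like a
 * 133.33MHz reference (the 133333 constant below).
 */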
3742 static unsigned long intel_pxfreq(u32 vidfreq)
3743 {
3744 	unsigned long freq;
3745 	int div = (vidfreq & 0x3f0000) >> 16;
3746 	int post = (vidfreq & 0x3000) >> 12;
3747 	int pre = (vidfreq & 0x7);
3748 
3749 	if (!pre)
3750 		return 0;
3751 
3752 	freq = ((div * 133333) / ((1<<post) * pre));
3753 
3754 	return freq;
3755 }
3756 
3757 static const struct cparams {
3758 	u16 i;
3759 	u16 t;
3760 	u16 m;
3761 	u16 c;
3762 } cparams[] = {
3763 	{ 1, 1333, 301, 28664 },
3764 	{ 1, 1066, 294, 24460 },
3765 	{ 1, 800, 294, 25192 },
3766 	{ 0, 1333, 276, 27605 },
3767 	{ 0, 1066, 276, 27605 },
3768 	{ 0, 800, 231, 23784 },
3769 };
3770 
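/*
 * Estimate chipset power draw for IPS from the DMI/DDR/CSI energy counters,
 * using the m/c coefficients from the cparams table that match the current
 * memory configuration.  Rate limited to one fresh sample every 10ms; the
 * caller must hold mchdev_lock.
 */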
3771 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
3772 {
3773 	u64 total_count, diff, ret;
3774 	u32 count1, count2, count3, m = 0, c = 0;
3775 	unsigned long now = jiffies_to_msecs(jiffies), diff1;
3776 	int i;
3777 
3778 	diff1 = now - dev_priv->ips.last_time1;
3779 
3780 	/* Prevent division-by-zero if we are asking too fast.
3781 	 * Also, we don't get interesting results if we are polling
3782 	 * faster than once in 10ms, so just return the saved value
3783 	 * in such cases.
3784 	 */
3785 	if (diff1 <= 10)
3786 		return dev_priv->ips.chipset_power;
3787 
3788 	count1 = I915_READ(DMIEC);
3789 	count2 = I915_READ(DDREC);
3790 	count3 = I915_READ(CSIEC);
3791 
3792 	total_count = count1 + count2 + count3;
3793 
3794 	/* FIXME: handle per-counter overflow */
3795 	if (total_count < dev_priv->ips.last_count1) {
3796 		diff = ~0UL - dev_priv->ips.last_count1;
3797 		diff += total_count;
3798 	} else {
3799 		diff = total_count - dev_priv->ips.last_count1;
3800 	}
3801 
3802 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
3803 		if (cparams[i].i == dev_priv->ips.c_m &&
3804 		    cparams[i].t == dev_priv->ips.r_t) {
3805 			m = cparams[i].m;
3806 			c = cparams[i].c;
3807 			break;
3808 		}
3809 	}
3810 
3811 	diff = div_u64(diff, diff1);
3812 	ret = ((m * diff) + c);
3813 	ret = div_u64(ret, 10);
3814 
3815 	dev_priv->ips.last_count1 = total_count;
3816 	dev_priv->ips.last_time1 = now;
3817 
3818 	dev_priv->ips.chipset_power = ret;
3819 
3820 	return ret;
3821 }
3822 
3823 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
3824 {
3825 	unsigned long val;
3826 
3827 	if (dev_priv->info->gen != 5)
3828 		return 0;
3829 
3830 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
3831 
3832 	val = __i915_chipset_val(dev_priv);
3833 
3834 	lockmgr(&mchdev_lock, LK_RELEASE);
3835 
3836 	return val;
3837 }
3838 
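/*
 * Combine the TSFS slope/intercept fuse with the TR1 reading into the
 * thermal value ("t") used as input by the graphics power estimate below.
 */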
3839 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
3840 {
3841 	unsigned long m, x, b;
3842 	u32 tsfs;
3843 
3844 	tsfs = I915_READ(TSFS);
3845 
3846 	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
3847 	x = I915_READ8(TR1);
3848 
3849 	b = tsfs & TSFS_INTR_MASK;
3850 
3851 	return ((m * x) / 127) - b;
3852 }
3853 
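/*
 * Translate a PXVID voltage ID into an external voltage using the lookup
 * table below, picking the mobile (vm) or desktop (vd) column as
 * appropriate.  The table entries are annotated as being in .1 mil units.
 */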
3854 static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
3855 {
3856 	static const struct v_table {
3857 		u16 vd; /* in .1 mil */
3858 		u16 vm; /* in .1 mil */
3859 	} v_table[] = {
3860 		{ 0, 0, },
3861 		{ 375, 0, },
3862 		{ 500, 0, },
3863 		{ 625, 0, },
3864 		{ 750, 0, },
3865 		{ 875, 0, },
3866 		{ 1000, 0, },
3867 		{ 1125, 0, },
3868 		{ 4125, 3000, },
3869 		{ 4125, 3000, },
3870 		{ 4125, 3000, },
3871 		{ 4125, 3000, },
3872 		{ 4125, 3000, },
3873 		{ 4125, 3000, },
3874 		{ 4125, 3000, },
3875 		{ 4125, 3000, },
3876 		{ 4125, 3000, },
3877 		{ 4125, 3000, },
3878 		{ 4125, 3000, },
3879 		{ 4125, 3000, },
3880 		{ 4125, 3000, },
3881 		{ 4125, 3000, },
3882 		{ 4125, 3000, },
3883 		{ 4125, 3000, },
3884 		{ 4125, 3000, },
3885 		{ 4125, 3000, },
3886 		{ 4125, 3000, },
3887 		{ 4125, 3000, },
3888 		{ 4125, 3000, },
3889 		{ 4125, 3000, },
3890 		{ 4125, 3000, },
3891 		{ 4125, 3000, },
3892 		{ 4250, 3125, },
3893 		{ 4375, 3250, },
3894 		{ 4500, 3375, },
3895 		{ 4625, 3500, },
3896 		{ 4750, 3625, },
3897 		{ 4875, 3750, },
3898 		{ 5000, 3875, },
3899 		{ 5125, 4000, },
3900 		{ 5250, 4125, },
3901 		{ 5375, 4250, },
3902 		{ 5500, 4375, },
3903 		{ 5625, 4500, },
3904 		{ 5750, 4625, },
3905 		{ 5875, 4750, },
3906 		{ 6000, 4875, },
3907 		{ 6125, 5000, },
3908 		{ 6250, 5125, },
3909 		{ 6375, 5250, },
3910 		{ 6500, 5375, },
3911 		{ 6625, 5500, },
3912 		{ 6750, 5625, },
3913 		{ 6875, 5750, },
3914 		{ 7000, 5875, },
3915 		{ 7125, 6000, },
3916 		{ 7250, 6125, },
3917 		{ 7375, 6250, },
3918 		{ 7500, 6375, },
3919 		{ 7625, 6500, },
3920 		{ 7750, 6625, },
3921 		{ 7875, 6750, },
3922 		{ 8000, 6875, },
3923 		{ 8125, 7000, },
3924 		{ 8250, 7125, },
3925 		{ 8375, 7250, },
3926 		{ 8500, 7375, },
3927 		{ 8625, 7500, },
3928 		{ 8750, 7625, },
3929 		{ 8875, 7750, },
3930 		{ 9000, 7875, },
3931 		{ 9125, 8000, },
3932 		{ 9250, 8125, },
3933 		{ 9375, 8250, },
3934 		{ 9500, 8375, },
3935 		{ 9625, 8500, },
3936 		{ 9750, 8625, },
3937 		{ 9875, 8750, },
3938 		{ 10000, 8875, },
3939 		{ 10125, 9000, },
3940 		{ 10250, 9125, },
3941 		{ 10375, 9250, },
3942 		{ 10500, 9375, },
3943 		{ 10625, 9500, },
3944 		{ 10750, 9625, },
3945 		{ 10875, 9750, },
3946 		{ 11000, 9875, },
3947 		{ 11125, 10000, },
3948 		{ 11250, 10125, },
3949 		{ 11375, 10250, },
3950 		{ 11500, 10375, },
3951 		{ 11625, 10500, },
3952 		{ 11750, 10625, },
3953 		{ 11875, 10750, },
3954 		{ 12000, 10875, },
3955 		{ 12125, 11000, },
3956 		{ 12250, 11125, },
3957 		{ 12375, 11250, },
3958 		{ 12500, 11375, },
3959 		{ 12625, 11500, },
3960 		{ 12750, 11625, },
3961 		{ 12875, 11750, },
3962 		{ 13000, 11875, },
3963 		{ 13125, 12000, },
3964 		{ 13250, 12125, },
3965 		{ 13375, 12250, },
3966 		{ 13500, 12375, },
3967 		{ 13625, 12500, },
3968 		{ 13750, 12625, },
3969 		{ 13875, 12750, },
3970 		{ 14000, 12875, },
3971 		{ 14125, 13000, },
3972 		{ 14250, 13125, },
3973 		{ 14375, 13250, },
3974 		{ 14500, 13375, },
3975 		{ 14625, 13500, },
3976 		{ 14750, 13625, },
3977 		{ 14875, 13750, },
3978 		{ 15000, 13875, },
3979 		{ 15125, 14000, },
3980 		{ 15250, 14125, },
3981 		{ 15375, 14250, },
3982 		{ 15500, 14375, },
3983 		{ 15625, 14500, },
3984 		{ 15750, 14625, },
3985 		{ 15875, 14750, },
3986 		{ 16000, 14875, },
3987 		{ 16125, 15000, },
3988 	};
3989 	if (dev_priv->info->is_mobile)
3990 		return v_table[pxvid].vm;
3991 	else
3992 		return v_table[pxvid].vd;
3993 }
3994 
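/*
 * Sample the GFXEC energy counter and fold the delta since the last sample
 * into the running graphics power figure (the 1181 scale factor is one of
 * the magic constants noted below).  The caller must hold mchdev_lock.
 */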
3995 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
3996 {
3997 	struct timespec now, diff1;
3998 	u64 diff;
3999 	unsigned long diffms;
4000 	u32 count;
4001 
4002 	getrawmonotonic(&now);
4003 	diff1 = timespec_sub(now, dev_priv->ips.last_time2);
4004 
4005 	/* Don't divide by 0 */
4006 	diffms = diff1.tv_sec * 1000 + diff1.tv_nsec / 1000000;
4007 	if (!diffms)
4008 		return;
4009 
4010 	count = I915_READ(GFXEC);
4011 
4012 	if (count < dev_priv->ips.last_count2) {
4013 		diff = ~0UL - dev_priv->ips.last_count2;
4014 		diff += count;
4015 	} else {
4016 		diff = count - dev_priv->ips.last_count2;
4017 	}
4018 
4019 	dev_priv->ips.last_count2 = count;
4020 	dev_priv->ips.last_time2 = now;
4021 
4022 	/* More magic constants... */
4023 	diff = diff * 1181;
4024 	diff = div_u64(diff, diffms * 10);
4025 	dev_priv->ips.gfx_power = diff;
4026 }
4027 
4028 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
4029 {
4030 	if (dev_priv->info->gen != 5)
4031 		return;
4032 
4033 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4034 
4035 	__i915_update_gfx_val(dev_priv);
4036 
4037 	lockmgr(&mchdev_lock, LK_RELEASE);
4038 }
4039 
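/*
 * Estimate instantaneous graphics power: look up the current P-state's
 * voltage, apply the thermally dependent correction factors, and add the
 * averaged counter-based figure maintained by __i915_update_gfx_val().
 */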
4040 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
4041 {
4042 	unsigned long t, corr, state1, corr2, state2;
4043 	u32 pxvid, ext_v;
4044 
4045 	pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_delay * 4));
4046 	pxvid = (pxvid >> 24) & 0x7f;
4047 	ext_v = pvid_to_extvid(dev_priv, pxvid);
4048 
4049 	state1 = ext_v;
4050 
4051 	t = i915_mch_val(dev_priv);
4052 
4053 	/* Revel in the empirically derived constants */
4054 
4055 	/* Correction factor in 1/100000 units */
4056 	if (t > 80)
4057 		corr = ((t * 2349) + 135940);
4058 	else if (t >= 50)
4059 		corr = ((t * 964) + 29317);
4060 	else /* < 50 */
4061 		corr = ((t * 301) + 1004);
4062 
4063 	corr = corr * ((150142 * state1) / 10000 - 78642);
4064 	corr /= 100000;
4065 	corr2 = (corr * dev_priv->ips.corr);
4066 
4067 	state2 = (corr2 * state1) / 10000;
4068 	state2 /= 100; /* convert to mW */
4069 
4070 	__i915_update_gfx_val(dev_priv);
4071 
4072 	return dev_priv->ips.gfx_power + state2;
4073 }
4074 
4075 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
4076 {
4077 	unsigned long val;
4078 
4079 	if (dev_priv->info->gen != 5)
4080 		return 0;
4081 
4082 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4083 
4084 	val = __i915_gfx_val(dev_priv);
4085 
4086 	lockmgr(&mchdev_lock, LK_RELEASE);
4087 
4088 	return val;
4089 }
4090 
4091 /**
4092  * i915_read_mch_val - return value for IPS use
4093  *
4094  * Calculate and return a value for the IPS driver to use when deciding whether
4095  * we have thermal and power headroom to increase CPU or GPU power budget.
4096  */
4097 unsigned long i915_read_mch_val(void)
4098 {
4099 	struct drm_i915_private *dev_priv;
4100 	unsigned long chipset_val, graphics_val, ret = 0;
4101 
4102 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4103 	if (!i915_mch_dev)
4104 		goto out_unlock;
4105 	dev_priv = i915_mch_dev;
4106 
4107 	chipset_val = __i915_chipset_val(dev_priv);
4108 	graphics_val = __i915_gfx_val(dev_priv);
4109 
4110 	ret = chipset_val + graphics_val;
4111 
4112 out_unlock:
4113 	lockmgr(&mchdev_lock, LK_RELEASE);
4114 
4115 	return ret;
4116 }
4117 
4118 /**
4119  * i915_gpu_raise - raise GPU frequency limit
4120  *
4121  * Raise the limit; IPS indicates we have thermal headroom.
4122  */
4123 bool i915_gpu_raise(void)
4124 {
4125 	struct drm_i915_private *dev_priv;
4126 	bool ret = true;
4127 
4128 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4129 	if (!i915_mch_dev) {
4130 		ret = false;
4131 		goto out_unlock;
4132 	}
4133 	dev_priv = i915_mch_dev;
4134 
4135 	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
4136 		dev_priv->ips.max_delay--;
4137 
4138 out_unlock:
4139 	lockmgr(&mchdev_lock, LK_RELEASE);
4140 
4141 	return ret;
4142 }
4143 
4144 /**
4145  * i915_gpu_lower - lower GPU frequency limit
4146  *
4147  * IPS indicates we're close to a thermal limit, so throttle back the GPU
4148  * frequency maximum.
4149  */
4150 bool i915_gpu_lower(void)
4151 {
4152 	struct drm_i915_private *dev_priv;
4153 	bool ret = true;
4154 
4155 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4156 	if (!i915_mch_dev) {
4157 		ret = false;
4158 		goto out_unlock;
4159 	}
4160 	dev_priv = i915_mch_dev;
4161 
4162 	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
4163 		dev_priv->ips.max_delay++;
4164 
4165 out_unlock:
4166 	lockmgr(&mchdev_lock, LK_RELEASE);
4167 
4168 	return ret;
4169 }
4170 
4171 /**
4172  * i915_gpu_busy - indicate GPU busyness to IPS
4173  *
4174  * Tell the IPS driver whether or not the GPU is busy.
4175  */
4176 bool i915_gpu_busy(void)
4177 {
4178 	struct drm_i915_private *dev_priv;
4179 	struct intel_ring_buffer *ring;
4180 	bool ret = false;
4181 	int i;
4182 
4183 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4184 	if (!i915_mch_dev)
4185 		goto out_unlock;
4186 	dev_priv = i915_mch_dev;
4187 
4188 	for_each_ring(ring, dev_priv, i)
4189 		ret |= !list_empty(&ring->request_list);
4190 
4191 out_unlock:
4192 	lockmgr(&mchdev_lock, LK_RELEASE);
4193 
4194 	return ret;
4195 }
4196 
4197 /**
4198  * i915_gpu_turbo_disable - disable graphics turbo
4199  *
4200  * Disable graphics turbo by resetting the max frequency and setting the
4201  * current frequency to the default.
4202  */
4203 bool i915_gpu_turbo_disable(void)
4204 {
4205 	struct drm_i915_private *dev_priv;
4206 	bool ret = true;
4207 
4208 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4209 	if (!i915_mch_dev) {
4210 		ret = false;
4211 		goto out_unlock;
4212 	}
4213 	dev_priv = i915_mch_dev;
4214 
4215 	dev_priv->ips.max_delay = dev_priv->ips.fstart;
4216 
4217 	if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
4218 		ret = false;
4219 
4220 out_unlock:
4221 	lockmgr(&mchdev_lock, LK_RELEASE);
4222 
4223 	return ret;
4224 }
4225 
4226 #if 0
4227 /**
4228  * Tells the intel_ips driver that the i915 driver is now loaded, if
4229  * IPS got loaded first.
4230  *
4231  * This awkward dance is so that neither module has to depend on the
4232  * other in order for IPS to do the appropriate communication of
4233  * GPU turbo limits to i915.
4234  */
4235 static void
4236 ips_ping_for_i915_load(void)
4237 {
4238 	void (*link)(void);
4239 
4240 	link = symbol_get(ips_link_to_i915_driver);
4241 	if (link) {
4242 		link();
4243 		symbol_put(ips_link_to_i915_driver);
4244 	}
4245 }
4246 #endif
4247 
4248 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
4249 {
4250 	/* We only register the i915 ips part with intel-ips once everything is
4251 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
4252 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4253 	i915_mch_dev = dev_priv;
4254 	lockmgr(&mchdev_lock, LK_RELEASE);
4255 }
4256 
4257 void intel_gpu_ips_teardown(void)
4258 {
4259 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4260 	i915_mch_dev = NULL;
4261 	lockmgr(&mchdev_lock, LK_RELEASE);
4262 }
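
/*
 * Program the Ironlake energy monitor (EMON): event energy weights, per
 * P-state power weights derived from the PXVFREQ fuses and the PMON enable
 * bits, then cache the LCFUSE correction factor used by the power
 * estimates above.
 */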
4263 static void intel_init_emon(struct drm_device *dev)
4264 {
4265 	struct drm_i915_private *dev_priv = dev->dev_private;
4266 	u32 lcfuse;
4267 	u8 pxw[16];
4268 	int i;
4269 
4270 	/* Disable PMON while the event weights are programmed */
4271 	I915_WRITE(ECR, 0);
4272 	POSTING_READ(ECR);
4273 
4274 	/* Program energy weights for various events */
4275 	I915_WRITE(SDEW, 0x15040d00);
4276 	I915_WRITE(CSIEW0, 0x007f0000);
4277 	I915_WRITE(CSIEW1, 0x1e220004);
4278 	I915_WRITE(CSIEW2, 0x04000004);
4279 
4280 	for (i = 0; i < 5; i++)
4281 		I915_WRITE(PEW + (i * 4), 0);
4282 	for (i = 0; i < 3; i++)
4283 		I915_WRITE(DEW + (i * 4), 0);
4284 
4285 	/* Program P-state weights to account for frequency power adjustment */
4286 	for (i = 0; i < 16; i++) {
4287 		u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
4288 		unsigned long freq = intel_pxfreq(pxvidfreq);
4289 		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
4290 			PXVFREQ_PX_SHIFT;
4291 		unsigned long val;
4292 
4293 		val = vid * vid;
4294 		val *= (freq / 1000);
4295 		val *= 255;
4296 		val /= (127*127*900);
4297 		if (val > 0xff)
4298 			DRM_ERROR("bad pxval: %ld\n", val);
4299 		pxw[i] = val;
4300 	}
4301 	/* Render standby states get 0 weight */
4302 	pxw[14] = 0;
4303 	pxw[15] = 0;
4304 
4305 	for (i = 0; i < 4; i++) {
4306 		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
4307 			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
4308 		I915_WRITE(PXW + (i * 4), val);
4309 	}
4310 
4311 	/* Adjust magic regs to magic values (more experimental results) */
4312 	I915_WRITE(OGW0, 0);
4313 	I915_WRITE(OGW1, 0);
4314 	I915_WRITE(EG0, 0x00007f00);
4315 	I915_WRITE(EG1, 0x0000000e);
4316 	I915_WRITE(EG2, 0x000e0000);
4317 	I915_WRITE(EG3, 0x68000300);
4318 	I915_WRITE(EG4, 0x42000000);
4319 	I915_WRITE(EG5, 0x00140031);
4320 	I915_WRITE(EG6, 0);
4321 	I915_WRITE(EG7, 0);
4322 
4323 	for (i = 0; i < 8; i++)
4324 		I915_WRITE(PXWL + (i * 4), 0);
4325 
4326 	/* Enable PMON + select events */
4327 	I915_WRITE(ECR, 0x80000019);
4328 
4329 	lcfuse = I915_READ(LCFUSE02);
4330 
4331 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
4332 }
4333 
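/*
 * Tear down GT power saving: DRPS and RC6 on Ironlake M, or, on gen6+,
 * cancel the deferred enable work and disable RPS (ValleyView or gen6
 * flavour) under the rps hw_lock.
 */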
4334 void intel_disable_gt_powersave(struct drm_device *dev)
4335 {
4336 	struct drm_i915_private *dev_priv = dev->dev_private;
4337 
4338 	/* Interrupts should be disabled already to avoid re-arming. */
4339 	WARN_ON(dev->irq_enabled);
4340 
4341 	if (IS_IRONLAKE_M(dev)) {
4342 		ironlake_disable_drps(dev);
4343 		ironlake_disable_rc6(dev);
4344 	} else if (INTEL_INFO(dev)->gen >= 6) {
4345 		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
4346 		cancel_work_sync(&dev_priv->rps.work);
4347 		if (IS_VALLEYVIEW(dev))
4348 			cancel_delayed_work_sync(&dev_priv->rps.vlv_work);
4349 		mutex_lock(&dev_priv->rps.hw_lock);
4350 		if (IS_VALLEYVIEW(dev))
4351 			valleyview_disable_rps(dev);
4352 		else
4353 			gen6_disable_rps(dev);
4354 		mutex_unlock(&dev_priv->rps.hw_lock);
4355 	}
4356 }
4357 
4358 static void intel_gen6_powersave_work(struct work_struct *work)
4359 {
4360 	struct drm_i915_private *dev_priv =
4361 		container_of(work, struct drm_i915_private,
4362 			     rps.delayed_resume_work.work);
4363 	struct drm_device *dev = dev_priv->dev;
4364 
4365 	mutex_lock(&dev_priv->rps.hw_lock);
4366 
4367 	if (IS_VALLEYVIEW(dev)) {
4368 		valleyview_enable_rps(dev);
4369 	} else {
4370 		gen6_enable_rps(dev);
4371 		gen6_update_ring_freq(dev);
4372 	}
4373 	mutex_unlock(&dev_priv->rps.hw_lock);
4374 }
4375 
4376 void intel_enable_gt_powersave(struct drm_device *dev)
4377 {
4378 	struct drm_i915_private *dev_priv = dev->dev_private;
4379 
4380 	if (IS_IRONLAKE_M(dev)) {
4381 		ironlake_enable_drps(dev);
4382 		ironlake_enable_rc6(dev);
4383 		intel_init_emon(dev);
4384 	} else if (IS_GEN6(dev) || IS_GEN7(dev)) {
4385 		/*
4386 		 * PCU communication is slow and this doesn't need to be
4387 		 * done at any specific time, so do this out of our fast path
4388 		 * to make resume and init faster.
4389 		 */
4390 		schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
4391 				      round_jiffies_up_relative(HZ));
4392 	}
4393 }
4394 
4395 static void ibx_init_clock_gating(struct drm_device *dev)
4396 {
4397 	struct drm_i915_private *dev_priv = dev->dev_private;
4398 
4399 	/*
4400 	 * On Ibex Peak and Cougar Point, we need to disable clock
4401 	 * gating for the panel power sequencer or it will fail to
4402 	 * start up when no ports are active.
4403 	 */
4404 	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
4405 }
4406 
4407 static void g4x_disable_trickle_feed(struct drm_device *dev)
4408 {
4409 	struct drm_i915_private *dev_priv = dev->dev_private;
4410 	int pipe;
4411 
4412 	for_each_pipe(pipe) {
4413 		I915_WRITE(DSPCNTR(pipe),
4414 			   I915_READ(DSPCNTR(pipe)) |
4415 			   DISPPLANE_TRICKLE_FEED_DISABLE);
4416 		intel_flush_display_plane(dev_priv, pipe);
4417 	}
4418 }
4419 
4420 static void ironlake_init_clock_gating(struct drm_device *dev)
4421 {
4422 	struct drm_i915_private *dev_priv = dev->dev_private;
4423 	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
4424 
4425 	/* Required for FBC */
4426 	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
4427 		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
4428 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
4429 
4430 	I915_WRITE(PCH_3DCGDIS0,
4431 		   MARIUNIT_CLOCK_GATE_DISABLE |
4432 		   SVSMUNIT_CLOCK_GATE_DISABLE);
4433 	I915_WRITE(PCH_3DCGDIS1,
4434 		   VFMUNIT_CLOCK_GATE_DISABLE);
4435 
4436 	/*
4437 	 * According to the spec the following bits should be set in
4438 	 * order to enable memory self-refresh
4439 	 * The bit 22/21 of 0x42004
4440 	 * The bit 5 of 0x42020
4441 	 * The bit 15 of 0x45000
4442 	 */
4443 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4444 		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
4445 		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
4446 	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
4447 	I915_WRITE(DISP_ARB_CTL,
4448 		   (I915_READ(DISP_ARB_CTL) |
4449 		    DISP_FBC_WM_DIS));
4450 	I915_WRITE(WM3_LP_ILK, 0);
4451 	I915_WRITE(WM2_LP_ILK, 0);
4452 	I915_WRITE(WM1_LP_ILK, 0);
4453 
4454 	/*
4455 	 * Based on the document from hardware guys the following bits
4456 	 * should be set unconditionally in order to enable FBC.
4457 	 * The bit 22 of 0x42000
4458 	 * The bit 22 of 0x42004
4459 	 * The bit 7,8,9 of 0x42020.
4460 	 */
4461 	if (IS_IRONLAKE_M(dev)) {
4462 		I915_WRITE(ILK_DISPLAY_CHICKEN1,
4463 			   I915_READ(ILK_DISPLAY_CHICKEN1) |
4464 			   ILK_FBCQ_DIS);
4465 		I915_WRITE(ILK_DISPLAY_CHICKEN2,
4466 			   I915_READ(ILK_DISPLAY_CHICKEN2) |
4467 			   ILK_DPARB_GATE);
4468 	}
4469 
4470 	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
4471 
4472 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4473 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4474 		   ILK_ELPIN_409_SELECT);
4475 	I915_WRITE(_3D_CHICKEN2,
4476 		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
4477 		   _3D_CHICKEN2_WM_READ_PIPELINED);
4478 
4479 	/* WaDisableRenderCachePipelinedFlush:ilk */
4480 	I915_WRITE(CACHE_MODE_0,
4481 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
4482 
4483 	g4x_disable_trickle_feed(dev);
4484 
4485 	ibx_init_clock_gating(dev);
4486 }
4487 
4488 static void cpt_init_clock_gating(struct drm_device *dev)
4489 {
4490 	struct drm_i915_private *dev_priv = dev->dev_private;
4491 	int pipe;
4492 	uint32_t val;
4493 
4494 	/*
4495 	 * On Ibex Peak and Cougar Point, we need to disable clock
4496 	 * gating for the panel power sequencer or it will fail to
4497 	 * start up when no ports are active.
4498 	 */
4499 	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
4500 	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
4501 		   DPLS_EDP_PPS_FIX_DIS);
4502 	/* The below fixes a weird display corruption (a few pixels shifted
4503 	 * downward) seen only on the LVDS panels of some HP laptops with IVY.
4504 	 */
4505 	for_each_pipe(pipe) {
4506 		val = I915_READ(TRANS_CHICKEN2(pipe));
4507 		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
4508 		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
4509 		if (dev_priv->vbt.fdi_rx_polarity_inverted)
4510 			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
4511 		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
4512 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
4513 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
4514 		I915_WRITE(TRANS_CHICKEN2(pipe), val);
4515 	}
4516 	/* WADP0ClockGatingDisable */
4517 	for_each_pipe(pipe) {
4518 		I915_WRITE(TRANS_CHICKEN1(pipe),
4519 			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
4520 	}
4521 }
4522 
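/*
 * Sanity check the MCH_SSKPD watermark value programmed by the BIOS and
 * warn the user when it does not match what the driver expects.
 */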
4523 static void gen6_check_mch_setup(struct drm_device *dev)
4524 {
4525 	struct drm_i915_private *dev_priv = dev->dev_private;
4526 	uint32_t tmp;
4527 
4528 	tmp = I915_READ(MCH_SSKPD);
4529 	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) {
4530 		DRM_INFO("Wrong MCH_SSKPD value: 0x%08x\n", tmp);
4531 		DRM_INFO("This can cause pipe underruns and display issues.\n");
4532 		DRM_INFO("Please upgrade your BIOS to fix this.\n");
4533 	}
4534 }
4535 
4536 static void gen6_init_clock_gating(struct drm_device *dev)
4537 {
4538 	struct drm_i915_private *dev_priv = dev->dev_private;
4539 	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
4540 
4541 	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
4542 
4543 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4544 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4545 		   ILK_ELPIN_409_SELECT);
4546 
4547 	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
4548 	I915_WRITE(_3D_CHICKEN,
4549 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
4550 
4551 	/* WaSetupGtModeTdRowDispatch:snb */
4552 	if (IS_SNB_GT1(dev))
4553 		I915_WRITE(GEN6_GT_MODE,
4554 			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
4555 
4556 	I915_WRITE(WM3_LP_ILK, 0);
4557 	I915_WRITE(WM2_LP_ILK, 0);
4558 	I915_WRITE(WM1_LP_ILK, 0);
4559 
4560 	I915_WRITE(CACHE_MODE_0,
4561 		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
4562 
4563 	I915_WRITE(GEN6_UCGCTL1,
4564 		   I915_READ(GEN6_UCGCTL1) |
4565 		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
4566 		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
4567 
4568 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4569 	 * gating disable must be set.  Failure to set it results in
4570 	 * flickering pixels due to Z write ordering failures after
4571 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4572 	 * Sanctuary and Tropics, and apparently anything else with
4573 	 * alpha test or pixel discard.
4574 	 *
4575 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4576 	 * but we didn't debug actual testcases to find it out.
4577 	 *
4578 	 * Also apply WaDisableVDSUnitClockGating:snb and
4579 	 * WaDisableRCPBUnitClockGating:snb.
4580 	 */
4581 	I915_WRITE(GEN6_UCGCTL2,
4582 		   GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
4583 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
4584 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4585 
4586 	/* Bspec says we need to always set all mask bits. */
4587 	I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
4588 		   _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
4589 
4590 	/*
4591 	 * According to the spec the following bits should be
4592 	 * set in order to enable memory self-refresh and fbc:
4593 	 * The bit21 and bit22 of 0x42000
4594 	 * The bit21 and bit22 of 0x42004
4595 	 * The bit5 and bit7 of 0x42020
4596 	 * The bit14 of 0x70180
4597 	 * The bit14 of 0x71180
4598 	 */
4599 	I915_WRITE(ILK_DISPLAY_CHICKEN1,
4600 		   I915_READ(ILK_DISPLAY_CHICKEN1) |
4601 		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
4602 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4603 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4604 		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
4605 	I915_WRITE(ILK_DSPCLK_GATE_D,
4606 		   I915_READ(ILK_DSPCLK_GATE_D) |
4607 		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
4608 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
4609 
4610 	/* WaMbcDriverBootEnable:snb */
4611 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
4612 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
4613 
4614 	g4x_disable_trickle_feed(dev);
4615 
4616 	/* The default value should be 0x200 according to docs, but the two
4617 	 * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
4618 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
4619 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
4620 
4621 	cpt_init_clock_gating(dev);
4622 
4623 	gen6_check_mch_setup(dev);
4624 }
4625 
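/*
 * Switch the TS/VS/DS fixed-function units to hardware scheduling; on
 * Haswell also clear the VS reference count full-force-miss bit
 * (WaVSRefCountFullforceMissDisable).
 */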
4626 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
4627 {
4628 	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
4629 
4630 	reg &= ~GEN7_FF_SCHED_MASK;
4631 	reg |= GEN7_FF_TS_SCHED_HW;
4632 	reg |= GEN7_FF_VS_SCHED_HW;
4633 	reg |= GEN7_FF_DS_SCHED_HW;
4634 
4635 	if (IS_HASWELL(dev_priv->dev))
4636 		reg &= ~GEN7_FF_VS_REF_CNT_FFME;
4637 
4638 	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
4639 }
4640 
4641 static void lpt_init_clock_gating(struct drm_device *dev)
4642 {
4643 	struct drm_i915_private *dev_priv = dev->dev_private;
4644 
4645 	/*
4646 	 * TODO: this bit should only be enabled when really needed, then
4647 	 * disabled when not needed anymore in order to save power.
4648 	 */
4649 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
4650 		I915_WRITE(SOUTH_DSPCLK_GATE_D,
4651 			   I915_READ(SOUTH_DSPCLK_GATE_D) |
4652 			   PCH_LP_PARTITION_LEVEL_DISABLE);
4653 
4654 	/* WADPOClockGatingDisable:hsw */
4655 	I915_WRITE(_TRANSA_CHICKEN1,
4656 		   I915_READ(_TRANSA_CHICKEN1) |
4657 		   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
4658 }
4659 
4660 static void lpt_suspend_hw(struct drm_device *dev)
4661 {
4662 	struct drm_i915_private *dev_priv = dev->dev_private;
4663 
4664 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
4665 		uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
4666 
4667 		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
4668 		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
4669 	}
4670 }
4671 
4672 static void haswell_init_clock_gating(struct drm_device *dev)
4673 {
4674 	struct drm_i915_private *dev_priv = dev->dev_private;
4675 
4676 	I915_WRITE(WM3_LP_ILK, 0);
4677 	I915_WRITE(WM2_LP_ILK, 0);
4678 	I915_WRITE(WM1_LP_ILK, 0);
4679 
4680 	/* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4681 	 * This implements the WaDisableRCZUnitClockGating:hsw workaround.
4682 	 */
4683 	I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
4684 
4685 	/* Apply the WaDisableRHWOOptimizationForRenderHang:hsw workaround. */
4686 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4687 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4688 
4689 	/* WaApplyL3ControlAndL3ChickenMode:hsw */
4690 	I915_WRITE(GEN7_L3CNTLREG1,
4691 			GEN7_WA_FOR_GEN7_L3_CONTROL);
4692 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
4693 			GEN7_WA_L3_CHICKEN_MODE);
4694 
4695 	/* This is required by WaCatErrorRejectionIssue:hsw */
4696 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4697 			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4698 			GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4699 
4700 	g4x_disable_trickle_feed(dev);
4701 
4702 	/* WaVSRefCountFullforceMissDisable:hsw */
4703 	gen7_setup_fixed_func_scheduler(dev_priv);
4704 
4705 	/* WaDisable4x2SubspanOptimization:hsw */
4706 	I915_WRITE(CACHE_MODE_1,
4707 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4708 
4709 	/* WaMbcDriverBootEnable:hsw */
4710 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
4711 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
4712 
4713 	/* WaSwitchSolVfFArbitrationPriority:hsw */
4714 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
4715 
4716 	/* WaRsPkgCStateDisplayPMReq:hsw */
4717 	I915_WRITE(CHICKEN_PAR1_1,
4718 		   I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
4719 
4720 	lpt_init_clock_gating(dev);
4721 }
4722 
4723 static void ivybridge_init_clock_gating(struct drm_device *dev)
4724 {
4725 	struct drm_i915_private *dev_priv = dev->dev_private;
4726 	uint32_t snpcr;
4727 
4728 	I915_WRITE(WM3_LP_ILK, 0);
4729 	I915_WRITE(WM2_LP_ILK, 0);
4730 	I915_WRITE(WM1_LP_ILK, 0);
4731 
4732 	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
4733 
4734 	/* WaDisableEarlyCull:ivb */
4735 	I915_WRITE(_3D_CHICKEN3,
4736 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
4737 
4738 	/* WaDisableBackToBackFlipFix:ivb */
4739 	I915_WRITE(IVB_CHICKEN3,
4740 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
4741 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
4742 
4743 	/* WaDisablePSDDualDispatchEnable:ivb */
4744 	if (IS_IVB_GT1(dev))
4745 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
4746 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4747 	else
4748 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
4749 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4750 
4751 	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
4752 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4753 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4754 
4755 	/* WaApplyL3ControlAndL3ChickenMode:ivb */
4756 	I915_WRITE(GEN7_L3CNTLREG1,
4757 			GEN7_WA_FOR_GEN7_L3_CONTROL);
4758 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
4759 		   GEN7_WA_L3_CHICKEN_MODE);
4760 	if (IS_IVB_GT1(dev))
4761 		I915_WRITE(GEN7_ROW_CHICKEN2,
4762 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4763 	else
4764 		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
4765 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4766 
4767 
4768 	/* WaForceL3Serialization:ivb */
4769 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
4770 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
4771 
4772 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4773 	 * gating disable must be set.  Failure to set it results in
4774 	 * flickering pixels due to Z write ordering failures after
4775 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4776 	 * Sanctuary and Tropics, and apparently anything else with
4777 	 * alpha test or pixel discard.
4778 	 *
4779 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4780 	 * but we didn't debug actual testcases to find it out.
4781 	 *
4782 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4783 	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
4784 	 */
4785 	I915_WRITE(GEN6_UCGCTL2,
4786 		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
4787 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4788 
4789 	/* This is required by WaCatErrorRejectionIssue:ivb */
4790 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4791 			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4792 			GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4793 
4794 	g4x_disable_trickle_feed(dev);
4795 
4796 	/* WaMbcDriverBootEnable:ivb */
4797 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
4798 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
4799 
4800 	/* WaVSRefCountFullforceMissDisable:ivb */
4801 	gen7_setup_fixed_func_scheduler(dev_priv);
4802 
4803 	/* WaDisable4x2SubspanOptimization:ivb */
4804 	I915_WRITE(CACHE_MODE_1,
4805 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4806 
4807 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
4808 	snpcr &= ~GEN6_MBC_SNPCR_MASK;
4809 	snpcr |= GEN6_MBC_SNPCR_MED;
4810 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
4811 
4812 	if (!HAS_PCH_NOP(dev))
4813 		cpt_init_clock_gating(dev);
4814 
4815 	gen6_check_mch_setup(dev);
4816 }
4817 
4818 static void valleyview_init_clock_gating(struct drm_device *dev)
4819 {
4820 	struct drm_i915_private *dev_priv = dev->dev_private;
4821 
4822 	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
4823 
4824 	/* WaDisableEarlyCull:vlv */
4825 	I915_WRITE(_3D_CHICKEN3,
4826 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
4827 
4828 	/* WaDisableBackToBackFlipFix:vlv */
4829 	I915_WRITE(IVB_CHICKEN3,
4830 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
4831 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
4832 
4833 	/* WaDisablePSDDualDispatchEnable:vlv */
4834 	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
4835 		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
4836 				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4837 
4838 	/* Apply the WaDisableRHWOOptimizationForRenderHang:vlv workaround. */
4839 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4840 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4841 
4842 	/* WaApplyL3ControlAndL3ChickenMode:vlv */
4843 	I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
4844 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
4845 
4846 	/* WaForceL3Serialization:vlv */
4847 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
4848 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
4849 
4850 	/* WaDisableDopClockGating:vlv */
4851 	I915_WRITE(GEN7_ROW_CHICKEN2,
4852 		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4853 
4854 	/* This is required by WaCatErrorRejectionIssue:vlv */
4855 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4856 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4857 		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4858 
4859 	/* WaMbcDriverBootEnable:vlv */
4860 	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
4861 		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
4862 
4863 
4864 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4865 	 * gating disable must be set.  Failure to set it results in
4866 	 * flickering pixels due to Z write ordering failures after
4867 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4868 	 * Sanctuary and Tropics, and apparently anything else with
4869 	 * alpha test or pixel discard.
4870 	 *
4871 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4872 	 * but we didn't debug actual testcases to find it out.
4873 	 *
4874 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4875 	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
4876 	 *
4877 	 * Also apply WaDisableVDSUnitClockGating:vlv and
4878 	 * WaDisableRCPBUnitClockGating:vlv.
4879 	 */
4880 	I915_WRITE(GEN6_UCGCTL2,
4881 		   GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
4882 		   GEN7_TDLUNIT_CLOCK_GATE_DISABLE |
4883 		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
4884 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
4885 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4886 
4887 	I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
4888 
4889 	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
4890 
4891 	I915_WRITE(CACHE_MODE_1,
4892 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4893 
4894 	/*
4895 	 * WaDisableVLVClockGating_VBIIssue:vlv
4896 	 * Disable clock gating on the GCFG unit to prevent a delay
4897 	 * in the reporting of vblank events.
4898 	 */
4899 	I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
4900 
4901 	/* Conservative clock gating settings for now */
4902 	I915_WRITE(0x9400, 0xffffffff);
4903 	I915_WRITE(0x9404, 0xffffffff);
4904 	I915_WRITE(0x9408, 0xffffffff);
4905 	I915_WRITE(0x940c, 0xffffffff);
4906 	I915_WRITE(0x9410, 0xffffffff);
4907 	I915_WRITE(0x9414, 0xffffffff);
4908 	I915_WRITE(0x9418, 0xffffffff);
4909 }
4910 
4911 static void g4x_init_clock_gating(struct drm_device *dev)
4912 {
4913 	struct drm_i915_private *dev_priv = dev->dev_private;
4914 	uint32_t dspclk_gate;
4915 
4916 	I915_WRITE(RENCLK_GATE_D1, 0);
4917 	I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
4918 		   GS_UNIT_CLOCK_GATE_DISABLE |
4919 		   CL_UNIT_CLOCK_GATE_DISABLE);
4920 	I915_WRITE(RAMCLK_GATE_D, 0);
4921 	dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
4922 		OVRUNIT_CLOCK_GATE_DISABLE |
4923 		OVCUNIT_CLOCK_GATE_DISABLE;
4924 	if (IS_GM45(dev))
4925 		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
4926 	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
4927 
4928 	/* WaDisableRenderCachePipelinedFlush */
4929 	I915_WRITE(CACHE_MODE_0,
4930 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
4931 
4932 	g4x_disable_trickle_feed(dev);
4933 }
4934 
4935 static void crestline_init_clock_gating(struct drm_device *dev)
4936 {
4937 	struct drm_i915_private *dev_priv = dev->dev_private;
4938 
4939 	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
4940 	I915_WRITE(RENCLK_GATE_D2, 0);
4941 	I915_WRITE(DSPCLK_GATE_D, 0);
4942 	I915_WRITE(RAMCLK_GATE_D, 0);
4943 	I915_WRITE16(DEUC, 0);
4944 	I915_WRITE(MI_ARB_STATE,
4945 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4946 }
4947 
4948 static void broadwater_init_clock_gating(struct drm_device *dev)
4949 {
4950 	struct drm_i915_private *dev_priv = dev->dev_private;
4951 
4952 	I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
4953 		   I965_RCC_CLOCK_GATE_DISABLE |
4954 		   I965_RCPB_CLOCK_GATE_DISABLE |
4955 		   I965_ISC_CLOCK_GATE_DISABLE |
4956 		   I965_FBC_CLOCK_GATE_DISABLE);
4957 	I915_WRITE(RENCLK_GATE_D2, 0);
4958 	I915_WRITE(MI_ARB_STATE,
4959 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
4960 }
4961 
4962 static void gen3_init_clock_gating(struct drm_device *dev)
4963 {
4964 	struct drm_i915_private *dev_priv = dev->dev_private;
4965 	u32 dstate = I915_READ(D_STATE);
4966 
4967 	dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
4968 		DSTATE_DOT_CLOCK_GATING;
4969 	I915_WRITE(D_STATE, dstate);
4970 
4971 	if (IS_PINEVIEW(dev))
4972 		I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
4973 
4974 	/* IIR "flip pending" means done if this bit is set */
4975 	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
4976 }
4977 
4978 static void i85x_init_clock_gating(struct drm_device *dev)
4979 {
4980 	struct drm_i915_private *dev_priv = dev->dev_private;
4981 
4982 	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
4983 }
4984 
4985 static void i830_init_clock_gating(struct drm_device *dev)
4986 {
4987 	struct drm_i915_private *dev_priv = dev->dev_private;
4988 
4989 	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
4990 }
4991 
4992 void intel_init_clock_gating(struct drm_device *dev)
4993 {
4994 	struct drm_i915_private *dev_priv = dev->dev_private;
4995 
4996 	dev_priv->display.init_clock_gating(dev);
4997 }
4998 
4999 void intel_suspend_hw(struct drm_device *dev)
5000 {
5001 	if (HAS_PCH_LPT(dev))
5002 		lpt_suspend_hw(dev);
5003 }
5004 
5005 /**
5006  * We should only use the power well if we explicitly asked the hardware to
5007  * enable it, so check if it's enabled and also check if we've requested it to
5008  * be enabled.
5009  */
5010 bool intel_display_power_enabled(struct drm_device *dev,
5011 				 enum intel_display_power_domain domain)
5012 {
5013 	struct drm_i915_private *dev_priv = dev->dev_private;
5014 
5015 	if (!HAS_POWER_WELL(dev))
5016 		return true;
5017 
5018 	switch (domain) {
5019 	case POWER_DOMAIN_PIPE_A:
5020 	case POWER_DOMAIN_TRANSCODER_EDP:
5021 		return true;
5022 	case POWER_DOMAIN_PIPE_B:
5023 	case POWER_DOMAIN_PIPE_C:
5024 	case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
5025 	case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
5026 	case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
5027 	case POWER_DOMAIN_TRANSCODER_A:
5028 	case POWER_DOMAIN_TRANSCODER_B:
5029 	case POWER_DOMAIN_TRANSCODER_C:
5030 		return I915_READ(HSW_PWR_WELL_DRIVER) ==
5031 		       (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
5032 	default:
5033 		BUG();
5034 	}
5035 }
5036 
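/*
 * Request the Haswell power well on or off through HSW_PWR_WELL_DRIVER,
 * waiting for the state bit when enabling.  When disabling, also reset the
 * vblank counts of the pipes that lose their registers along with the well.
 */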
5037 static void __intel_set_power_well(struct drm_device *dev, bool enable)
5038 {
5039 	struct drm_i915_private *dev_priv = dev->dev_private;
5040 	bool is_enabled, enable_requested;
5041 	uint32_t tmp;
5042 
5043 	tmp = I915_READ(HSW_PWR_WELL_DRIVER);
5044 	is_enabled = tmp & HSW_PWR_WELL_STATE;
5045 	enable_requested = tmp & HSW_PWR_WELL_ENABLE;
5046 
5047 	if (enable) {
5048 		if (!enable_requested)
5049 			I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE);
5050 
5051 		if (!is_enabled) {
5052 			DRM_DEBUG_KMS("Enabling power well\n");
5053 			if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
5054 				      HSW_PWR_WELL_STATE), 20))
5055 				DRM_ERROR("Timeout enabling power well\n");
5056 		}
5057 	} else {
5058 		if (enable_requested) {
5059 			enum i915_pipe p;
5060 
5061 			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
5062 			POSTING_READ(HSW_PWR_WELL_DRIVER);
5063 			DRM_DEBUG_KMS("Requesting to disable the power well\n");
5064 
5065 			/*
5066 			 * After this, the registers on the pipes that are part
5067 			 * of the power well will become zero, so we have to
5068 			 * adjust our counters according to that.
5069 			 *
5070 			 * FIXME: Should we do this in general in
5071 			 * drm_vblank_post_modeset?
5072 			 */
5073 			lockmgr(&dev->vbl_lock, LK_EXCLUSIVE);
5074 			for_each_pipe(p)
5075 				if (p != PIPE_A)
5076 					dev->last_vblank[p] = 0;
5077 			lockmgr(&dev->vbl_lock, LK_RELEASE);
5078 		}
5079 	}
5080 }
5081 
5082 static struct i915_power_well *hsw_pwr;
5083 
5084 #if 0
5085 /* Display audio driver power well request */
5086 static void i915_request_power_well(void)
5087 {
5088 	if (WARN_ON(!hsw_pwr))
5089 		return;
5090 
5091 	lockmgr(&hsw_pwr->lock, LK_EXCLUSIVE);
5092 	if (!hsw_pwr->count++ &&
5093 			!hsw_pwr->i915_request)
5094 		__intel_set_power_well(hsw_pwr->device, true);
5095 	lockmgr(&hsw_pwr->lock, LK_RELEASE);
5096 }
5097 
5098 /* Display audio driver power well release */
5099 static void i915_release_power_well(void)
5100 {
5101 	if (WARN_ON(!hsw_pwr))
5102 		return;
5103 
5104 	lockmgr(&hsw_pwr->lock, LK_EXCLUSIVE);
5105 	WARN_ON(!hsw_pwr->count);
5106 	if (!--hsw_pwr->count &&
5107 		       !hsw_pwr->i915_request)
5108 		__intel_set_power_well(hsw_pwr->device, false);
5109 	lockmgr(&hsw_pwr->lock, LK_RELEASE);
5110 }
5111 #endif
5112 
5113 int i915_init_power_well(struct drm_device *dev)
5114 {
5115 	struct drm_i915_private *dev_priv = dev->dev_private;
5116 
5117 	hsw_pwr = &dev_priv->power_well;
5118 
5119 	hsw_pwr->device = dev;
5120 	lockinit(&hsw_pwr->lock, "hswpl", 0, LK_CANRECURSE);
5121 	hsw_pwr->count = 0;
5122 
5123 	return 0;
5124 }
5125 
5126 void i915_remove_power_well(struct drm_device *dev)
5127 {
5128 	hsw_pwr = NULL;
5129 }
5130 
5131 void intel_set_power_well(struct drm_device *dev, bool enable)
5132 {
5133 	struct drm_i915_private *dev_priv = dev->dev_private;
5134 	struct i915_power_well *power_well = &dev_priv->power_well;
5135 
5136 	if (!HAS_POWER_WELL(dev))
5137 		return;
5138 
5139 	if (!i915_disable_power_well && !enable)
5140 		return;
5141 
5142 	lockmgr(&power_well->lock, LK_EXCLUSIVE);
5143 	power_well->i915_request = enable;
5144 
5145 	/* only reject "disable" power well request */
5146 	/* Only reject a "disable" request while other users still hold a reference. */
5147 		lockmgr(&power_well->lock, LK_RELEASE);
5148 		return;
5149 	}
5150 
5151 	__intel_set_power_well(dev, enable);
5152 	lockmgr(&power_well->lock, LK_RELEASE);
5153 }
5154 
5155 /*
5156  * Starting with Haswell, we have a "Power Down Well" that can be turned off
5157  * when not needed anymore. We have 4 registers that can request the power well
5158  * to be enabled, and it will only be disabled if none of the registers is
5159  * requesting it to be enabled.
5160  */
5161 void intel_init_power_well(struct drm_device *dev)
5162 {
5163 	struct drm_i915_private *dev_priv = dev->dev_private;
5164 
5165 	if (!HAS_POWER_WELL(dev))
5166 		return;
5167 
5168 	/* For now, we need the power well to be always enabled. */
5169 	intel_set_power_well(dev, true);
5170 
5171 	/* We're taking over the BIOS, so clear any requests made by it since
5172 	 * the driver is in charge now. */
5173 	if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE)
5174 		I915_WRITE(HSW_PWR_WELL_BIOS, 0);
5175 }
5176 
5177 /* Set up chip specific power management-related functions */
5178 void intel_init_pm(struct drm_device *dev)
5179 {
5180 	struct drm_i915_private *dev_priv = dev->dev_private;
5181 
5182 	if (I915_HAS_FBC(dev)) {
5183 		if (HAS_PCH_SPLIT(dev)) {
5184 			dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
5185 			if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
5186 				dev_priv->display.enable_fbc =
5187 					gen7_enable_fbc;
5188 			else
5189 				dev_priv->display.enable_fbc =
5190 					ironlake_enable_fbc;
5191 			dev_priv->display.disable_fbc = ironlake_disable_fbc;
5192 		} else if (IS_GM45(dev)) {
5193 			dev_priv->display.fbc_enabled = g4x_fbc_enabled;
5194 			dev_priv->display.enable_fbc = g4x_enable_fbc;
5195 			dev_priv->display.disable_fbc = g4x_disable_fbc;
5196 		} else if (IS_CRESTLINE(dev)) {
5197 			dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
5198 			dev_priv->display.enable_fbc = i8xx_enable_fbc;
5199 			dev_priv->display.disable_fbc = i8xx_disable_fbc;
5200 		}
5201 		/* 855GM needs testing */
5202 	}
5203 
5204 	/* For CxSR we need the memory and FSB frequencies */
5205 	if (IS_PINEVIEW(dev))
5206 		i915_pineview_get_mem_freq(dev);
5207 	else if (IS_GEN5(dev))
5208 		i915_ironlake_get_mem_freq(dev);
5209 
5210 	/* For FIFO watermark updates */
5211 	if (HAS_PCH_SPLIT(dev)) {
5212 		if (IS_GEN5(dev)) {
5213 			if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
5214 				dev_priv->display.update_wm = ironlake_update_wm;
5215 			else {
5216 				DRM_DEBUG_KMS("Failed to get proper latency. "
5217 					      "Disable CxSR\n");
5218 				dev_priv->display.update_wm = NULL;
5219 			}
5220 			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
5221 		} else if (IS_GEN6(dev)) {
5222 			if (SNB_READ_WM0_LATENCY()) {
5223 				dev_priv->display.update_wm = sandybridge_update_wm;
5224 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
5225 			} else {
5226 				DRM_DEBUG_KMS("Failed to read display plane latency. "
5227 					      "Disable CxSR\n");
5228 				dev_priv->display.update_wm = NULL;
5229 			}
5230 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
5231 		} else if (IS_IVYBRIDGE(dev)) {
5232 			if (SNB_READ_WM0_LATENCY()) {
5233 				dev_priv->display.update_wm = ivybridge_update_wm;
5234 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
5235 			} else {
5236 				DRM_DEBUG_KMS("Failed to read display plane latency. "
5237 					      "Disable CxSR\n");
5238 				dev_priv->display.update_wm = NULL;
5239 			}
5240 			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
5241 		} else if (IS_HASWELL(dev)) {
5242 			if (I915_READ64(MCH_SSKPD)) {
5243 				dev_priv->display.update_wm = haswell_update_wm;
5244 				dev_priv->display.update_sprite_wm =
5245 					haswell_update_sprite_wm;
5246 			} else {
5247 				DRM_DEBUG_KMS("Failed to read display plane latency. "
5248 					      "Disable CxSR\n");
5249 				dev_priv->display.update_wm = NULL;
5250 			}
5251 			dev_priv->display.init_clock_gating = haswell_init_clock_gating;
5252 		} else
5253 			dev_priv->display.update_wm = NULL;
5254 	} else if (IS_VALLEYVIEW(dev)) {
5255 		dev_priv->display.update_wm = valleyview_update_wm;
5256 		dev_priv->display.init_clock_gating =
5257 			valleyview_init_clock_gating;
5258 	} else if (IS_PINEVIEW(dev)) {
5259 		if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
5260 					    dev_priv->is_ddr3,
5261 					    dev_priv->fsb_freq,
5262 					    dev_priv->mem_freq)) {
5263 			DRM_INFO("failed to find known CxSR latency "
5264 				 "(found ddr%s fsb freq %d, mem freq %d), "
5265 				 "disabling CxSR\n",
5266 				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
5267 				 dev_priv->fsb_freq, dev_priv->mem_freq);
5268 			/* Disable CxSR and never update its watermark again */
5269 			pineview_disable_cxsr(dev);
5270 			dev_priv->display.update_wm = NULL;
5271 		} else
5272 			dev_priv->display.update_wm = pineview_update_wm;
5273 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
5274 	} else if (IS_G4X(dev)) {
5275 		dev_priv->display.update_wm = g4x_update_wm;
5276 		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
5277 	} else if (IS_GEN4(dev)) {
5278 		dev_priv->display.update_wm = i965_update_wm;
5279 		if (IS_CRESTLINE(dev))
5280 			dev_priv->display.init_clock_gating = crestline_init_clock_gating;
5281 		else if (IS_BROADWATER(dev))
5282 			dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
5283 	} else if (IS_GEN3(dev)) {
5284 		dev_priv->display.update_wm = i9xx_update_wm;
5285 		dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
5286 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
5287 	} else if (IS_I865G(dev)) {
5288 		dev_priv->display.update_wm = i830_update_wm;
5289 		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
5290 		dev_priv->display.get_fifo_size = i830_get_fifo_size;
5291 	} else if (IS_I85X(dev)) {
5292 		dev_priv->display.update_wm = i9xx_update_wm;
5293 		dev_priv->display.get_fifo_size = i85x_get_fifo_size;
5294 		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
5295 	} else {
5296 		dev_priv->display.update_wm = i830_update_wm;
5297 		dev_priv->display.init_clock_gating = i830_init_clock_gating;
5298 		if (IS_845G(dev))
5299 			dev_priv->display.get_fifo_size = i845_get_fifo_size;
5300 		else
5301 			dev_priv->display.get_fifo_size = i830_get_fifo_size;
5302 	}
5303 }
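
/*
 * A minimal sketch of how the hooks installed above are consumed by the
 * rest of the driver (the actual call sites live outside this file):
 *
 *	if (dev_priv->display.update_wm)
 *		dev_priv->display.update_wm(dev);
 *	if (dev_priv->display.init_clock_gating)
 *		dev_priv->display.init_clock_gating(dev);
 *
 * Platforms left with update_wm == NULL simply skip watermark programming
 * (CxSR stays disabled).
 */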
5304 
5305 static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
5306 {
5307 	u32 gt_thread_status_mask;
5308 
5309 	if (IS_HASWELL(dev_priv->dev))
5310 		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW;
5311 	else
5312 		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK;
5313 
5314 	/* Workaround for a sporadic read returning 0: wait for the GT
5315 	 * thread to wake up before trusting subsequent register reads.
5316 	 */
5317 	if (wait_for_atomic_us((I915_READ_NOTRACE(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500))
5318 		DRM_ERROR("GT thread status wait timed out\n");
5319 }
5320 
5321 static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
5322 {
5323 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
5324 	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
5325 }
5326 
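/*
 * Gen6 forcewake handshake: wait for the ACK bit to clear (any previous
 * request has been released), write 1 to FORCEWAKE to keep the GT powered
 * up, then wait for the ACK bit to be set again before touching GT
 * registers.
 */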
5327 static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
5328 {
5329 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
5330 			    FORCEWAKE_ACK_TIMEOUT_MS))
5331 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
5332 
5333 	I915_WRITE_NOTRACE(FORCEWAKE, 1);
5334 	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
5335 
5336 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
5337 			    FORCEWAKE_ACK_TIMEOUT_MS))
5338 		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
5339 
5340 	/* WaRsForcewakeWaitTC0:snb */
5341 	__gen6_gt_wait_for_thread_c0(dev_priv);
5342 }
5343 
5344 static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
5345 {
5346 	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
5347 	/* something from same cacheline, but !FORCEWAKE_MT */
5348 	POSTING_READ(ECOBUS);
5349 }
5350 
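/*
 * Multi-threaded forcewake variant (IVB/HSW). FORCEWAKE_MT is a masked
 * register: the upper 16 bits select which bits the write affects, which
 * is what the _MASKED_BIT_ENABLE/_MASKED_BIT_DISABLE macros encode. Only
 * the FORCEWAKE_KERNEL bit is driven here, and Haswell acks on a separate
 * register.
 */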
5351 static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
5352 {
5353 	u32 forcewake_ack;
5354 
5355 	if (IS_HASWELL(dev_priv->dev))
5356 		forcewake_ack = FORCEWAKE_ACK_HSW;
5357 	else
5358 		forcewake_ack = FORCEWAKE_MT_ACK;
5359 
5360 	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
5361 			    FORCEWAKE_ACK_TIMEOUT_MS))
5362 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
5363 
5364 	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
5365 	/* something from same cacheline, but !FORCEWAKE_MT */
5366 	POSTING_READ(ECOBUS);
5367 
5368 	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
5369 			    FORCEWAKE_ACK_TIMEOUT_MS))
5370 		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
5371 
5372 	/* WaRsForcewakeWaitTC0:ivb,hsw */
5373 	__gen6_gt_wait_for_thread_c0(dev_priv);
5374 }
5375 
5376 /*
5377  * Generally this is called implicitly by the register read function. However,
5378  * if some sequence requires the GT to not power down then this function should
5379  * be called at the beginning of the sequence followed by a call to
5380  * gen6_gt_force_wake_put() at the end of the sequence.
5381  */
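/*
 * A minimal usage sketch (hypothetical caller):
 *
 *	gen6_gt_force_wake_get(dev_priv);
 *	... several I915_READ()/I915_WRITE() accesses that must not race
 *	    with the GT powering down ...
 *	gen6_gt_force_wake_put(dev_priv);
 *
 * The reference count below makes nested get/put pairs safe.
 */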
5382 void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
5383 {
5384 
5385 	lockmgr(&dev_priv->gt_lock, LK_EXCLUSIVE);
5386 	if (dev_priv->forcewake_count++ == 0)
5387 		dev_priv->gt.force_wake_get(dev_priv);
5388 	lockmgr(&dev_priv->gt_lock, LK_RELEASE);
5389 }
5390 
5391 void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
5392 {
5393 	u32 gtfifodbg;
5394 	gtfifodbg = I915_READ_NOTRACE(GTFIFODBG);
5395 	if (WARN(gtfifodbg & GT_FIFO_CPU_ERROR_MASK,
5396 	     "MMIO read or write has been dropped %x\n", gtfifodbg))
5397 		I915_WRITE_NOTRACE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
5398 }
5399 
5400 static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
5401 {
5402 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
5403 	/* something from same cacheline, but !FORCEWAKE */
5404 	POSTING_READ(ECOBUS);
5405 	gen6_gt_check_fifodbg(dev_priv);
5406 }
5407 
5408 static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
5409 {
5410 	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
5411 	/* something from same cacheline, but !FORCEWAKE_MT */
5412 	POSTING_READ(ECOBUS);
5413 	gen6_gt_check_fifodbg(dev_priv);
5414 }
5415 
5416 /*
5417  * see gen6_gt_force_wake_get()
5418  */
5419 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
5420 {
5421 	lockmgr(&dev_priv->gt_lock, LK_EXCLUSIVE);
5422 	if (--dev_priv->forcewake_count == 0)
5423 		dev_priv->gt.force_wake_put(dev_priv);
5424 	lockmgr(&dev_priv->gt_lock, LK_RELEASE);
5425 }
5426 
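/*
 * Before posting a write to the GT, make sure the shared write FIFO has a
 * free slot beyond the reserved entries. We poll GT_FIFO_FREE_ENTRIES for
 * up to 500 * 10us, charge the cached free-entry count for this access,
 * and return non-zero if the FIFO never drained.
 */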
5427 int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
5428 {
5429 	int ret = 0;
5430 
5431 	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
5432 		int loop = 500;
5433 		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
5434 		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
5435 			udelay(10);
5436 			fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
5437 		}
5438 		if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
5439 			++ret;
5440 		dev_priv->gt_fifo_count = fifo;
5441 	}
5442 	dev_priv->gt_fifo_count--;
5443 
5444 	return ret;
5445 }
5446 
5447 static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
5448 {
5449 	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
5450 	/* something from same cacheline, but !FORCEWAKE_VLV */
5451 	POSTING_READ(FORCEWAKE_ACK_VLV);
5452 }
5453 
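/*
 * Valleyview splits forcewake into separate render and media wells, so
 * both FORCEWAKE_VLV and FORCEWAKE_MEDIA_VLV are requested here and both
 * acks are waited for.
 */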
5454 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
5455 {
5456 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
5457 			    FORCEWAKE_ACK_TIMEOUT_MS))
5458 		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
5459 
5460 	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
5461 	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
5462 			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
5463 
5464 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
5465 			    FORCEWAKE_ACK_TIMEOUT_MS))
5466 		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
5467 
5468 	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
5469 			     FORCEWAKE_KERNEL),
5470 			    FORCEWAKE_ACK_TIMEOUT_MS))
5471 		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
5472 
5473 	/* WaRsForcewakeWaitTC0:vlv */
5474 	__gen6_gt_wait_for_thread_c0(dev_priv);
5475 }
5476 
5477 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
5478 {
5479 	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
5480 	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
5481 			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
5482 	/* The below doubles as a POSTING_READ */
5483 	gen6_gt_check_fifodbg(dev_priv);
5484 }
5485 
5486 void intel_gt_sanitize(struct drm_device *dev)
5487 {
5488 	struct drm_i915_private *dev_priv = dev->dev_private;
5489 
5490 	if (IS_VALLEYVIEW(dev)) {
5491 		vlv_force_wake_reset(dev_priv);
5492 	} else if (INTEL_INFO(dev)->gen >= 6) {
5493 		__gen6_gt_force_wake_reset(dev_priv);
5494 		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
5495 			__gen6_gt_force_wake_mt_reset(dev_priv);
5496 	}
5497 
5498 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
5499 	if (INTEL_INFO(dev)->gen >= 6)
5500 		intel_disable_gt_powersave(dev);
5501 }
5502 
5503 void intel_gt_init(struct drm_device *dev)
5504 {
5505 	struct drm_i915_private *dev_priv = dev->dev_private;
5506 
5507 	if (IS_VALLEYVIEW(dev)) {
5508 		dev_priv->gt.force_wake_get = vlv_force_wake_get;
5509 		dev_priv->gt.force_wake_put = vlv_force_wake_put;
5510 	} else if (IS_HASWELL(dev)) {
5511 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
5512 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
5513 	} else if (IS_IVYBRIDGE(dev)) {
5514 		u32 ecobus;
5515 
5516 		/* IVB configs may use multi-threaded forcewake */
5517 
5518 		/* A small trick here - if the bios hasn't configured
5519 		 * MT forcewake, and if the device is in RC6, then
5520 		 * force_wake_mt_get will not wake the device and the
5521 		 * ECOBUS read will return zero. Which will be
5522 		 * (correctly) interpreted by the test below as MT
5523 		 * forcewake being disabled.
5524 		 */
5525 		mutex_lock(&dev->struct_mutex);
5526 		__gen6_gt_force_wake_mt_get(dev_priv);
5527 		ecobus = I915_READ_NOTRACE(ECOBUS);
5528 		__gen6_gt_force_wake_mt_put(dev_priv);
5529 		mutex_unlock(&dev->struct_mutex);
5530 
5531 		if (ecobus & FORCEWAKE_MT_ENABLE) {
5532 			dev_priv->gt.force_wake_get =
5533 						__gen6_gt_force_wake_mt_get;
5534 			dev_priv->gt.force_wake_put =
5535 						__gen6_gt_force_wake_mt_put;
5536 		} else {
5537 			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
5538 			DRM_INFO("when using vblank-synced partial screen updates.\n");
5539 			dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
5540 			dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
5541 		}
5542 	} else if (IS_GEN6(dev)) {
5543 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
5544 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
5545 	}
5546 }
5547 
5548 void intel_pm_init(struct drm_device *dev)
5549 {
5550 	struct drm_i915_private *dev_priv = dev->dev_private;
5551 
5552 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
5553 			  intel_gen6_powersave_work);
5554 }
5555 
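/*
 * Pcode mailbox protocol (caller must hold rps.hw_lock): the mailbox must
 * be idle (GEN6_PCODE_READY clear), we then write the data register and
 * the mailbox command with READY set, and the pcode firmware clears READY
 * once it has processed the request. For reads the result is then fetched
 * from GEN6_PCODE_DATA.
 */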
5556 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
5557 {
5558 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5559 
5560 	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5561 		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
5562 		return -EAGAIN;
5563 	}
5564 
5565 	I915_WRITE(GEN6_PCODE_DATA, *val);
5566 	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
5567 
5568 	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
5569 		     500)) {
5570 		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
5571 		return -ETIMEDOUT;
5572 	}
5573 
5574 	*val = I915_READ(GEN6_PCODE_DATA);
5575 	I915_WRITE(GEN6_PCODE_DATA, 0);
5576 
5577 	return 0;
5578 }
5579 
5580 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
5581 {
5582 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5583 
5584 	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5585 		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
5586 		return -EAGAIN;
5587 	}
5588 
5589 	I915_WRITE(GEN6_PCODE_DATA, val);
5590 	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
5591 
5592 	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
5593 		     500)) {
5594 		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
5595 		return -ETIMEDOUT;
5596 	}
5597 
5598 	I915_WRITE(GEN6_PCODE_DATA, 0);
5599 
5600 	return 0;
5601 }
5602 
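/*
 * Convert a Valleyview frequency opcode to a GPU frequency in MHz:
 * freq = (val - 0xbd) * mult + base, with mult/base picked from the DDR
 * speed. For example, with ddr_freq 1066 (mult 22, base 133) an opcode of
 * 0xd0 maps to (0xd0 - 0xbd) * 22 + 133 = 551.
 */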
5603 int vlv_gpu_freq(int ddr_freq, int val)
5604 {
5605 	int mult, base;
5606 
5607 	switch (ddr_freq) {
5608 	case 800:
5609 		mult = 20;
5610 		base = 120;
5611 		break;
5612 	case 1066:
5613 		mult = 22;
5614 		base = 133;
5615 		break;
5616 	case 1333:
5617 		mult = 21;
5618 		base = 125;
5619 		break;
5620 	default:
5621 		return -1;
5622 	}
5623 
5624 	return ((val - 0xbd) * mult) + base;
5625 }
5626 
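/*
 * Approximate inverse of vlv_gpu_freq(): map a frequency back to an
 * opcode using integer division, clamping the result at 0xea. Reversing
 * the example above: 551 / 22 = 25, minus 133 / 22 = 6, plus 0xbd gives
 * 0xd0 again.
 */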
5627 int vlv_freq_opcode(int ddr_freq, int val)
5628 {
5629 	int mult, base;
5630 
5631 	switch (ddr_freq) {
5632 	case 800:
5633 		mult = 20;
5634 		base = 120;
5635 		break;
5636 	case 1066:
5637 		mult = 22;
5638 		base = 133;
5639 		break;
5640 	case 1333:
5641 		mult = 21;
5642 		base = 125;
5643 		break;
5644 	default:
5645 		return -1;
5646 	}
5647 
5648 	val /= mult;
5649 	val -= base / mult;
5650 	val += 0xbd;
5651 
5652 	if (val > 0xea)
5653 		val = 0xea;
5654 
5655 	return val;
5656 }
5657 
5658