xref: /dflybsd-src/sys/dev/drm/i915/intel_overlay.c (revision 872a09d51adf63b4bdae6adb1d96a53f76e161e2)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 #include "intel_frontbuffer.h"
34 
/* Limits for overlay size. According to the Intel docs, the real limits are:
 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023. But the xorg driver assumes 2048 for height and
 * width. Use the minimum of both.  */
39 #define IMAGE_MAX_WIDTH		2048
40 #define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
41 /* on 830 and 845 these large limits result in the card hanging */
42 #define IMAGE_MAX_WIDTH_LEGACY	1024
43 #define IMAGE_MAX_HEIGHT_LEGACY	1088
44 
45 /* overlay register definitions */
46 /* OCMD register */
47 #define OCMD_TILED_SURFACE	(0x1<<19)
48 #define OCMD_MIRROR_MASK	(0x3<<17)
49 #define OCMD_MIRROR_MODE	(0x3<<17)
50 #define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
51 #define OCMD_MIRROR_VERTICAL	(0x2<<17)
52 #define OCMD_MIRROR_BOTH	(0x3<<17)
53 #define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
54 #define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
55 #define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
56 #define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
57 #define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
58 #define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
59 #define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
60 #define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
61 #define OCMD_YUV_422_PACKED	(0x8<<10)
62 #define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
63 #define OCMD_YUV_420_PLANAR	(0xc<<10)
64 #define OCMD_YUV_422_PLANAR	(0xd<<10)
65 #define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
66 #define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
67 #define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
68 #define OCMD_BUF_TYPE_MASK	(0x1<<5)
69 #define OCMD_BUF_TYPE_FRAME	(0x0<<5)
70 #define OCMD_BUF_TYPE_FIELD	(0x1<<5)
71 #define OCMD_TEST_MODE		(0x1<<4)
72 #define OCMD_BUFFER_SELECT	(0x3<<2)
73 #define OCMD_BUFFER0		(0x0<<2)
74 #define OCMD_BUFFER1		(0x1<<2)
75 #define OCMD_FIELD_SELECT	(0x1<<2)
76 #define OCMD_FIELD0		(0x0<<1)
77 #define OCMD_FIELD1		(0x1<<1)
78 #define OCMD_ENABLE		(0x1<<0)
79 
80 /* OCONFIG register */
81 #define OCONF_PIPE_MASK		(0x1<<18)
82 #define OCONF_PIPE_A		(0x0<<18)
83 #define OCONF_PIPE_B		(0x1<<18)
84 #define OCONF_GAMMA2_ENABLE	(0x1<<16)
85 #define OCONF_CSC_MODE_BT601	(0x0<<5)
86 #define OCONF_CSC_MODE_BT709	(0x1<<5)
87 #define OCONF_CSC_BYPASS	(0x1<<4)
88 #define OCONF_CC_OUT_8BIT	(0x1<<3)
89 #define OCONF_TEST_MODE		(0x1<<2)
90 #define OCONF_THREE_LINE_BUFFER	(0x1<<0)
91 #define OCONF_TWO_LINE_BUFFER	(0x0<<0)
92 
93 /* DCLRKM (dst-key) register */
94 #define DST_KEY_ENABLE		(0x1<<31)
95 #define CLK_RGB24_MASK		0x0
96 #define CLK_RGB16_MASK		0x070307
97 #define CLK_RGB15_MASK		0x070707
98 #define CLK_RGB8I_MASK		0xffffff
99 
/*
 * Expand a 16bpp (5:6:5) or 15bpp (5:5:5) pixel value into the 24-bit
 * colour key layout: each channel is masked out and shifted so that it
 * occupies the most significant bits of its own byte.
 *
 * The argument is fully parenthesized so that compound expressions such
 * as RGB16_TO_COLORKEY(a | b) expand correctly. Note that it is still
 * evaluated three times, so it must not have side effects.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
104 
105 /* overlay flip addr flag */
106 #define OFC_UPDATE		0x1
107 
108 /* polyphase filter coefficients */
109 #define N_HORIZ_Y_TAPS          5
110 #define N_VERT_Y_TAPS           3
111 #define N_HORIZ_UV_TAPS         3
112 #define N_VERT_UV_TAPS          3
113 #define N_PHASES                17
114 #define MAX_TAPS                5
115 
/*
 * Memory buffered overlay registers.
 *
 * In-memory image of the overlay register block. The driver fills this in
 * through a mapping obtained from intel_overlay_map_regs() and then hands
 * its address to the hardware with an MI_OVERLAY_FLIP command. Field order
 * and the RESERVED* padding arrays keep every member at the byte offset
 * noted in the trailing comments.
 */
struct overlay_registers {
	/* buffer base addresses (GGTT offset of the frame + plane offset) */
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;	/* Y stride in the low half, UV stride << 16 */
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;	/* destination window: (y << 16) | x */
	u32 DWINSZ;	/* destination window: (height << 16) | width */
	u32 SWIDTH;	/* source width, UV width << 16 for planar */
	u32 SWIDTHSW;	/* see calc_swidthsw() */
	u32 SHEIGHT;	/* source height, UV height << 16 for planar */
	u32 YRGBSCALE;	/* see update_scaling_factors() */
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;	/* destination colour key value, see update_colorkey() */
	u32 DCLRKM;	/* destination colour key mask and enable bit */
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;	/* OCONF_* bits */
	u32 OCMD;	/* OCMD_* bits */
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	/* polyphase filter coefficient tables, one entry per tap and phase */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
170 
/* Driver-side state of the (single) hardware video overlay. */
struct intel_overlay {
	struct drm_i915_private *i915;	/* owning device */
	struct intel_crtc *crtc;	/* crtc the overlay is shown on, NULL when off */
	struct drm_i915_gem_object *vid_bo;	/* frame currently being displayed */
	struct drm_i915_gem_object *old_vid_bo;	/* previous frame, awaiting release */
	bool active;	/* set by intel_overlay_on(), cleared once switched off */
	bool pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	u32 color_key:24;
	u32 color_key_enabled:1;
	u32 brightness, contrast, saturation;
	/* scale factors last programmed by update_scaling_factors() */
	u32 old_xscale, old_yscale;
	/* register access */
	u32 flip_addr;	/* address of the register page, emitted with MI_OVERLAY_FLIP */
	struct drm_i915_gem_object *reg_bo;	/* object backing the register page */
	/* flip handling */
	struct i915_gem_active last_flip;	/* tracks the last submitted overlay request */
};
189 
190 static struct overlay_registers __iomem *
191 intel_overlay_map_regs(struct intel_overlay *overlay)
192 {
193 	struct drm_i915_private *dev_priv = overlay->i915;
194 	struct overlay_registers __iomem *regs;
195 
196 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
197 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
198 	else
199 		regs = io_mapping_map_wc(dev_priv->ggtt.mappable,
200 					 overlay->flip_addr,
201 					 PAGE_SIZE);
202 
203 	return regs;
204 }
205 
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207 				     struct overlay_registers __iomem *regs)
208 {
209 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
210 		io_mapping_unmap(regs);
211 }
212 
213 static void intel_overlay_submit_request(struct intel_overlay *overlay,
214 					 struct drm_i915_gem_request *req,
215 					 i915_gem_retire_fn retire)
216 {
217 	GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
218 					&overlay->i915->drm.struct_mutex));
219 	overlay->last_flip.retire = retire;
220 	i915_gem_active_set(&overlay->last_flip, req);
221 	i915_add_request(req);
222 }
223 
224 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
225 					 struct drm_i915_gem_request *req,
226 					 i915_gem_retire_fn retire)
227 {
228 	intel_overlay_submit_request(overlay, req, retire);
229 	return i915_gem_active_retire(&overlay->last_flip,
230 				      &overlay->i915->drm.struct_mutex);
231 }
232 
233 static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay)
234 {
235 	struct drm_i915_private *dev_priv = overlay->i915;
236 	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
237 
238 	return i915_gem_request_alloc(engine, dev_priv->kernel_context);
239 }
240 
241 /* overlay needs to be disable in OCMD reg */
242 static int intel_overlay_on(struct intel_overlay *overlay)
243 {
244 	struct drm_i915_private *dev_priv = overlay->i915;
245 	struct drm_i915_gem_request *req;
246 	struct intel_ring *ring;
247 	int ret;
248 
249 	WARN_ON(overlay->active);
250 	WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
251 
252 	req = alloc_request(overlay);
253 	if (IS_ERR(req))
254 		return PTR_ERR(req);
255 
256 	ret = intel_ring_begin(req, 4);
257 	if (ret) {
258 		i915_add_request_no_flush(req);
259 		return ret;
260 	}
261 
262 	overlay->active = true;
263 
264 	ring = req->ring;
265 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
266 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
267 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
268 	intel_ring_emit(ring, MI_NOOP);
269 	intel_ring_advance(ring);
270 
271 	return intel_overlay_do_wait_request(overlay, req, NULL);
272 }
273 
274 /* overlay needs to be enabled in OCMD reg */
275 static int intel_overlay_continue(struct intel_overlay *overlay,
276 				  bool load_polyphase_filter)
277 {
278 	struct drm_i915_private *dev_priv = overlay->i915;
279 	struct drm_i915_gem_request *req;
280 	struct intel_ring *ring;
281 	u32 flip_addr = overlay->flip_addr;
282 	u32 tmp;
283 	int ret;
284 
285 	WARN_ON(!overlay->active);
286 
287 	if (load_polyphase_filter)
288 		flip_addr |= OFC_UPDATE;
289 
290 	/* check for underruns */
291 	tmp = I915_READ(DOVSTA);
292 	if (tmp & (1 << 17))
293 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
294 
295 	req = alloc_request(overlay);
296 	if (IS_ERR(req))
297 		return PTR_ERR(req);
298 
299 	ret = intel_ring_begin(req, 2);
300 	if (ret) {
301 		i915_add_request_no_flush(req);
302 		return ret;
303 	}
304 
305 	ring = req->ring;
306 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
307 	intel_ring_emit(ring, flip_addr);
308 	intel_ring_advance(ring);
309 
310 	intel_overlay_submit_request(overlay, req, NULL);
311 
312 	return 0;
313 }
314 
315 static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
316 					       struct drm_i915_gem_request *req)
317 {
318 	struct intel_overlay *overlay =
319 		container_of(active, typeof(*overlay), last_flip);
320 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
321 
322 	i915_gem_track_fb(obj, NULL,
323 			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
324 
325 	i915_gem_object_ggtt_unpin(obj);
326 	i915_gem_object_put(obj);
327 
328 	overlay->old_vid_bo = NULL;
329 }
330 
331 static void intel_overlay_off_tail(struct i915_gem_active *active,
332 				   struct drm_i915_gem_request *req)
333 {
334 	struct intel_overlay *overlay =
335 		container_of(active, typeof(*overlay), last_flip);
336 	struct drm_i915_gem_object *obj = overlay->vid_bo;
337 
338 	/* never have the overlay hw on without showing a frame */
339 	if (WARN_ON(!obj))
340 		return;
341 
342 	i915_gem_object_ggtt_unpin(obj);
343 	i915_gem_object_put(obj);
344 	overlay->vid_bo = NULL;
345 
346 	overlay->crtc->overlay = NULL;
347 	overlay->crtc = NULL;
348 	overlay->active = false;
349 }
350 
351 /* overlay needs to be disabled in OCMD reg */
352 static int intel_overlay_off(struct intel_overlay *overlay)
353 {
354 	struct drm_i915_private *dev_priv = overlay->i915;
355 	struct drm_i915_gem_request *req;
356 	struct intel_ring *ring;
357 	u32 flip_addr = overlay->flip_addr;
358 	int ret;
359 
360 	WARN_ON(!overlay->active);
361 
362 	/* According to intel docs the overlay hw may hang (when switching
363 	 * off) without loading the filter coeffs. It is however unclear whether
364 	 * this applies to the disabling of the overlay or to the switching off
365 	 * of the hw. Do it in both cases */
366 	flip_addr |= OFC_UPDATE;
367 
368 	req = alloc_request(overlay);
369 	if (IS_ERR(req))
370 		return PTR_ERR(req);
371 
372 	ret = intel_ring_begin(req, 6);
373 	if (ret) {
374 		i915_add_request_no_flush(req);
375 		return ret;
376 	}
377 
378 	ring = req->ring;
379 	/* wait for overlay to go idle */
380 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
381 	intel_ring_emit(ring, flip_addr);
382 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
383 	/* turn overlay off */
384 	if (IS_I830(dev_priv)) {
385 		/* Workaround: Don't disable the overlay fully, since otherwise
386 		 * it dies on the next OVERLAY_ON cmd. */
387 		intel_ring_emit(ring, MI_NOOP);
388 		intel_ring_emit(ring, MI_NOOP);
389 		intel_ring_emit(ring, MI_NOOP);
390 	} else {
391 		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
392 		intel_ring_emit(ring, flip_addr);
393 		intel_ring_emit(ring,
394 				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
395 	}
396 	intel_ring_advance(ring);
397 
398 	return intel_overlay_do_wait_request(overlay, req,
399 					     intel_overlay_off_tail);
400 }
401 
402 /* recover from an interruption due to a signal
403  * We have to be careful not to repeat work forever an make forward progess. */
404 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
405 {
406 	return i915_gem_active_retire(&overlay->last_flip,
407 				      &overlay->i915->drm.struct_mutex);
408 }
409 
410 /* Wait for pending overlay flip and release old frame.
411  * Needs to be called before the overlay register are changed
412  * via intel_overlay_(un)map_regs
413  */
414 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
415 {
416 	struct drm_i915_private *dev_priv = overlay->i915;
417 	int ret;
418 
419 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
420 
421 	/* Only wait if there is actually an old frame to release to
422 	 * guarantee forward progress.
423 	 */
424 	if (!overlay->old_vid_bo)
425 		return 0;
426 
427 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
428 		/* synchronous slowpath */
429 		struct drm_i915_gem_request *req;
430 		struct intel_ring *ring;
431 
432 		req = alloc_request(overlay);
433 		if (IS_ERR(req))
434 			return PTR_ERR(req);
435 
436 		ret = intel_ring_begin(req, 2);
437 		if (ret) {
438 			i915_add_request_no_flush(req);
439 			return ret;
440 		}
441 
442 		ring = req->ring;
443 		intel_ring_emit(ring,
444 				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
445 		intel_ring_emit(ring, MI_NOOP);
446 		intel_ring_advance(ring);
447 
448 		ret = intel_overlay_do_wait_request(overlay, req,
449 						    intel_overlay_release_old_vid_tail);
450 		if (ret)
451 			return ret;
452 	} else
453 		intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL);
454 
455 	return 0;
456 }
457 
458 void intel_overlay_reset(struct drm_i915_private *dev_priv)
459 {
460 	struct intel_overlay *overlay = dev_priv->overlay;
461 
462 	if (!overlay)
463 		return;
464 
465 	intel_overlay_release_old_vid(overlay);
466 
467 	overlay->old_xscale = 0;
468 	overlay->old_yscale = 0;
469 	overlay->crtc = NULL;
470 	overlay->active = false;
471 }
472 
/*
 * Parameters of one frame to be shown, as consumed by
 * intel_overlay_do_put_image() and its helpers.
 */
struct put_image_params {
	int format;		/* I915_OVERLAY_* type/depth/swap flags */
	/* destination window, written to DWINPOS/DWINSZ */
	short dst_x;
	short dst_y;
	short dst_w;
	short dst_h;
	/* source size; src_scan_* are the values used for the scale factors */
	short src_w;
	short src_scan_h;
	short src_scan_w;
	short src_h;
	/* line strides, written to OSTRIDE */
	short stride_Y;
	short stride_UV;
	/* plane offsets, added to the buffer's GGTT offset */
	int offset_Y;
	int offset_U;
	int offset_V;
};
489 
490 static int packed_depth_bytes(u32 format)
491 {
492 	switch (format & I915_OVERLAY_DEPTH_MASK) {
493 	case I915_OVERLAY_YUV422:
494 		return 4;
495 	case I915_OVERLAY_YUV411:
496 		/* return 6; not implemented */
497 	default:
498 		return -EINVAL;
499 	}
500 }
501 
502 static int packed_width_bytes(u32 format, short width)
503 {
504 	switch (format & I915_OVERLAY_DEPTH_MASK) {
505 	case I915_OVERLAY_YUV422:
506 		return width << 1;
507 	default:
508 		return -EINVAL;
509 	}
510 }
511 
512 static int uv_hsubsampling(u32 format)
513 {
514 	switch (format & I915_OVERLAY_DEPTH_MASK) {
515 	case I915_OVERLAY_YUV422:
516 	case I915_OVERLAY_YUV420:
517 		return 2;
518 	case I915_OVERLAY_YUV411:
519 	case I915_OVERLAY_YUV410:
520 		return 4;
521 	default:
522 		return -EINVAL;
523 	}
524 }
525 
526 static int uv_vsubsampling(u32 format)
527 {
528 	switch (format & I915_OVERLAY_DEPTH_MASK) {
529 	case I915_OVERLAY_YUV420:
530 	case I915_OVERLAY_YUV410:
531 		return 2;
532 	case I915_OVERLAY_YUV422:
533 	case I915_OVERLAY_YUV411:
534 		return 1;
535 	default:
536 		return -EINVAL;
537 	}
538 }
539 
540 static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 width)
541 {
542 	u32 mask, shift, ret;
543 	if (IS_GEN2(dev_priv)) {
544 		mask = 0x1f;
545 		shift = 5;
546 	} else {
547 		mask = 0x3f;
548 		shift = 6;
549 	}
550 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
551 	if (!IS_GEN2(dev_priv))
552 		ret <<= 1;
553 	ret -= 1;
554 	return ret << 2;
555 }
556 
/*
 * Fixed horizontal polyphase filter coefficients for the Y plane, copied
 * into overlay_registers.Y_HCOEFS by update_polyphase_filter().
 * One row per phase (N_PHASES rows), N_HORIZ_Y_TAPS entries per row.
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
576 
577 static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
578 	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
579 	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
580 	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
581 	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
582 	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
583 	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
584 	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
585 	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
586 	0x3000, 0x0800, 0x3000
587 };
588 
589 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
590 {
591 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
592 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
593 		    sizeof(uv_static_hcoeffs));
594 }
595 
596 static bool update_scaling_factors(struct intel_overlay *overlay,
597 				   struct overlay_registers __iomem *regs,
598 				   struct put_image_params *params)
599 {
600 	/* fixed point with a 12 bit shift */
601 	u32 xscale, yscale, xscale_UV, yscale_UV;
602 #define FP_SHIFT 12
603 #define FRACT_MASK 0xfff
604 	bool scale_changed = false;
605 	int uv_hscale = uv_hsubsampling(params->format);
606 	int uv_vscale = uv_vsubsampling(params->format);
607 
608 	if (params->dst_w > 1)
609 		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
610 			/(params->dst_w);
611 	else
612 		xscale = 1 << FP_SHIFT;
613 
614 	if (params->dst_h > 1)
615 		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
616 			/(params->dst_h);
617 	else
618 		yscale = 1 << FP_SHIFT;
619 
620 	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
621 	xscale_UV = xscale/uv_hscale;
622 	yscale_UV = yscale/uv_vscale;
623 	/* make the Y scale to UV scale ratio an exact multiply */
624 	xscale = xscale_UV * uv_hscale;
625 	yscale = yscale_UV * uv_vscale;
626 	/*} else {
627 	  xscale_UV = 0;
628 	  yscale_UV = 0;
629 	  }*/
630 
631 	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
632 		scale_changed = true;
633 	overlay->old_xscale = xscale;
634 	overlay->old_yscale = yscale;
635 
636 	iowrite32(((yscale & FRACT_MASK) << 20) |
637 		  ((xscale >> FP_SHIFT)  << 16) |
638 		  ((xscale & FRACT_MASK) << 3),
639 		 &regs->YRGBSCALE);
640 
641 	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
642 		  ((xscale_UV >> FP_SHIFT)  << 16) |
643 		  ((xscale_UV & FRACT_MASK) << 3),
644 		 &regs->UVSCALE);
645 
646 	iowrite32((((yscale    >> FP_SHIFT) << 16) |
647 		   ((yscale_UV >> FP_SHIFT) << 0)),
648 		 &regs->UVSCALEV);
649 
650 	if (scale_changed)
651 		update_polyphase_filter(regs);
652 
653 	return scale_changed;
654 }
655 
656 static void update_colorkey(struct intel_overlay *overlay,
657 			    struct overlay_registers __iomem *regs)
658 {
659 	u32 key = overlay->color_key;
660 	u32 flags;
661 
662 	flags = 0;
663 	if (overlay->color_key_enabled)
664 		flags |= DST_KEY_ENABLE;
665 
666 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
667 	case 8:
668 		key = 0;
669 		flags |= CLK_RGB8I_MASK;
670 		break;
671 
672 	case 16:
673 		if (overlay->crtc->base.primary->fb->depth == 15) {
674 			key = RGB15_TO_COLORKEY(key);
675 			flags |= CLK_RGB15_MASK;
676 		} else {
677 			key = RGB16_TO_COLORKEY(key);
678 			flags |= CLK_RGB16_MASK;
679 		}
680 		break;
681 
682 	case 24:
683 	case 32:
684 		flags |= CLK_RGB24_MASK;
685 		break;
686 	}
687 
688 	iowrite32(key, &regs->DCLRKV);
689 	iowrite32(flags, &regs->DCLRKM);
690 }
691 
692 static u32 overlay_cmd_reg(struct put_image_params *params)
693 {
694 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
695 
696 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
697 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
698 		case I915_OVERLAY_YUV422:
699 			cmd |= OCMD_YUV_422_PLANAR;
700 			break;
701 		case I915_OVERLAY_YUV420:
702 			cmd |= OCMD_YUV_420_PLANAR;
703 			break;
704 		case I915_OVERLAY_YUV411:
705 		case I915_OVERLAY_YUV410:
706 			cmd |= OCMD_YUV_410_PLANAR;
707 			break;
708 		}
709 	} else { /* YUV packed */
710 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
711 		case I915_OVERLAY_YUV422:
712 			cmd |= OCMD_YUV_422_PACKED;
713 			break;
714 		case I915_OVERLAY_YUV411:
715 			cmd |= OCMD_YUV_411_PACKED;
716 			break;
717 		}
718 
719 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
720 		case I915_OVERLAY_NO_SWAP:
721 			break;
722 		case I915_OVERLAY_UV_SWAP:
723 			cmd |= OCMD_UV_SWAP;
724 			break;
725 		case I915_OVERLAY_Y_SWAP:
726 			cmd |= OCMD_Y_SWAP;
727 			break;
728 		case I915_OVERLAY_Y_AND_UV_SWAP:
729 			cmd |= OCMD_Y_AND_UV_SWAP;
730 			break;
731 		}
732 	}
733 
734 	return cmd;
735 }
736 
/*
 * Show @new_bo on the overlay using the geometry and format in @params.
 *
 * Sequence: release the previous old frame, pin the new buffer, switch
 * the overlay on if it is not active yet, fill in the register page and
 * queue a flip to it. On success the currently shown buffer becomes
 * old_vid_bo and @new_bo becomes vid_bo.
 *
 * Must be called with struct_mutex held; connection_mutex is expected to
 * be locked as well. Returns 0 on success or a negative error code, in
 * which case @new_bo has been unpinned again (if it had been pinned).
 */
static int intel_overlay_do_put_image(struct intel_overlay *overlay,
				      struct drm_i915_gem_object *new_bo,
				      struct put_image_params *params)
{
	int ret, tmp_width;
	struct overlay_registers __iomem *regs;
	bool scale_changed = false;
	struct drm_i915_private *dev_priv = overlay->i915;
	u32 swidth, swidthsw, sheight, ostride;
	enum i915_pipe pipe = overlay->crtc->pipe;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);
	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));

	/* must happen before the register page is touched */
	ret = intel_overlay_release_old_vid(overlay);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
						   &i915_ggtt_view_normal);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_put_fence(new_bo);
	if (ret)
		goto out_unpin;

	if (!overlay->active) {
		/* first frame: set up OCONFIG, then turn the hardware on */
		u32 oconfig;
		regs = intel_overlay_map_regs(overlay);
		if (!regs) {
			ret = -ENOMEM;
			goto out_unpin;
		}
		oconfig = OCONF_CC_OUT_8BIT;
		if (IS_GEN4(dev_priv))
			oconfig |= OCONF_CSC_MODE_BT709;
		oconfig |= pipe == 0 ?
			OCONF_PIPE_A : OCONF_PIPE_B;
		iowrite32(oconfig, &regs->OCONFIG);
		intel_overlay_unmap_regs(overlay, regs);

		ret = intel_overlay_on(overlay);
		if (ret != 0)
			goto out_unpin;
	}

	regs = intel_overlay_map_regs(overlay);
	if (!regs) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	/* destination window */
	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);

	/* line width in bytes: packed formats carry several bytes per pixel */
	if (params->format & I915_OVERLAY_YUV_PACKED)
		tmp_width = packed_width_bytes(params->format, params->src_w);
	else
		tmp_width = params->src_w;

	/* Y (or packed) plane values live in the low 16 bits */
	swidth = params->src_w;
	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
	sheight = params->src_h;
	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
	ostride = params->stride_Y;

	if (params->format & I915_OVERLAY_YUV_PLANAR) {
		/* planar: the UV plane values go into the high 16 bits */
		int uv_hscale = uv_hsubsampling(params->format);
		int uv_vscale = uv_vsubsampling(params->format);
		u32 tmp_U, tmp_V;
		swidth |= (params->src_w/uv_hscale) << 16;
		tmp_U = calc_swidthsw(dev_priv, params->offset_U,
				      params->src_w/uv_hscale);
		tmp_V = calc_swidthsw(dev_priv, params->offset_V,
				      params->src_w/uv_hscale);
		/* U and V may be aligned differently; use the larger value */
		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
		sheight |= (params->src_h/uv_vscale) << 16;
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
		ostride |= params->stride_UV << 16;
	}

	iowrite32(swidth, &regs->SWIDTH);
	iowrite32(swidthsw, &regs->SWIDTHSW);
	iowrite32(sheight, &regs->SHEIGHT);
	iowrite32(ostride, &regs->OSTRIDE);

	scale_changed = update_scaling_factors(overlay, regs, params);

	update_colorkey(overlay, regs);

	iowrite32(overlay_cmd_reg(params), &regs->OCMD);

	intel_overlay_unmap_regs(overlay, regs);

	/* the filter coefficients are reloaded only when the scale changed */
	ret = intel_overlay_continue(overlay, scale_changed);
	if (ret)
		goto out_unpin;

	i915_gem_track_fb(overlay->vid_bo, new_bo,
			  INTEL_FRONTBUFFER_OVERLAY(pipe));

	/* the frame shown so far is released by a later release_old_vid() */
	overlay->old_vid_bo = overlay->vid_bo;
	overlay->vid_bo = new_bo;

	intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));

	return 0;

out_unpin:
	i915_gem_object_ggtt_unpin(new_bo);
	return ret;
}
851 
852 int intel_overlay_switch_off(struct intel_overlay *overlay)
853 {
854 	struct drm_i915_private *dev_priv = overlay->i915;
855 	struct overlay_registers __iomem *regs;
856 	int ret;
857 
858 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
859 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
860 
861 	ret = intel_overlay_recover_from_interrupt(overlay);
862 	if (ret != 0)
863 		return ret;
864 
865 	if (!overlay->active)
866 		return 0;
867 
868 	ret = intel_overlay_release_old_vid(overlay);
869 	if (ret != 0)
870 		return ret;
871 
872 	regs = intel_overlay_map_regs(overlay);
873 	iowrite32(0, &regs->OCMD);
874 	intel_overlay_unmap_regs(overlay, regs);
875 
876 	return intel_overlay_off(overlay);
877 }
878 
879 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
880 					  struct intel_crtc *crtc)
881 {
882 	if (!crtc->active)
883 		return -EINVAL;
884 
885 	/* can't use the overlay with double wide pipe */
886 	if (crtc->config->double_wide)
887 		return -EINVAL;
888 
889 	return 0;
890 }
891 
892 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
893 {
894 	struct drm_i915_private *dev_priv = overlay->i915;
895 	u32 pfit_control = I915_READ(PFIT_CONTROL);
896 	u32 ratio;
897 
898 	/* XXX: This is not the same logic as in the xorg driver, but more in
899 	 * line with the intel documentation for the i965
900 	 */
901 	if (INTEL_GEN(dev_priv) >= 4) {
902 		/* on i965 use the PGM reg to read out the autoscaler values */
903 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
904 	} else {
905 		if (pfit_control & VERT_AUTO_SCALE)
906 			ratio = I915_READ(PFIT_AUTO_RATIOS);
907 		else
908 			ratio = I915_READ(PFIT_PGM_RATIOS);
909 		ratio >>= PFIT_VERT_SCALE_SHIFT;
910 	}
911 
912 	overlay->pfit_vscale_ratio = ratio;
913 }
914 
915 static int check_overlay_dst(struct intel_overlay *overlay,
916 			     struct drm_intel_overlay_put_image *rec)
917 {
918 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
919 
920 	if (rec->dst_x < mode->hdisplay &&
921 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
922 	    rec->dst_y < mode->vdisplay &&
923 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
924 		return 0;
925 	else
926 		return -EINVAL;
927 }
928 
929 static int check_overlay_scaling(struct put_image_params *rec)
930 {
931 	u32 tmp;
932 
933 	/* downscaling limit is 8.0 */
934 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
935 	if (tmp > 7)
936 		return -EINVAL;
937 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
938 	if (tmp > 7)
939 		return -EINVAL;
940 
941 	return 0;
942 }
943 
944 static int check_overlay_src(struct drm_i915_private *dev_priv,
945 			     struct drm_intel_overlay_put_image *rec,
946 			     struct drm_i915_gem_object *new_bo)
947 {
948 	int uv_hscale = uv_hsubsampling(rec->flags);
949 	int uv_vscale = uv_vsubsampling(rec->flags);
950 	u32 stride_mask;
951 	int depth;
952 	u32 tmp;
953 
954 	/* check src dimensions */
955 	if (IS_845G(dev_priv) || IS_I830(dev_priv)) {
956 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
957 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
958 			return -EINVAL;
959 	} else {
960 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
961 		    rec->src_width  > IMAGE_MAX_WIDTH)
962 			return -EINVAL;
963 	}
964 
965 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
966 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
967 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
968 		return -EINVAL;
969 
970 	/* check alignment constraints */
971 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
972 	case I915_OVERLAY_RGB:
973 		/* not implemented */
974 		return -EINVAL;
975 
976 	case I915_OVERLAY_YUV_PACKED:
977 		if (uv_vscale != 1)
978 			return -EINVAL;
979 
980 		depth = packed_depth_bytes(rec->flags);
981 		if (depth < 0)
982 			return depth;
983 
984 		/* ignore UV planes */
985 		rec->stride_UV = 0;
986 		rec->offset_U = 0;
987 		rec->offset_V = 0;
988 		/* check pixel alignment */
989 		if (rec->offset_Y % depth)
990 			return -EINVAL;
991 		break;
992 
993 	case I915_OVERLAY_YUV_PLANAR:
994 		if (uv_vscale < 0 || uv_hscale < 0)
995 			return -EINVAL;
996 		/* no offset restrictions for planar formats */
997 		break;
998 
999 	default:
1000 		return -EINVAL;
1001 	}
1002 
1003 	if (rec->src_width % uv_hscale)
1004 		return -EINVAL;
1005 
1006 	/* stride checking */
1007 	if (IS_I830(dev_priv) || IS_845G(dev_priv))
1008 		stride_mask = 255;
1009 	else
1010 		stride_mask = 63;
1011 
1012 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
1013 		return -EINVAL;
1014 	if (IS_GEN4(dev_priv) && rec->stride_Y < 512)
1015 		return -EINVAL;
1016 
1017 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
1018 		4096 : 8192;
1019 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
1020 		return -EINVAL;
1021 
1022 	/* check buffer dimensions */
1023 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
1024 	case I915_OVERLAY_RGB:
1025 	case I915_OVERLAY_YUV_PACKED:
1026 		/* always 4 Y values per depth pixels */
1027 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
1028 			return -EINVAL;
1029 
1030 		tmp = rec->stride_Y*rec->src_height;
1031 		if (rec->offset_Y + tmp > new_bo->base.size)
1032 			return -EINVAL;
1033 		break;
1034 
1035 	case I915_OVERLAY_YUV_PLANAR:
1036 		if (rec->src_width > rec->stride_Y)
1037 			return -EINVAL;
1038 		if (rec->src_width/uv_hscale > rec->stride_UV)
1039 			return -EINVAL;
1040 
1041 		tmp = rec->stride_Y * rec->src_height;
1042 		if (rec->offset_Y + tmp > new_bo->base.size)
1043 			return -EINVAL;
1044 
1045 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1046 		if (rec->offset_U + tmp > new_bo->base.size ||
1047 		    rec->offset_V + tmp > new_bo->base.size)
1048 			return -EINVAL;
1049 		break;
1050 	}
1051 
1052 	return 0;
1053 }
1054 
1055 /**
1056  * Return the pipe currently connected to the panel fitter,
1057  * or -1 if the panel fitter is not present or not in use
1058  */
1059 static int intel_panel_fitter_pipe(struct drm_i915_private *dev_priv)
1060 {
1061 	u32  pfit_control;
1062 
1063 	/* i830 doesn't have a panel fitter */
1064 	if (INTEL_GEN(dev_priv) <= 3 &&
1065 	    (IS_I830(dev_priv) || !IS_MOBILE(dev_priv)))
1066 		return -1;
1067 
1068 	pfit_control = I915_READ(PFIT_CONTROL);
1069 
1070 	/* See if the panel fitter is in use */
1071 	if ((pfit_control & PFIT_ENABLE) == 0)
1072 		return -1;
1073 
1074 	/* 965 can place panel fitter on either pipe */
1075 	if (IS_GEN4(dev_priv))
1076 		return (pfit_control >> 29) & 0x3;
1077 
1078 	/* older chips can only use pipe 1 */
1079 	return 1;
1080 }
1081 
1082 int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1083 				  struct drm_file *file_priv)
1084 {
1085 	struct drm_intel_overlay_put_image *put_image_rec = data;
1086 	struct drm_i915_private *dev_priv = to_i915(dev);
1087 	struct intel_overlay *overlay;
1088 	struct drm_crtc *drmmode_crtc;
1089 	struct intel_crtc *crtc;
1090 	struct drm_i915_gem_object *new_bo;
1091 	struct put_image_params *params;
1092 	int ret;
1093 
1094 	overlay = dev_priv->overlay;
1095 	if (!overlay) {
1096 		DRM_DEBUG("userspace bug: no overlay\n");
1097 		return -ENODEV;
1098 	}
1099 
1100 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1101 		drm_modeset_lock_all(dev);
1102 		mutex_lock(&dev->struct_mutex);
1103 
1104 		ret = intel_overlay_switch_off(overlay);
1105 
1106 		mutex_unlock(&dev->struct_mutex);
1107 		drm_modeset_unlock_all(dev);
1108 
1109 		return ret;
1110 	}
1111 
1112 	params = kmalloc(sizeof(*params), M_DRM, GFP_KERNEL);
1113 	if (!params)
1114 		return -ENOMEM;
1115 
1116 	drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1117 	if (!drmmode_crtc) {
1118 		ret = -ENOENT;
1119 		goto out_free;
1120 	}
1121 	crtc = to_intel_crtc(drmmode_crtc);
1122 
1123 	new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle);
1124 	if (!new_bo) {
1125 		ret = -ENOENT;
1126 		goto out_free;
1127 	}
1128 
1129 	drm_modeset_lock_all(dev);
1130 	mutex_lock(&dev->struct_mutex);
1131 
1132 	if (i915_gem_object_is_tiled(new_bo)) {
1133 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1134 		ret = -EINVAL;
1135 		goto out_unlock;
1136 	}
1137 
1138 	ret = intel_overlay_recover_from_interrupt(overlay);
1139 	if (ret != 0)
1140 		goto out_unlock;
1141 
1142 	if (overlay->crtc != crtc) {
1143 		struct drm_display_mode *mode = &crtc->base.mode;
1144 		ret = intel_overlay_switch_off(overlay);
1145 		if (ret != 0)
1146 			goto out_unlock;
1147 
1148 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1149 		if (ret != 0)
1150 			goto out_unlock;
1151 
1152 		overlay->crtc = crtc;
1153 		crtc->overlay = overlay;
1154 
1155 		/* line too wide, i.e. one-line-mode */
1156 		if (mode->hdisplay > 1024 &&
1157 		    intel_panel_fitter_pipe(dev_priv) == crtc->pipe) {
1158 			overlay->pfit_active = true;
1159 			update_pfit_vscale_ratio(overlay);
1160 		} else
1161 			overlay->pfit_active = false;
1162 	}
1163 
1164 	ret = check_overlay_dst(overlay, put_image_rec);
1165 	if (ret != 0)
1166 		goto out_unlock;
1167 
1168 	if (overlay->pfit_active) {
1169 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1170 				 overlay->pfit_vscale_ratio);
1171 		/* shifting right rounds downwards, so add 1 */
1172 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1173 				 overlay->pfit_vscale_ratio) + 1;
1174 	} else {
1175 		params->dst_y = put_image_rec->dst_y;
1176 		params->dst_h = put_image_rec->dst_height;
1177 	}
1178 	params->dst_x = put_image_rec->dst_x;
1179 	params->dst_w = put_image_rec->dst_width;
1180 
1181 	params->src_w = put_image_rec->src_width;
1182 	params->src_h = put_image_rec->src_height;
1183 	params->src_scan_w = put_image_rec->src_scan_width;
1184 	params->src_scan_h = put_image_rec->src_scan_height;
1185 	if (params->src_scan_h > params->src_h ||
1186 	    params->src_scan_w > params->src_w) {
1187 		ret = -EINVAL;
1188 		goto out_unlock;
1189 	}
1190 
1191 	ret = check_overlay_src(dev_priv, put_image_rec, new_bo);
1192 	if (ret != 0)
1193 		goto out_unlock;
1194 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1195 	params->stride_Y = put_image_rec->stride_Y;
1196 	params->stride_UV = put_image_rec->stride_UV;
1197 	params->offset_Y = put_image_rec->offset_Y;
1198 	params->offset_U = put_image_rec->offset_U;
1199 	params->offset_V = put_image_rec->offset_V;
1200 
1201 	/* Check scaling after src size to prevent a divide-by-zero. */
1202 	ret = check_overlay_scaling(params);
1203 	if (ret != 0)
1204 		goto out_unlock;
1205 
1206 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1207 	if (ret != 0)
1208 		goto out_unlock;
1209 
1210 	mutex_unlock(&dev->struct_mutex);
1211 	drm_modeset_unlock_all(dev);
1212 
1213 	kfree(params);
1214 
1215 	return 0;
1216 
1217 out_unlock:
1218 	mutex_unlock(&dev->struct_mutex);
1219 	drm_modeset_unlock_all(dev);
1220 	i915_gem_object_put_unlocked(new_bo);
1221 out_free:
1222 	kfree(params);
1223 
1224 	return ret;
1225 }
1226 
1227 static void update_reg_attrs(struct intel_overlay *overlay,
1228 			     struct overlay_registers __iomem *regs)
1229 {
1230 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1231 		  &regs->OCLRC0);
1232 	iowrite32(overlay->saturation, &regs->OCLRC1);
1233 }
1234 
/*
 * Validate a pair of adjacent gamma ramp points.
 *
 * Both values must have their top byte clear, and each of the three low
 * bytes of @gamma1 must be strictly smaller than the corresponding byte
 * of @gamma2.  Returns true when the pair is acceptable.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* bits 31:24 must be zero in both values */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	/* walk the three 8-bit channels at bit offsets 0, 8 and 16 */
	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1249 
/*
 * Reject gamma5 values in which any of the three low bytes equals 0x80
 * (going by the function name, this works around a hardware erratum).
 * The top byte is not examined.  Returns true when the value is usable.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	int remaining = 3;

	while (remaining-- > 0) {
		if ((gamma5 & 0xff) == 0x80)
			return false;
		gamma5 >>= 8;
	}

	return true;
}
1261 
1262 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1263 {
1264 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1265 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1266 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1267 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1268 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1269 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1270 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1271 		return -EINVAL;
1272 
1273 	if (!check_gamma5_errata(attrs->gamma5))
1274 		return -EINVAL;
1275 
1276 	return 0;
1277 }
1278 
1279 int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
1280 			      struct drm_file *file_priv)
1281 {
1282 	struct drm_intel_overlay_attrs *attrs = data;
1283 	struct drm_i915_private *dev_priv = to_i915(dev);
1284 	struct intel_overlay *overlay;
1285 	struct overlay_registers __iomem *regs;
1286 	int ret;
1287 
1288 	overlay = dev_priv->overlay;
1289 	if (!overlay) {
1290 		DRM_DEBUG("userspace bug: no overlay\n");
1291 		return -ENODEV;
1292 	}
1293 
1294 	drm_modeset_lock_all(dev);
1295 	mutex_lock(&dev->struct_mutex);
1296 
1297 	ret = -EINVAL;
1298 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1299 		attrs->color_key  = overlay->color_key;
1300 		attrs->brightness = overlay->brightness;
1301 		attrs->contrast   = overlay->contrast;
1302 		attrs->saturation = overlay->saturation;
1303 
1304 		if (!IS_GEN2(dev_priv)) {
1305 			attrs->gamma0 = I915_READ(OGAMC0);
1306 			attrs->gamma1 = I915_READ(OGAMC1);
1307 			attrs->gamma2 = I915_READ(OGAMC2);
1308 			attrs->gamma3 = I915_READ(OGAMC3);
1309 			attrs->gamma4 = I915_READ(OGAMC4);
1310 			attrs->gamma5 = I915_READ(OGAMC5);
1311 		}
1312 	} else {
1313 		if (attrs->brightness < -128 || attrs->brightness > 127)
1314 			goto out_unlock;
1315 		if (attrs->contrast > 255)
1316 			goto out_unlock;
1317 		if (attrs->saturation > 1023)
1318 			goto out_unlock;
1319 
1320 		overlay->color_key  = attrs->color_key;
1321 		overlay->brightness = attrs->brightness;
1322 		overlay->contrast   = attrs->contrast;
1323 		overlay->saturation = attrs->saturation;
1324 
1325 		regs = intel_overlay_map_regs(overlay);
1326 		if (!regs) {
1327 			ret = -ENOMEM;
1328 			goto out_unlock;
1329 		}
1330 
1331 		update_reg_attrs(overlay, regs);
1332 
1333 		intel_overlay_unmap_regs(overlay, regs);
1334 
1335 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1336 			if (IS_GEN2(dev_priv))
1337 				goto out_unlock;
1338 
1339 			if (overlay->active) {
1340 				ret = -EBUSY;
1341 				goto out_unlock;
1342 			}
1343 
1344 			ret = check_gamma(attrs);
1345 			if (ret)
1346 				goto out_unlock;
1347 
1348 			I915_WRITE(OGAMC0, attrs->gamma0);
1349 			I915_WRITE(OGAMC1, attrs->gamma1);
1350 			I915_WRITE(OGAMC2, attrs->gamma2);
1351 			I915_WRITE(OGAMC3, attrs->gamma3);
1352 			I915_WRITE(OGAMC4, attrs->gamma4);
1353 			I915_WRITE(OGAMC5, attrs->gamma5);
1354 		}
1355 	}
1356 	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
1357 
1358 	ret = 0;
1359 out_unlock:
1360 	mutex_unlock(&dev->struct_mutex);
1361 	drm_modeset_unlock_all(dev);
1362 
1363 	return ret;
1364 }
1365 
1366 void intel_setup_overlay(struct drm_i915_private *dev_priv)
1367 {
1368 	struct intel_overlay *overlay;
1369 	struct drm_i915_gem_object *reg_bo;
1370 	struct overlay_registers __iomem *regs;
1371 	int ret;
1372 
1373 	if (!HAS_OVERLAY(dev_priv))
1374 		return;
1375 
1376 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1377 	if (!overlay)
1378 		return;
1379 
1380 	mutex_lock(&dev_priv->drm.struct_mutex);
1381 	if (WARN_ON(dev_priv->overlay))
1382 		goto out_free;
1383 
1384 	overlay->i915 = dev_priv;
1385 
1386 	reg_bo = NULL;
1387 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1388 		reg_bo = i915_gem_object_create_stolen(&dev_priv->drm,
1389 						       PAGE_SIZE);
1390 	if (reg_bo == NULL)
1391 		reg_bo = i915_gem_object_create(&dev_priv->drm, PAGE_SIZE);
1392 	if (IS_ERR(reg_bo))
1393 		goto out_free;
1394 	overlay->reg_bo = reg_bo;
1395 
1396 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) {
1397 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1398 		if (ret) {
1399 			DRM_ERROR("failed to attach phys overlay regs\n");
1400 			goto out_free_bo;
1401 		}
1402 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
1403 	} else {
1404 		ret = i915_gem_object_ggtt_pin(reg_bo, NULL,
1405 					       0, PAGE_SIZE, PIN_MAPPABLE);
1406 		if (ret) {
1407 			DRM_ERROR("failed to pin overlay register bo\n");
1408 			goto out_free_bo;
1409 		}
1410 		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1411 
1412 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1413 		if (ret) {
1414 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1415 			goto out_unpin_bo;
1416 		}
1417 	}
1418 
1419 	/* init all values */
1420 	overlay->color_key = 0x0101fe;
1421 	overlay->color_key_enabled = true;
1422 	overlay->brightness = -19;
1423 	overlay->contrast = 75;
1424 	overlay->saturation = 146;
1425 
1426 	regs = intel_overlay_map_regs(overlay);
1427 	if (!regs)
1428 		goto out_unpin_bo;
1429 
1430 	memset_io(regs, 0, sizeof(struct overlay_registers));
1431 	update_polyphase_filter(regs);
1432 	update_reg_attrs(overlay, regs);
1433 
1434 	intel_overlay_unmap_regs(overlay, regs);
1435 
1436 	dev_priv->overlay = overlay;
1437 	mutex_unlock(&dev_priv->drm.struct_mutex);
1438 	DRM_INFO("initialized overlay support\n");
1439 	return;
1440 
1441 out_unpin_bo:
1442 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1443 		i915_gem_object_ggtt_unpin(reg_bo);
1444 out_free_bo:
1445 	i915_gem_object_put(reg_bo);
1446 out_free:
1447 	mutex_unlock(&dev_priv->drm.struct_mutex);
1448 	kfree(overlay);
1449 	return;
1450 }
1451 
1452 void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
1453 {
1454 	if (!dev_priv->overlay)
1455 		return;
1456 
1457 	/* The bo's should be free'd by the generic code already.
1458 	 * Furthermore modesetting teardown happens beforehand so the
1459 	 * hardware should be off already */
1460 	WARN_ON(dev_priv->overlay->active);
1461 
1462 	i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
1463 	kfree(dev_priv->overlay);
1464 }
1465 
/*
 * Snapshot of the overlay state, filled in by
 * intel_overlay_capture_error_state() and dumped by
 * intel_overlay_print_error_state().
 */
struct intel_overlay_error_state {
	struct overlay_registers regs;	/* copy of the mapped overlay register block */
	unsigned long base;		/* overlay->flip_addr at capture time */
	u32 dovsta;			/* value read from the DOVSTA register */
	u32 isr;			/* value read from the ISR register */
};
1472 
1473 static struct overlay_registers __iomem *
1474 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1475 {
1476 	struct drm_i915_private *dev_priv = overlay->i915;
1477 	struct overlay_registers __iomem *regs;
1478 
1479 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
1480 		/* Cast to make sparse happy, but it's wc memory anyway, so
1481 		 * equivalent to the wc io mapping on X86. */
1482 		regs = (struct overlay_registers __iomem *)
1483 			overlay->reg_bo->phys_handle->vaddr;
1484 	else
1485 		regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable,
1486 						overlay->flip_addr);
1487 
1488 	return regs;
1489 }
1490 
1491 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1492 					struct overlay_registers __iomem *regs)
1493 {
1494 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
1495 		io_mapping_unmap_atomic(regs);
1496 }
1497 
1498 struct intel_overlay_error_state *
1499 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv)
1500 {
1501 	struct intel_overlay *overlay = dev_priv->overlay;
1502 	struct intel_overlay_error_state *error;
1503 	struct overlay_registers __iomem *regs;
1504 
1505 	if (!overlay || !overlay->active)
1506 		return NULL;
1507 
1508 	error = kmalloc(sizeof(*error), M_DRM, GFP_ATOMIC);
1509 	if (error == NULL)
1510 		return NULL;
1511 
1512 	error->dovsta = I915_READ(DOVSTA);
1513 	error->isr = I915_READ(ISR);
1514 	error->base = overlay->flip_addr;
1515 
1516 	regs = intel_overlay_map_regs_atomic(overlay);
1517 	if (!regs)
1518 		goto err;
1519 
1520 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1521 	intel_overlay_unmap_regs_atomic(overlay, regs);
1522 
1523 	return error;
1524 
1525 err:
1526 	kfree(error);
1527 	return NULL;
1528 }
1529 
1530 void
1531 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1532 				struct intel_overlay_error_state *error)
1533 {
1534 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1535 			  error->dovsta, error->isr);
1536 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1537 			  error->base);
1538 
1539 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1540 	P(OBUF_0Y);
1541 	P(OBUF_1Y);
1542 	P(OBUF_0U);
1543 	P(OBUF_0V);
1544 	P(OBUF_1U);
1545 	P(OBUF_1V);
1546 	P(OSTRIDE);
1547 	P(YRGB_VPH);
1548 	P(UV_VPH);
1549 	P(HORZ_PH);
1550 	P(INIT_PHS);
1551 	P(DWINPOS);
1552 	P(DWINSZ);
1553 	P(SWIDTH);
1554 	P(SWIDTHSW);
1555 	P(SHEIGHT);
1556 	P(YRGBSCALE);
1557 	P(UVSCALE);
1558 	P(OCLRC0);
1559 	P(OCLRC1);
1560 	P(DCLRKV);
1561 	P(DCLRKM);
1562 	P(SCLRKVH);
1563 	P(SCLRKVL);
1564 	P(SCLRKEN);
1565 	P(OCONFIG);
1566 	P(OCMD);
1567 	P(OSTART_0Y);
1568 	P(OSTART_1Y);
1569 	P(OSTART_0U);
1570 	P(OSTART_0V);
1571 	P(OSTART_1U);
1572 	P(OSTART_1V);
1573 	P(OTILEOFF_0Y);
1574 	P(OTILEOFF_1Y);
1575 	P(OTILEOFF_0U);
1576 	P(OTILEOFF_0V);
1577 	P(OTILEOFF_1U);
1578 	P(OTILEOFF_1V);
1579 	P(FASTHSCALE);
1580 	P(UVSCALEV);
1581 #undef P
1582 }
1583