/*	$OpenBSD: evergreen_cs.c,v 1.3 2014/02/15 14:19:44 jsg Exp $	*/
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <dev/pci/drm/drmP.h>
#include "radeon.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
			   struct radeon_cs_reloc **cs_reloc);
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
					  struct radeon_cs_reloc **cs_reloc);
int evergreen_cs_parse(struct radeon_cs_parser *p);
int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
struct evergreen_cs_track {
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	u32			row_size;
	/* value we track */
	u32			nsamples;		/* unused */
	struct radeon_bo	*cb_color_bo[12];
	u32			cb_color_bo_offset[12];
	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
	u32			cb_color_info[12];
	u32			cb_color_view[12];
	u32			cb_color_pitch[12];
	u32			cb_color_slice[12];
	u32			cb_color_slice_idx[12];
	u32			cb_color_attrib[12];
	u32			cb_color_cmask_slice[8];/* unused */
	u32			cb_color_fmask_slice[8];/* unused */
	u32			cb_target_mask;
	u32			cb_shader_mask; /* unused */
	u32			vgt_strmout_config;
	u32			vgt_strmout_buffer_config;
	struct radeon_bo	*vgt_strmout_bo[4];
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
	u32			db_depth_control;
	u32			db_depth_view;
	u32			db_depth_slice;
	u32			db_depth_size;
	u32			db_z_info;
	u32			db_z_read_offset;
	u32			db_z_write_offset;
	struct radeon_bo	*db_z_read_bo;
	struct radeon_bo	*db_z_write_bo;
	u32			db_s_info;
	u32			db_s_read_offset;
	u32			db_s_write_offset;
	struct radeon_bo	*db_s_read_bo;
	struct radeon_bo	*db_s_write_bo;
	bool			sx_misc_kill_all_prims;
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
	u32			htile_offset;
	u32			htile_surface;
	struct radeon_bo	*htile_bo;
};

static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	if (tiling_flags & RADEON_TILING_MACRO)
		return ARRAY_2D_TILED_THIN1;
	else if (tiling_flags & RADEON_TILING_MICRO)
		return ARRAY_1D_TILED_THIN1;
	else
		return ARRAY_LINEAR_GENERAL;
}

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
	switch (nbanks) {
	case 2:
		return ADDR_SURF_2_BANK;
	case 4:
		return ADDR_SURF_4_BANK;
	case 8:
	default:
		return ADDR_SURF_8_BANK;
	case 16:
		return ADDR_SURF_16_BANK;
	}
}

static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
	int i;

	for (i = 0; i < 8; i++) {
		track->cb_color_fmask_bo[i] = NULL;
		track->cb_color_cmask_bo[i] = NULL;
		track->cb_color_cmask_slice[i] = 0;
		track->cb_color_fmask_slice[i] = 0;
	}

	for (i = 0; i < 12; i++) {
		track->cb_color_bo[i] = NULL;
		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
		track->cb_color_info[i] = 0;
		track->cb_color_view[i] = 0xFFFFFFFF;
		track->cb_color_pitch[i] = 0;
		track->cb_color_slice[i] = 0xfffffff;
		track->cb_color_slice_idx[i] = 0;
	}
	track->cb_target_mask = 0xFFFFFFFF;
	track->cb_shader_mask = 0xFFFFFFFF;
	track->cb_dirty = true;

	track->db_depth_slice = 0xffffffff;
	track->db_depth_view = 0xFFFFC000;
	track->db_depth_size = 0xFFFFFFFF;
	track->db_depth_control = 0xFFFFFFFF;
	track->db_z_info = 0xFFFFFFFF;
	track->db_z_read_offset = 0xFFFFFFFF;
	track->db_z_write_offset = 0xFFFFFFFF;
	track->db_z_read_bo = NULL;
	track->db_z_write_bo = NULL;
	track->db_s_info = 0xFFFFFFFF;
	track->db_s_read_offset = 0xFFFFFFFF;
	track->db_s_write_offset = 0xFFFFFFFF;
	track->db_s_read_bo = NULL;
	track->db_s_write_bo = NULL;
	track->db_dirty = true;
	track->htile_bo = NULL;
	track->htile_offset = 0xFFFFFFFF;
	track->htile_surface = 0;

	for (i = 0; i < 4; i++) {
		track->vgt_strmout_size[i] = 0;
		track->vgt_strmout_bo[i] = NULL;
		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
	}
	track->streamout_dirty = true;
	track->sx_misc_kill_all_prims = false;
}

struct eg_surface {
	/* value gathered from cs */
	unsigned	nbx;
	unsigned	nby;
	unsigned	format;
	unsigned	mode;
	unsigned	nbanks;
	unsigned	bankw;
	unsigned	bankh;
	unsigned	tsplit;
	unsigned	mtilea;
	unsigned	nsamples;
	/* output value */
	unsigned	bpe;
	unsigned	layer_size;
	unsigned	palign;
	unsigned	halign;
	unsigned long	base_align;
};

static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
					  struct eg_surface *surf,
					  const char *prefix)
{
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = surf->bpe;
	surf->palign = 1;
	surf->halign = 1;
	return 0;
}

static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
						  struct eg_surface *surf,
						  const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = MAX(64, track->group_size / surf->bpe);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 1;
	if (surf->nbx & (palign - 1)) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	return 0;
}
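/*
 * Worked example for the linear-aligned case above (illustrative numbers
 * only): with group_size = 512 bytes and bpe = 4, palign =
 * MAX(64, 512 / 4) = 128, so the pitch in blocks (nbx) must be a
 * multiple of 128 and the surface base must sit on a 512-byte boundary.
 */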

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
	palign = MAX(8, palign);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 8;
	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
				 __func__, __LINE__, prefix, surf->nbx, palign,
				 track->group_size, surf->bpe, surf->nsamples);
		}
		return -EINVAL;
	}
	if ((surf->nby & (8 - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
				 __func__, __LINE__, prefix, surf->nby);
		}
		return -EINVAL;
	}
	return 0;
}
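/*
 * Worked example for the 1D-tiled case above (illustrative numbers only):
 * with group_size = 512, bpe = 4 and nsamples = 1, palign =
 * MAX(8, 512 / (8 * 4 * 1)) = 16, so the pitch must be a multiple of
 * 16 blocks and the height a multiple of 8 (one 8x8 tile).
 */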

static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign, halign, tileb, slice_pt;
	unsigned mtile_pr, mtile_ps, mtileb;

	tileb = 64 * surf->bpe * surf->nsamples;
	slice_pt = 1;
	if (tileb > surf->tsplit) {
		slice_pt = tileb / surf->tsplit;
	}
	tileb = tileb / slice_pt;
	/* macro tile width & height */
	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
	mtileb = (palign / 8) * (halign / 8) * tileb;
	mtile_pr = surf->nbx / palign;
	mtile_ps = (mtile_pr * surf->nby) / halign;
	surf->layer_size = mtile_ps * mtileb * slice_pt;
	surf->base_align = (palign / 8) * (halign / 8) * tileb;
	surf->palign = palign;
	surf->halign = halign;

	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	if ((surf->nby & (halign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nby, halign);
		}
		return -EINVAL;
	}

	return 0;
}
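/*
 * Worked example for the 2D-tiled case above (illustrative numbers only):
 * with npipes = 2, nbanks = 8, bankw = bankh = mtilea = 1, bpe = 4 and
 * nsamples = 1: tileb = 64 * 4 = 256 bytes (no tile split as long as
 * tsplit >= 256), palign = 8 * 1 * 2 * 1 = 16, halign = (8 * 1 * 8) / 1
 * = 64, and mtileb = (16/8) * (64/8) * 256 = 4096 bytes per macro tile,
 * which also becomes the base alignment.
 */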

static int evergreen_surface_check(struct radeon_cs_parser *p,
				   struct eg_surface *surf,
				   const char *prefix)
{
	/* some common value computed here */
	surf->bpe = r600_fmt_get_blocksize(surf->format);

	switch (surf->mode) {
	case ARRAY_LINEAR_GENERAL:
		return evergreen_surface_check_linear(p, surf, prefix);
	case ARRAY_LINEAR_ALIGNED:
		return evergreen_surface_check_linear_aligned(p, surf, prefix);
	case ARRAY_1D_TILED_THIN1:
		return evergreen_surface_check_1d(p, surf, prefix);
	case ARRAY_2D_TILED_THIN1:
		return evergreen_surface_check_2d(p, surf, prefix);
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
				__func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}
	return -EINVAL;
}

static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
					      struct eg_surface *surf,
					      const char *prefix)
{
	switch (surf->mode) {
	case ARRAY_2D_TILED_THIN1:
		break;
	case ARRAY_LINEAR_GENERAL:
	case ARRAY_LINEAR_ALIGNED:
	case ARRAY_1D_TILED_THIN1:
		return 0;
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
				__func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}

	switch (surf->nbanks) {
	case 0: surf->nbanks = 2; break;
	case 1: surf->nbanks = 4; break;
	case 2: surf->nbanks = 8; break;
	case 3: surf->nbanks = 16; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
			 __func__, __LINE__, prefix, surf->nbanks);
		return -EINVAL;
	}
	switch (surf->bankw) {
	case 0: surf->bankw = 1; break;
	case 1: surf->bankw = 2; break;
	case 2: surf->bankw = 4; break;
	case 3: surf->bankw = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
			 __func__, __LINE__, prefix, surf->bankw);
		return -EINVAL;
	}
	switch (surf->bankh) {
	case 0: surf->bankh = 1; break;
	case 1: surf->bankh = 2; break;
	case 2: surf->bankh = 4; break;
	case 3: surf->bankh = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
			 __func__, __LINE__, prefix, surf->bankh);
		return -EINVAL;
	}
	switch (surf->mtilea) {
	case 0: surf->mtilea = 1; break;
	case 1: surf->mtilea = 2; break;
	case 2: surf->mtilea = 4; break;
	case 3: surf->mtilea = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
			 __func__, __LINE__, prefix, surf->mtilea);
		return -EINVAL;
	}
	switch (surf->tsplit) {
	case 0: surf->tsplit = 64; break;
	case 1: surf->tsplit = 128; break;
	case 2: surf->tsplit = 256; break;
	case 3: surf->tsplit = 512; break;
	case 4: surf->tsplit = 1024; break;
	case 5: surf->tsplit = 2048; break;
	case 6: surf->tsplit = 4096; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
			 __func__, __LINE__, prefix, surf->tsplit);
		return -EINVAL;
	}
	return 0;
}
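/*
 * The switches above decode the raw register fields, which are
 * power-of-two encodings: nbanks = 2 << field, bankw/bankh/mtilea =
 * 1 << field, and tsplit = 64 << field bytes.
 */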

static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
	pitch = track->cb_color_pitch[id];
	slice = track->cb_color_slice[id];
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
	surf.nsamples = 1;

	if (!r600_fmt_is_valid_color(surf.format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
			 __func__, __LINE__, surf.format,
			id, track->cb_color_info[id]);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "cb");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, "cb");
	if (r) {
		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, id, track->cb_color_pitch[id],
			 track->cb_color_slice[id], track->cb_color_attrib[id],
			 track->cb_color_info[id]);
		return r;
	}

	offset = track->cb_color_bo_offset[id] << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, id, offset, surf.base_align);
		return -EINVAL;
	}

	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
		/* old ddx is broken: it allocates the bo with w*h*bpp but
		 * programs the slice with ALIGN(h, 8); catch this and patch
		 * the command stream.
		 */
		if (!surf.mode) {
			volatile u32 *ib = p->ib.ptr;
			unsigned long tmp, nby, bsize, size, min = 0;

			/* find the height the ddx wants */
			if (surf.nby > 8) {
				min = surf.nby - 8;
			}
			bsize = radeon_bo_size(track->cb_color_bo[id]);
			tmp = track->cb_color_bo_offset[id] << 8;
			for (nby = surf.nby; nby > min; nby--) {
				size = nby * surf.nbx * surf.bpe * surf.nsamples;
				if ((tmp + size * mslice) <= bsize) {
					break;
				}
			}
			if (nby > min) {
				surf.nby = nby;
				slice = ((nby * surf.nbx) / 64) - 1;
				if (!evergreen_surface_check(p, &surf, "cb")) {
					/* check if this one works */
					tmp += surf.layer_size * mslice;
					if (tmp <= bsize) {
						ib[track->cb_color_slice_idx[id]] = slice;
						goto old_ddx_ok;
					}
				}
			}
		}
		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
			 __func__, __LINE__, id, surf.layer_size,
			track->cb_color_bo_offset[id] << 8, mslice,
			radeon_bo_size(track->cb_color_bo[id]), slice);
		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
			 __func__, __LINE__, surf.nbx, surf.nby,
			surf.mode, surf.bpe, surf.nsamples,
			surf.bankw, surf.bankh,
			surf.tsplit, surf.mtilea);
		return -EINVAL;
	}
old_ddx_ok:

	return 0;
}
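/*
 * The pitch/slice decoding above follows from how userspace encodes
 * these registers: CB_COLORn_PITCH holds the pitch in blocks divided by
 * 8, minus 1, and CB_COLORn_SLICE holds (pitch * height) / 64 - 1, hence
 * nbx = (pitch + 1) * 8 and nby = ((slice + 1) * 64) / nbx.
 */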

static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
						unsigned nbx, unsigned nby)
{
	struct evergreen_cs_track *track = p->track;
	unsigned long size;

	if (track->htile_bo == NULL) {
		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
				__func__, __LINE__, track->db_z_info);
		return -EINVAL;
	}

	if (G_028ABC_LINEAR(track->htile_surface)) {
		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
		nbx = roundup2(nbx, 16 * 8);
		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
		nby = roundup(nby, track->npipes * 8);
	} else {
		/* always assume 8x8 htile */
		/* alignment is htile align * 8; htile align varies with the
		 * number of pipes, the tile width and nby
		 */
		switch (track->npipes) {
		case 8:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
			nbx = roundup2(nbx, 64 * 8);
			nby = roundup2(nby, 64 * 8);
			break;
		case 4:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
			nbx = roundup2(nbx, 64 * 8);
			nby = roundup2(nby, 32 * 8);
			break;
		case 2:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
			nbx = roundup2(nbx, 32 * 8);
			nby = roundup2(nby, 32 * 8);
			break;
		case 1:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
			nbx = roundup2(nbx, 32 * 8);
			nby = roundup2(nby, 16 * 8);
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
					__func__, __LINE__, track->npipes);
			return -EINVAL;
		}
	}
	/* compute the number of htiles */
	nbx = nbx >> 3;
	nby = nby >> 3;
	/* size must be aligned on npipes * 2K boundary */
	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
	size += track->htile_offset;

	if (size > radeon_bo_size(track->htile_bo)) {
		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
				__func__, __LINE__, radeon_bo_size(track->htile_bo),
				size, nbx, nby);
		return -EINVAL;
	}
	return 0;
}
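/*
 * Sizing example for the linear htile path above (illustrative numbers
 * only): a 1920x1088 depth surface with npipes = 2 rounds to nbx = 1920
 * (already a multiple of 128) and nby = 1088 (a multiple of 16), giving
 * 240 x 136 htiles at 4 bytes each = 130560 bytes, rounded up to the
 * npipes * 2K boundary (4096) = 131072 bytes.
 */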

static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028044_FORMAT(track->db_s_info);
	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	if (surf.format != 1) {
		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	/* replace by the color format so we can use the same code */
	surf.format = V_028C70_COLOR_8;

	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, NULL);
	if (r) {
		/* old userspace doesn't compute proper depth/stencil alignment;
		 * check that alignment against a bigger bytes-per-element and
		 * only report an error if that alignment is wrong too.
		 */
		surf.format = V_028C70_COLOR_8_8_8_8;
		r = evergreen_surface_check(p, &surf, "stencil");
		if (r) {
			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
				 __func__, __LINE__, track->db_depth_size,
				 track->db_depth_slice, track->db_s_info, track->db_z_info);
		}
		return r;
	}

	offset = track->db_s_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_read_bo)) {
		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_s_read_offset << 8, mslice,
			radeon_bo_size(track->db_s_read_bo));
		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_s_info, track->db_z_info);
		return -EINVAL;
	}

	offset = track->db_s_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_write_bo)) {
		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_s_write_offset << 8, mslice,
			radeon_bo_size(track->db_s_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}

static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028040_FORMAT(track->db_z_info);
	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	switch (surf.format) {
	case V_028040_Z_16:
		surf.format = V_028C70_COLOR_16;
		break;
	case V_028040_Z_24:
	case V_028040_Z_32_FLOAT:
		surf.format = V_028C70_COLOR_8_8_8_8;
		break;
	default:
		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	r = evergreen_surface_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	offset = track->db_z_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_read_bo)) {
		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_z_read_offset << 8, mslice,
			radeon_bo_size(track->db_z_read_bo));
		return -EINVAL;
	}

	offset = track->db_z_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_write_bo)) {
		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_z_write_offset << 8, mslice,
			radeon_bo_size(track->db_z_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}

static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
					       struct radeon_bo *texture,
					       struct radeon_bo *mipmap,
					       unsigned idx)
{
	struct eg_surface surf;
	unsigned long toffset, moffset;
	unsigned dim, llevel, mslice, width, height, depth, i;
	u32 texdw[8];
	int r;

	texdw[0] = radeon_get_ib_value(p, idx + 0);
	texdw[1] = radeon_get_ib_value(p, idx + 1);
	texdw[2] = radeon_get_ib_value(p, idx + 2);
	texdw[3] = radeon_get_ib_value(p, idx + 3);
	texdw[4] = radeon_get_ib_value(p, idx + 4);
	texdw[5] = radeon_get_ib_value(p, idx + 5);
	texdw[6] = radeon_get_ib_value(p, idx + 6);
	texdw[7] = radeon_get_ib_value(p, idx + 7);
	dim = G_030000_DIM(texdw[0]);
	llevel = G_030014_LAST_LEVEL(texdw[5]);
	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
	height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
	surf.nsamples = 1;
	toffset = texdw[2] << 8;
	moffset = texdw[3] << 8;

	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	switch (dim) {
	case V_030000_SQ_TEX_DIM_1D:
	case V_030000_SQ_TEX_DIM_2D:
	case V_030000_SQ_TEX_DIM_CUBEMAP:
	case V_030000_SQ_TEX_DIM_1D_ARRAY:
	case V_030000_SQ_TEX_DIM_2D_ARRAY:
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_2D_MSAA:
	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
		surf.nsamples = 1 << llevel;
		llevel = 0;
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_3D:
		break;
	default:
		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
			 __func__, __LINE__, dim);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "texture");
	if (r) {
		return r;
	}

	/* align height */
	evergreen_surface_check(p, &surf, NULL);
	surf.nby = roundup(surf.nby, surf.halign);

	r = evergreen_surface_check(p, &surf, "texture");
	if (r) {
		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
			 texdw[5], texdw[6], texdw[7]);
		return r;
	}

	/* check texture size */
	if (toffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, toffset, surf.base_align);
		return -EINVAL;
	}
	if (moffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, moffset, surf.base_align);
		return -EINVAL;
	}
	if (dim == SQ_TEX_DIM_3D) {
		toffset += surf.layer_size * depth;
	} else {
		toffset += surf.layer_size * mslice;
	}
	if (toffset > radeon_bo_size(texture)) {
		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)texdw[2] << 8, mslice,
			depth, radeon_bo_size(texture),
			surf.nbx, surf.nby);
		return -EINVAL;
	}

	if (!mipmap) {
		if (llevel) {
			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
				 __func__, __LINE__);
			return -EINVAL;
		} else {
			return 0; /* everything's ok */
		}
	}

	/* check mipmap size */
	for (i = 1; i <= llevel; i++) {
		unsigned w, h, d;

		w = r600_mip_minify(width, i);
		h = r600_mip_minify(height, i);
		d = r600_mip_minify(depth, i);
		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
		surf.nby = r600_fmt_get_nblocksy(surf.format, h);

		switch (surf.mode) {
		case ARRAY_2D_TILED_THIN1:
			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
				surf.mode = ARRAY_1D_TILED_THIN1;
			}
			/* recompute alignment */
			evergreen_surface_check(p, &surf, NULL);
			break;
		case ARRAY_LINEAR_GENERAL:
		case ARRAY_LINEAR_ALIGNED:
		case ARRAY_1D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
				 __func__, __LINE__, surf.mode);
			return -EINVAL;
		}
		surf.nbx = roundup(surf.nbx, surf.palign);
		surf.nby = roundup(surf.nby, surf.halign);

		r = evergreen_surface_check(p, &surf, "mipmap");
		if (r) {
			return r;
		}

		if (dim == SQ_TEX_DIM_3D) {
			moffset += surf.layer_size * d;
		} else {
			moffset += surf.layer_size * mslice;
		}
		if (moffset > radeon_bo_size(mipmap)) {
			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
					"offset %ld, coffset %ld, max layer %d, depth %d, "
					"bo size %ld) level0 (%d %d %d)\n",
					__func__, __LINE__, i, surf.layer_size,
					(unsigned long)texdw[3] << 8, moffset, mslice,
					d, radeon_bo_size(mipmap),
					width, height, depth);
			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
				 __func__, __LINE__, surf.nbx, surf.nby,
				surf.mode, surf.bpe, surf.nsamples,
				surf.bankw, surf.bankh,
				surf.tsplit, surf.mtilea);
			return -EINVAL;
		}
	}

	return 0;
}
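/*
 * The mipmap loop above walks levels 1..llevel, re-minifying the level-0
 * dimensions at each iteration; a 2D-tiled surface is allowed to fall
 * back to 1D tiling once a level is smaller than a macro tile, and each
 * level's layer_size is accumulated into moffset to bounds-check the
 * mipmap bo.
 */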

static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	unsigned tmp, i;
	int r;
	unsigned buffer_mask = 0;

	/* check streamout */
	if (track->streamout_dirty && track->vgt_strmout_config) {
		for (i = 0; i < 4; i++) {
			if (track->vgt_strmout_config & (1 << i)) {
				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
			}
		}

		for (i = 0; i < 4; i++) {
			if (buffer_mask & (1 << i)) {
				if (track->vgt_strmout_bo[i]) {
					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
							(u64)track->vgt_strmout_size[i];
					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
						DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
							  i, offset,
							  radeon_bo_size(track->vgt_strmout_bo[i]));
						return -EINVAL;
					}
				} else {
					dev_warn(p->dev, "No buffer for streamout %d\n", i);
					return -EINVAL;
				}
			}
		}
		track->streamout_dirty = false;
	}

	if (track->sx_misc_kill_all_prims)
		return 0;

	/* check that we have a cb for each enabled target */
	if (track->cb_dirty) {
		tmp = track->cb_target_mask;
		for (i = 0; i < 8; i++) {
			u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

			if (format != V_028C70_COLOR_INVALID &&
			    (tmp >> (i * 4)) & 0xF) {
				/* at least one component is enabled */
				if (track->cb_color_bo[i] == NULL) {
					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
					return -EINVAL;
				}
				/* check cb */
				r = evergreen_cs_track_validate_cb(p, i);
				if (r) {
					return r;
				}
			}
		}
		track->cb_dirty = false;
	}

	if (track->db_dirty) {
		/* Check stencil buffer */
		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_stencil(p);
			if (r)
				return r;
		}
		/* Check depth buffer */
		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
		    G_028800_Z_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_depth(p);
			if (r)
				return r;
		}
		track->db_dirty = false;
	}

	return 0;
}
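/*
 * Streamout bookkeeping used above: VGT_STRMOUT_CONFIG carries one
 * enable bit per stream in its low bits, and VGT_STRMOUT_BUFFER_CONFIG
 * carries a 4-bit buffer mask per stream, so ORing together the nibbles
 * of every enabled stream yields the set of buffers that must be backed
 * by a large-enough bo.
 */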

/**
 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header in the ib chunk
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * is unknown.
 **/
static int evergreen_cs_packet_parse(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
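/*
 * PM4 header layout assumed by the decoding above: bits 31:30 hold the
 * packet type, bits 29:16 the body dword count minus one, and for
 * type-0 packets bits 15:0 hold the starting register dword offset,
 * while type-3 packets keep their opcode in bits 15:8.
 */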

/**
 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resulting reloc information
 *
 * Check that the next packet is a relocation packet3 and fetch the
 * relocation entry it points at from the relocation chunk.
 **/
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
					  struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

/**
 * evergreen_cs_packet_next_is_pkt3_nop() - test if the next packet is a NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 (a NOP carrying the
 * relocation index).
 **/
static bool evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return false;
	}
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		return false;
	}
	return true;
}

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits:
 * PACKET0 - VLINE_START_END + value
 * PACKET3 - WAIT_REG_MEM poll vline status reg
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this sequence and relocates the VLINE START END
 * and WAIT_REG_MEM packets to the correct crtc.
 * It also detects a switched-off crtc and nulls out the
 * wait in that case.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, wait_reg_mem;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg, wait_reg_mem_info;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the WAIT_REG_MEM */
	r = evergreen_cs_packet_parse(p, &wait_reg_mem, p->idx);
	if (r)
		return r;

	/* check it's a WAIT_REG_MEM */
	if (wait_reg_mem.type != PACKET_TYPE3 ||
	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
		return -EINVAL;
	}

	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
	/* bit 4 is reg (0) or mem (1) */
	if (wait_reg_mem_info & 0x10) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
		return -EINVAL;
	}
	/* waiting for value to be equal */
	if ((wait_reg_mem_info & 0x7) != 0x3) {
		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
		return -EINVAL;
	}
	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != EVERGREEN_VLINE_STATUS) {
		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != EVERGREEN_VLINE_STAT) {
		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += wait_reg_mem.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
	reg = CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
		ib[h_idx + 4] = PACKET2(0);
		ib[h_idx + 5] = PACKET2(0);
		ib[h_idx + 6] = PACKET2(0);
		ib[h_idx + 7] = PACKET2(0);
		ib[h_idx + 8] = PACKET2(0);
	} else {
		switch (reg) {
		case EVERGREEN_VLINE_START_END:
			header &= ~R600_CP_PACKET0_REG_MASK;
			header |= (EVERGREEN_VLINE_START_END + radeon_crtc->crtc_offset) >> 2;
			ib[h_idx] = header;
			ib[h_idx + 4] = (EVERGREEN_VLINE_STATUS + radeon_crtc->crtc_offset) >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
	}
	return 0;
}
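/*
 * IB layout assumed by the patching above, with h_idx pointing at the
 * PACKET0 header: h_idx+1 holds the VLINE_START_END value,
 * h_idx+2..h_idx+8 hold the 7-dword WAIT_REG_MEM packet (nopped out when
 * the crtc is disabled), and the crtc_id sits in the payload of the
 * following NOP reloc at h_idx+10 (h_idx + 2 + 7 + 1).
 */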

static int evergreen_packet0_check(struct radeon_cs_parser *p,
				   struct radeon_cs_packet *pkt,
				   unsigned idx, unsigned reg)
{
	int r;

	switch (reg) {
	case EVERGREEN_VLINE_START_END:
		r = evergreen_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			return r;
		}
		break;
	default:
		DRM_ERROR("Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
				      struct radeon_cs_packet *pkt)
{
	unsigned reg, i;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		r = evergreen_packet0_check(p, pkt, idx, reg);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * evergreen_cs_check_reg() - check if register is authorized or not
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special
 * handling.
 */
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
	struct radeon_cs_reloc *reloc;
	u32 last_reg;
	u32 m, i, tmp, *ib;
	int r;

	if (p->rdev->family >= CHIP_CAYMAN)
		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
	else
		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);

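	/*
	 * Each 32-bit word of the safe bitmap covers 128 bytes (32 dwords)
	 * of register space: reg >> 7 selects the word and (reg >> 2) & 31
	 * selects the bit for this register. A clear bit means the register
	 * is safe and needs no further checking.
	 */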
	i = (reg >> 7);
	if (i >= last_reg) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (p->rdev->family >= CHIP_CAYMAN) {
		if (!(cayman_reg_safe_bm[i] & m))
			return 0;
	} else {
		if (!(evergreen_reg_safe_bm[i] & m))
			return 0;
	}
	ib = p->ib.ptr;
	switch (reg) {
	/* force the following regs to 0 in an attempt to disable the out
	 * buffer; we would need to understand how it works better before
	 * performing security checks on it (Jerome)
	 */
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
		/* get value to populate the IB, don't remove */
		/*tmp =radeon_get_ib_value(p, idx);
		  ib[idx] = 0;*/
		break;
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case CAYMAN_DB_EQAA:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case CAYMAN_DB_DEPTH_INFO:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case DB_Z_INFO:
		track->db_z_info = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] &= ~Z_ARRAY_MODE(0xf);
			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->lobj.tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= DB_TILE_SPLIT(tile_split) |
						DB_BANK_WIDTH(bankw) |
						DB_BANK_HEIGHT(bankh) |
						DB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		track->db_dirty = true;
		break;
	case DB_STENCIL_INFO:
		track->db_s_info = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_02805C_DB_DEPTH_SLICE:
		track->db_depth_slice = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_Z_READ_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_z_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_Z_WRITE_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_z_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_READ_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_s_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_WRITE_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_s_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case VGT_STRMOUT_CONFIG:
		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_CONFIG:
		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
		track->streamout_dirty = true;
		break;
	case CP_COHER_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case PA_SC_AA_CONFIG:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CAYMAN_PA_SC_AA_CONFIG:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
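	/*
	 * The CB_COLORn register groups handled below sit at fixed strides:
	 * targets 0-7 repeat every 0x3c bytes and the extra targets 8-11
	 * every 0x1c bytes, which is what the (reg - CB_COLORn_...) / 0x3c
	 * and / 0x1c + 8 index computations rely on.
	 */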
1543 	case CB_COLOR0_VIEW:
1544 	case CB_COLOR1_VIEW:
1545 	case CB_COLOR2_VIEW:
1546 	case CB_COLOR3_VIEW:
1547 	case CB_COLOR4_VIEW:
1548 	case CB_COLOR5_VIEW:
1549 	case CB_COLOR6_VIEW:
1550 	case CB_COLOR7_VIEW:
1551 		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1552 		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1553 		track->cb_dirty = true;
1554 		break;
1555 	case CB_COLOR8_VIEW:
1556 	case CB_COLOR9_VIEW:
1557 	case CB_COLOR10_VIEW:
1558 	case CB_COLOR11_VIEW:
1559 		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1560 		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1561 		track->cb_dirty = true;
1562 		break;
1563 	case CB_COLOR0_INFO:
1564 	case CB_COLOR1_INFO:
1565 	case CB_COLOR2_INFO:
1566 	case CB_COLOR3_INFO:
1567 	case CB_COLOR4_INFO:
1568 	case CB_COLOR5_INFO:
1569 	case CB_COLOR6_INFO:
1570 	case CB_COLOR7_INFO:
1571 		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1572 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1573 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1574 			r = evergreen_cs_packet_next_reloc(p, &reloc);
1575 			if (r) {
1576 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1577 						"0x%04X\n", reg);
1578 				return -EINVAL;
1579 			}
1580 			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1581 			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1582 		}
1583 		track->cb_dirty = true;
1584 		break;
1585 	case CB_COLOR8_INFO:
1586 	case CB_COLOR9_INFO:
1587 	case CB_COLOR10_INFO:
1588 	case CB_COLOR11_INFO:
1589 		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1590 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1591 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1592 			r = evergreen_cs_packet_next_reloc(p, &reloc);
1593 			if (r) {
1594 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1595 						"0x%04X\n", reg);
1596 				return -EINVAL;
1597 			}
1598 			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1599 			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1600 		}
1601 		track->cb_dirty = true;
1602 		break;
1603 	case CB_COLOR0_PITCH:
1604 	case CB_COLOR1_PITCH:
1605 	case CB_COLOR2_PITCH:
1606 	case CB_COLOR3_PITCH:
1607 	case CB_COLOR4_PITCH:
1608 	case CB_COLOR5_PITCH:
1609 	case CB_COLOR6_PITCH:
1610 	case CB_COLOR7_PITCH:
1611 		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1612 		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1613 		track->cb_dirty = true;
1614 		break;
1615 	case CB_COLOR8_PITCH:
1616 	case CB_COLOR9_PITCH:
1617 	case CB_COLOR10_PITCH:
1618 	case CB_COLOR11_PITCH:
1619 		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1620 		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1621 		track->cb_dirty = true;
1622 		break;
1623 	case CB_COLOR0_SLICE:
1624 	case CB_COLOR1_SLICE:
1625 	case CB_COLOR2_SLICE:
1626 	case CB_COLOR3_SLICE:
1627 	case CB_COLOR4_SLICE:
1628 	case CB_COLOR5_SLICE:
1629 	case CB_COLOR6_SLICE:
1630 	case CB_COLOR7_SLICE:
1631 		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1632 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1633 		track->cb_color_slice_idx[tmp] = idx;
1634 		track->cb_dirty = true;
1635 		break;
1636 	case CB_COLOR8_SLICE:
1637 	case CB_COLOR9_SLICE:
1638 	case CB_COLOR10_SLICE:
1639 	case CB_COLOR11_SLICE:
1640 		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1641 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1642 		track->cb_color_slice_idx[tmp] = idx;
1643 		track->cb_dirty = true;
1644 		break;
1645 	case CB_COLOR0_ATTRIB:
1646 	case CB_COLOR1_ATTRIB:
1647 	case CB_COLOR2_ATTRIB:
1648 	case CB_COLOR3_ATTRIB:
1649 	case CB_COLOR4_ATTRIB:
1650 	case CB_COLOR5_ATTRIB:
1651 	case CB_COLOR6_ATTRIB:
1652 	case CB_COLOR7_ATTRIB:
1653 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1654 		if (r) {
1655 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1656 					"0x%04X\n", reg);
1657 			return -EINVAL;
1658 		}
1659 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1660 			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1661 				unsigned bankw, bankh, mtaspect, tile_split;
1662 
1663 				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1664 							&bankw, &bankh, &mtaspect,
1665 							&tile_split);
1666 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1667 				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1668 					   CB_BANK_WIDTH(bankw) |
1669 					   CB_BANK_HEIGHT(bankh) |
1670 					   CB_MACRO_TILE_ASPECT(mtaspect);
1671 			}
1672 		}
1673 		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1674 		track->cb_color_attrib[tmp] = ib[idx];
1675 		track->cb_dirty = true;
1676 		break;
1677 	case CB_COLOR8_ATTRIB:
1678 	case CB_COLOR9_ATTRIB:
1679 	case CB_COLOR10_ATTRIB:
1680 	case CB_COLOR11_ATTRIB:
1681 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1682 		if (r) {
1683 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1684 					"0x%04X\n", reg);
1685 			return -EINVAL;
1686 		}
1687 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1688 			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1689 				unsigned bankw, bankh, mtaspect, tile_split;
1690 
1691 				evergreen_tiling_fields(reloc->lobj.tiling_flags,
1692 							&bankw, &bankh, &mtaspect,
1693 							&tile_split);
1694 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1695 				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1696 					   CB_BANK_WIDTH(bankw) |
1697 					   CB_BANK_HEIGHT(bankh) |
1698 					   CB_MACRO_TILE_ASPECT(mtaspect);
1699 			}
1700 		}
1701 		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1702 		track->cb_color_attrib[tmp] = ib[idx];
1703 		track->cb_dirty = true;
1704 		break;
1705 	case CB_COLOR0_FMASK:
1706 	case CB_COLOR1_FMASK:
1707 	case CB_COLOR2_FMASK:
1708 	case CB_COLOR3_FMASK:
1709 	case CB_COLOR4_FMASK:
1710 	case CB_COLOR5_FMASK:
1711 	case CB_COLOR6_FMASK:
1712 	case CB_COLOR7_FMASK:
1713 		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1714 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1715 		if (r) {
1716 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1717 			return -EINVAL;
1718 		}
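		/* surface base addresses are programmed in 256-byte units,
		 * so the relocated GPU offset is shifted down by 8 bits
		 * before being folded into the packet */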
1719 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1720 		track->cb_color_fmask_bo[tmp] = reloc->robj;
1721 		break;
1722 	case CB_COLOR0_CMASK:
1723 	case CB_COLOR1_CMASK:
1724 	case CB_COLOR2_CMASK:
1725 	case CB_COLOR3_CMASK:
1726 	case CB_COLOR4_CMASK:
1727 	case CB_COLOR5_CMASK:
1728 	case CB_COLOR6_CMASK:
1729 	case CB_COLOR7_CMASK:
1730 		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1731 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1732 		if (r) {
1733 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1734 			return -EINVAL;
1735 		}
1736 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1737 		track->cb_color_cmask_bo[tmp] = reloc->robj;
1738 		break;
1739 	case CB_COLOR0_FMASK_SLICE:
1740 	case CB_COLOR1_FMASK_SLICE:
1741 	case CB_COLOR2_FMASK_SLICE:
1742 	case CB_COLOR3_FMASK_SLICE:
1743 	case CB_COLOR4_FMASK_SLICE:
1744 	case CB_COLOR5_FMASK_SLICE:
1745 	case CB_COLOR6_FMASK_SLICE:
1746 	case CB_COLOR7_FMASK_SLICE:
1747 		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1748 		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1749 		break;
1750 	case CB_COLOR0_CMASK_SLICE:
1751 	case CB_COLOR1_CMASK_SLICE:
1752 	case CB_COLOR2_CMASK_SLICE:
1753 	case CB_COLOR3_CMASK_SLICE:
1754 	case CB_COLOR4_CMASK_SLICE:
1755 	case CB_COLOR5_CMASK_SLICE:
1756 	case CB_COLOR6_CMASK_SLICE:
1757 	case CB_COLOR7_CMASK_SLICE:
1758 		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1759 		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1760 		break;
1761 	case CB_COLOR0_BASE:
1762 	case CB_COLOR1_BASE:
1763 	case CB_COLOR2_BASE:
1764 	case CB_COLOR3_BASE:
1765 	case CB_COLOR4_BASE:
1766 	case CB_COLOR5_BASE:
1767 	case CB_COLOR6_BASE:
1768 	case CB_COLOR7_BASE:
1769 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1770 		if (r) {
1771 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1772 					"0x%04X\n", reg);
1773 			return -EINVAL;
1774 		}
1775 		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1776 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1777 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1778 		track->cb_color_bo[tmp] = reloc->robj;
1779 		track->cb_dirty = true;
1780 		break;
1781 	case CB_COLOR8_BASE:
1782 	case CB_COLOR9_BASE:
1783 	case CB_COLOR10_BASE:
1784 	case CB_COLOR11_BASE:
1785 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1786 		if (r) {
1787 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1788 					"0x%04X\n", reg);
1789 			return -EINVAL;
1790 		}
1791 		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1792 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1793 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1794 		track->cb_color_bo[tmp] = reloc->robj;
1795 		track->cb_dirty = true;
1796 		break;
1797 	case DB_HTILE_DATA_BASE:
1798 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1799 		if (r) {
1800 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1801 					"0x%04X\n", reg);
1802 			return -EINVAL;
1803 		}
1804 		track->htile_offset = radeon_get_ib_value(p, idx);
1805 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1806 		track->htile_bo = reloc->robj;
1807 		track->db_dirty = true;
1808 		break;
1809 	case DB_HTILE_SURFACE:
1810 		/* 8x8 only */
1811 		track->htile_surface = radeon_get_ib_value(p, idx);
1812 		/* force 8x8 htile width and height */
1813 		ib[idx] |= 3;
1814 		track->db_dirty = true;
1815 		break;
1816 	case CB_IMMED0_BASE:
1817 	case CB_IMMED1_BASE:
1818 	case CB_IMMED2_BASE:
1819 	case CB_IMMED3_BASE:
1820 	case CB_IMMED4_BASE:
1821 	case CB_IMMED5_BASE:
1822 	case CB_IMMED6_BASE:
1823 	case CB_IMMED7_BASE:
1824 	case CB_IMMED8_BASE:
1825 	case CB_IMMED9_BASE:
1826 	case CB_IMMED10_BASE:
1827 	case CB_IMMED11_BASE:
1828 	case SQ_PGM_START_FS:
1829 	case SQ_PGM_START_ES:
1830 	case SQ_PGM_START_VS:
1831 	case SQ_PGM_START_GS:
1832 	case SQ_PGM_START_PS:
1833 	case SQ_PGM_START_HS:
1834 	case SQ_PGM_START_LS:
1835 	case SQ_CONST_MEM_BASE:
1836 	case SQ_ALU_CONST_CACHE_GS_0:
1837 	case SQ_ALU_CONST_CACHE_GS_1:
1838 	case SQ_ALU_CONST_CACHE_GS_2:
1839 	case SQ_ALU_CONST_CACHE_GS_3:
1840 	case SQ_ALU_CONST_CACHE_GS_4:
1841 	case SQ_ALU_CONST_CACHE_GS_5:
1842 	case SQ_ALU_CONST_CACHE_GS_6:
1843 	case SQ_ALU_CONST_CACHE_GS_7:
1844 	case SQ_ALU_CONST_CACHE_GS_8:
1845 	case SQ_ALU_CONST_CACHE_GS_9:
1846 	case SQ_ALU_CONST_CACHE_GS_10:
1847 	case SQ_ALU_CONST_CACHE_GS_11:
1848 	case SQ_ALU_CONST_CACHE_GS_12:
1849 	case SQ_ALU_CONST_CACHE_GS_13:
1850 	case SQ_ALU_CONST_CACHE_GS_14:
1851 	case SQ_ALU_CONST_CACHE_GS_15:
1852 	case SQ_ALU_CONST_CACHE_PS_0:
1853 	case SQ_ALU_CONST_CACHE_PS_1:
1854 	case SQ_ALU_CONST_CACHE_PS_2:
1855 	case SQ_ALU_CONST_CACHE_PS_3:
1856 	case SQ_ALU_CONST_CACHE_PS_4:
1857 	case SQ_ALU_CONST_CACHE_PS_5:
1858 	case SQ_ALU_CONST_CACHE_PS_6:
1859 	case SQ_ALU_CONST_CACHE_PS_7:
1860 	case SQ_ALU_CONST_CACHE_PS_8:
1861 	case SQ_ALU_CONST_CACHE_PS_9:
1862 	case SQ_ALU_CONST_CACHE_PS_10:
1863 	case SQ_ALU_CONST_CACHE_PS_11:
1864 	case SQ_ALU_CONST_CACHE_PS_12:
1865 	case SQ_ALU_CONST_CACHE_PS_13:
1866 	case SQ_ALU_CONST_CACHE_PS_14:
1867 	case SQ_ALU_CONST_CACHE_PS_15:
1868 	case SQ_ALU_CONST_CACHE_VS_0:
1869 	case SQ_ALU_CONST_CACHE_VS_1:
1870 	case SQ_ALU_CONST_CACHE_VS_2:
1871 	case SQ_ALU_CONST_CACHE_VS_3:
1872 	case SQ_ALU_CONST_CACHE_VS_4:
1873 	case SQ_ALU_CONST_CACHE_VS_5:
1874 	case SQ_ALU_CONST_CACHE_VS_6:
1875 	case SQ_ALU_CONST_CACHE_VS_7:
1876 	case SQ_ALU_CONST_CACHE_VS_8:
1877 	case SQ_ALU_CONST_CACHE_VS_9:
1878 	case SQ_ALU_CONST_CACHE_VS_10:
1879 	case SQ_ALU_CONST_CACHE_VS_11:
1880 	case SQ_ALU_CONST_CACHE_VS_12:
1881 	case SQ_ALU_CONST_CACHE_VS_13:
1882 	case SQ_ALU_CONST_CACHE_VS_14:
1883 	case SQ_ALU_CONST_CACHE_VS_15:
1884 	case SQ_ALU_CONST_CACHE_HS_0:
1885 	case SQ_ALU_CONST_CACHE_HS_1:
1886 	case SQ_ALU_CONST_CACHE_HS_2:
1887 	case SQ_ALU_CONST_CACHE_HS_3:
1888 	case SQ_ALU_CONST_CACHE_HS_4:
1889 	case SQ_ALU_CONST_CACHE_HS_5:
1890 	case SQ_ALU_CONST_CACHE_HS_6:
1891 	case SQ_ALU_CONST_CACHE_HS_7:
1892 	case SQ_ALU_CONST_CACHE_HS_8:
1893 	case SQ_ALU_CONST_CACHE_HS_9:
1894 	case SQ_ALU_CONST_CACHE_HS_10:
1895 	case SQ_ALU_CONST_CACHE_HS_11:
1896 	case SQ_ALU_CONST_CACHE_HS_12:
1897 	case SQ_ALU_CONST_CACHE_HS_13:
1898 	case SQ_ALU_CONST_CACHE_HS_14:
1899 	case SQ_ALU_CONST_CACHE_HS_15:
1900 	case SQ_ALU_CONST_CACHE_LS_0:
1901 	case SQ_ALU_CONST_CACHE_LS_1:
1902 	case SQ_ALU_CONST_CACHE_LS_2:
1903 	case SQ_ALU_CONST_CACHE_LS_3:
1904 	case SQ_ALU_CONST_CACHE_LS_4:
1905 	case SQ_ALU_CONST_CACHE_LS_5:
1906 	case SQ_ALU_CONST_CACHE_LS_6:
1907 	case SQ_ALU_CONST_CACHE_LS_7:
1908 	case SQ_ALU_CONST_CACHE_LS_8:
1909 	case SQ_ALU_CONST_CACHE_LS_9:
1910 	case SQ_ALU_CONST_CACHE_LS_10:
1911 	case SQ_ALU_CONST_CACHE_LS_11:
1912 	case SQ_ALU_CONST_CACHE_LS_12:
1913 	case SQ_ALU_CONST_CACHE_LS_13:
1914 	case SQ_ALU_CONST_CACHE_LS_14:
1915 	case SQ_ALU_CONST_CACHE_LS_15:
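		/* all of the registers above hold base addresses: fetch the
		 * accompanying reloc and patch the 256-byte-aligned GPU
		 * offset into the IB in place */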
1916 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1917 		if (r) {
1918 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1919 					"0x%04X\n", reg);
1920 			return -EINVAL;
1921 		}
1922 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1923 		break;
1924 	case SX_MEMORY_EXPORT_BASE:
1925 		if (p->rdev->family >= CHIP_CAYMAN) {
1926 			dev_warn(p->dev, "bad SET_CONFIG_REG "
1927 				 "0x%04X\n", reg);
1928 			return -EINVAL;
1929 		}
1930 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1931 		if (r) {
1932 			dev_warn(p->dev, "bad SET_CONFIG_REG "
1933 					"0x%04X\n", reg);
1934 			return -EINVAL;
1935 		}
1936 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1937 		break;
1938 	case CAYMAN_SX_SCATTER_EXPORT_BASE:
1939 		if (p->rdev->family < CHIP_CAYMAN) {
1940 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1941 				 "0x%04X\n", reg);
1942 			return -EINVAL;
1943 		}
1944 		r = evergreen_cs_packet_next_reloc(p, &reloc);
1945 		if (r) {
1946 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1947 					"0x%04X\n", reg);
1948 			return -EINVAL;
1949 		}
1950 		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1951 		break;
1952 	case SX_MISC:
1953 		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1954 		break;
1955 	default:
1956 		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1957 		return -EINVAL;
1958 	}
1959 	return 0;
1960 }
1961 
1962 static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1963 {
1964 	u32 last_reg, m, i;
1965 
1966 	if (p->rdev->family >= CHIP_CAYMAN)
1967 		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1968 	else
1969 		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1970 
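	/* the safe-register bitmaps carry one bit per register dword:
	 * reg >> 7 selects the bitmap word and (reg >> 2) & 31 the bit
	 * within it; a clear bit means the register may be written freely */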
1971 	i = (reg >> 7);
1972 	if (i >= last_reg) {
1973 		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1974 		return false;
1975 	}
1976 	m = 1 << ((reg >> 2) & 31);
1977 	if (p->rdev->family >= CHIP_CAYMAN) {
1978 		if (!(cayman_reg_safe_bm[i] & m))
1979 			return true;
1980 	} else {
1981 		if (!(evergreen_reg_safe_bm[i] & m))
1982 			return true;
1983 	}
1984 	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1985 	return false;
1986 }
1987 
1988 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1989 				   struct radeon_cs_packet *pkt)
1990 {
1991 	struct radeon_cs_reloc *reloc;
1992 	struct evergreen_cs_track *track;
1993 	volatile u32 *ib;
1994 	unsigned idx;
1995 	unsigned i;
1996 	unsigned start_reg, end_reg, reg;
1997 	int r;
1998 	u32 idx_value;
1999 
2000 	track = (struct evergreen_cs_track *)p->track;
2001 	ib = p->ib.ptr;
2002 	idx = pkt->idx + 1;
2003 	idx_value = radeon_get_ib_value(p, idx);
2004 
2005 	switch (pkt->opcode) {
2006 	case PACKET3_SET_PREDICATION:
2007 	{
2008 		int pred_op;
2009 		int tmp;
2010 		uint64_t offset;
2011 
2012 		if (pkt->count != 1) {
2013 			DRM_ERROR("bad SET PREDICATION\n");
2014 			return -EINVAL;
2015 		}
2016 
2017 		tmp = radeon_get_ib_value(p, idx + 1);
2018 		pred_op = (tmp >> 16) & 0x7;
2019 
2020 		/* for the clear predicate operation */
2021 		if (pred_op == 0)
2022 			return 0;
2023 
2024 		if (pred_op > 2) {
2025 			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
2026 			return -EINVAL;
2027 		}
2028 
2029 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2030 		if (r) {
2031 			DRM_ERROR("bad SET PREDICATION\n");
2032 			return -EINVAL;
2033 		}
2034 
2035 		offset = reloc->lobj.gpu_offset +
2036 		         (idx_value & 0xfffffff0) +
2037 		         ((u64)(tmp & 0xff) << 32);
2038 
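		/* patch the packet in place: dword 0 takes the low 32 bits of
		 * the relocated address, the low byte of dword 1 its top 8 bits */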
2039 		ib[idx + 0] = offset;
2040 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2041 	}
2042 	break;
2043 	case PACKET3_CONTEXT_CONTROL:
2044 		if (pkt->count != 1) {
2045 			DRM_ERROR("bad CONTEXT_CONTROL\n");
2046 			return -EINVAL;
2047 		}
2048 		break;
2049 	case PACKET3_INDEX_TYPE:
2050 	case PACKET3_NUM_INSTANCES:
2051 	case PACKET3_CLEAR_STATE:
2052 		if (pkt->count) {
2053 			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2054 			return -EINVAL;
2055 		}
2056 		break;
2057 	case CAYMAN_PACKET3_DEALLOC_STATE:
2058 		if (p->rdev->family < CHIP_CAYMAN) {
2059 			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2060 			return -EINVAL;
2061 		}
2062 		if (pkt->count) {
2063 			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2064 			return -EINVAL;
2065 		}
2066 		break;
2067 	case PACKET3_INDEX_BASE:
2068 	{
2069 		uint64_t offset;
2070 
2071 		if (pkt->count != 1) {
2072 			DRM_ERROR("bad INDEX_BASE\n");
2073 			return -EINVAL;
2074 		}
2075 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2076 		if (r) {
2077 			DRM_ERROR("bad INDEX_BASE\n");
2078 			return -EINVAL;
2079 		}
2080 
2081 		offset = reloc->lobj.gpu_offset +
2082 		         idx_value +
2083 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2084 
2085 		ib[idx+0] = offset;
2086 		ib[idx+1] = upper_32_bits(offset) & 0xff;
2087 
2088 		r = evergreen_cs_track_check(p);
2089 		if (r) {
2090 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2091 			return r;
2092 		}
2093 		break;
2094 	}
2095 	case PACKET3_DRAW_INDEX:
2096 	{
2097 		uint64_t offset;
2098 		if (pkt->count != 3) {
2099 			DRM_ERROR("bad DRAW_INDEX\n");
2100 			return -EINVAL;
2101 		}
2102 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2103 		if (r) {
2104 			DRM_ERROR("bad DRAW_INDEX\n");
2105 			return -EINVAL;
2106 		}
2107 
2108 		offset = reloc->lobj.gpu_offset +
2109 		         idx_value +
2110 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2111 
2112 		ib[idx+0] = offset;
2113 		ib[idx+1] = upper_32_bits(offset) & 0xff;
2114 
2115 		r = evergreen_cs_track_check(p);
2116 		if (r) {
2117 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2118 			return r;
2119 		}
2120 		break;
2121 	}
2122 	case PACKET3_DRAW_INDEX_2:
2123 	{
2124 		uint64_t offset;
2125 
2126 		if (pkt->count != 4) {
2127 			DRM_ERROR("bad DRAW_INDEX_2\n");
2128 			return -EINVAL;
2129 		}
2130 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2131 		if (r) {
2132 			DRM_ERROR("bad DRAW_INDEX_2\n");
2133 			return -EINVAL;
2134 		}
2135 
2136 		offset = reloc->lobj.gpu_offset +
2137 		         radeon_get_ib_value(p, idx+1) +
2138 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2139 
2140 		ib[idx+1] = offset;
2141 		ib[idx+2] = upper_32_bits(offset) & 0xff;
2142 
2143 		r = evergreen_cs_track_check(p);
2144 		if (r) {
2145 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2146 			return r;
2147 		}
2148 		break;
2149 	}
2150 	case PACKET3_DRAW_INDEX_AUTO:
2151 		if (pkt->count != 1) {
2152 			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
2153 			return -EINVAL;
2154 		}
2155 		r = evergreen_cs_track_check(p);
2156 		if (r) {
2157 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2158 			return r;
2159 		}
2160 		break;
2161 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2162 		if (pkt->count != 2) {
2163 			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
2164 			return -EINVAL;
2165 		}
2166 		r = evergreen_cs_track_check(p);
2167 		if (r) {
2168 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2169 			return r;
2170 		}
2171 		break;
2172 	case PACKET3_DRAW_INDEX_IMMD:
2173 		if (pkt->count < 2) {
2174 			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2175 			return -EINVAL;
2176 		}
2177 		r = evergreen_cs_track_check(p);
2178 		if (r) {
2179 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2180 			return r;
2181 		}
2182 		break;
2183 	case PACKET3_DRAW_INDEX_OFFSET:
2184 		if (pkt->count != 2) {
2185 			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2186 			return -EINVAL;
2187 		}
2188 		r = evergreen_cs_track_check(p);
2189 		if (r) {
2190 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2191 			return r;
2192 		}
2193 		break;
2194 	case PACKET3_DRAW_INDEX_OFFSET_2:
2195 		if (pkt->count != 3) {
2196 			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2197 			return -EINVAL;
2198 		}
2199 		r = evergreen_cs_track_check(p);
2200 		if (r) {
2201 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2202 			return r;
2203 		}
2204 		break;
2205 	case PACKET3_DISPATCH_DIRECT:
2206 		if (pkt->count != 3) {
2207 			DRM_ERROR("bad DISPATCH_DIRECT\n");
2208 			return -EINVAL;
2209 		}
2210 		r = evergreen_cs_track_check(p);
2211 		if (r) {
2212 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2213 			return r;
2214 		}
2215 		break;
2216 	case PACKET3_DISPATCH_INDIRECT:
2217 		if (pkt->count != 1) {
2218 			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2219 			return -EINVAL;
2220 		}
2221 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2222 		if (r) {
2223 			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2224 			return -EINVAL;
2225 		}
2226 		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
2227 		r = evergreen_cs_track_check(p);
2228 		if (r) {
2229 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2230 			return r;
2231 		}
2232 		break;
2233 	case PACKET3_WAIT_REG_MEM:
2234 		if (pkt->count != 5) {
2235 			DRM_ERROR("bad WAIT_REG_MEM\n");
2236 			return -EINVAL;
2237 		}
2238 		/* bit 4 is reg (0) or mem (1) */
2239 		if (idx_value & 0x10) {
2240 			uint64_t offset;
2241 
2242 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2243 			if (r) {
2244 				DRM_ERROR("bad WAIT_REG_MEM\n");
2245 				return -EINVAL;
2246 			}
2247 
2248 			offset = reloc->lobj.gpu_offset +
2249 			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2250 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2251 
2252 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2253 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2254 		}
2255 		break;
2256 	case PACKET3_CP_DMA:
2257 	{
2258 		u32 command, size, info;
2259 		u64 offset, tmp;
2260 		if (pkt->count != 4) {
2261 			DRM_ERROR("bad CP DMA\n");
2262 			return -EINVAL;
2263 		}
2264 		command = radeon_get_ib_value(p, idx+4);
2265 		size = command & 0x1fffff;
2266 		info = radeon_get_ib_value(p, idx+1);
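		/* dword 1 encodes the address spaces: bits 30:29 are SRC_SEL
		 * and bits 21:20 DST_SEL, where (per the checks below) 0 means
		 * memory, 1 GDS, and SRC_SEL 2 embedded data */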
2267 		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2268 		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2269 		    ((((info & 0x00300000) >> 20) == 0) &&
2270 		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2271 		    ((((info & 0x60000000) >> 29) == 0) &&
2272 		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2273 			/* non mem-to-mem copies require a dw-aligned count */
2274 			if (size % 4) {
2275 				DRM_ERROR("CP DMA command requires dw count alignment\n");
2276 				return -EINVAL;
2277 			}
2278 		}
2279 		if (command & PACKET3_CP_DMA_CMD_SAS) {
2280 			/* src address space is register */
2281 			/* GDS is ok */
2282 			if (((info & 0x60000000) >> 29) != 1) {
2283 				DRM_ERROR("CP DMA SAS not supported\n");
2284 				return -EINVAL;
2285 			}
2286 		} else {
2287 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
2288 				DRM_ERROR("CP DMA SAIC only supported for registers\n");
2289 				return -EINVAL;
2290 			}
2291 			/* src address space is memory */
2292 			if (((info & 0x60000000) >> 29) == 0) {
2293 				r = evergreen_cs_packet_next_reloc(p, &reloc);
2294 				if (r) {
2295 					DRM_ERROR("bad CP DMA SRC\n");
2296 					return -EINVAL;
2297 				}
2298 
2299 				tmp = radeon_get_ib_value(p, idx) +
2300 					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2301 
2302 				offset = reloc->lobj.gpu_offset + tmp;
2303 
2304 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2305 					dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2306 						 tmp + size, radeon_bo_size(reloc->robj));
2307 					return -EINVAL;
2308 				}
2309 
2310 				ib[idx] = offset;
2311 				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2312 			} else if (((info & 0x60000000) >> 29) != 2) {
2313 				DRM_ERROR("bad CP DMA SRC_SEL\n");
2314 				return -EINVAL;
2315 			}
2316 		}
2317 		if (command & PACKET3_CP_DMA_CMD_DAS) {
2318 			/* dst address space is register */
2319 			/* GDS is ok */
2320 			if (((info & 0x00300000) >> 20) != 1) {
2321 				DRM_ERROR("CP DMA DAS not supported\n");
2322 				return -EINVAL;
2323 			}
2324 		} else {
2325 			/* dst address space is memory */
2326 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
2327 				DRM_ERROR("CP DMA DAIC only supported for registers\n");
2328 				return -EINVAL;
2329 			}
2330 			if (((info & 0x00300000) >> 20) == 0) {
2331 				r = evergreen_cs_packet_next_reloc(p, &reloc);
2332 				if (r) {
2333 					DRM_ERROR("bad CP DMA DST\n");
2334 					return -EINVAL;
2335 				}
2336 
2337 				tmp = radeon_get_ib_value(p, idx+2) +
2338 					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2339 
2340 				offset = reloc->lobj.gpu_offset + tmp;
2341 
2342 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2343 					dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2344 						 tmp + size, radeon_bo_size(reloc->robj));
2345 					return -EINVAL;
2346 				}
2347 
2348 				ib[idx+2] = offset;
2349 				ib[idx+3] = upper_32_bits(offset) & 0xff;
2350 			} else {
2351 				DRM_ERROR("bad CP DMA DST_SEL\n");
2352 				return -EINVAL;
2353 			}
2354 		}
2355 		break;
2356 	}
2357 	case PACKET3_SURFACE_SYNC:
2358 		if (pkt->count != 3) {
2359 			DRM_ERROR("bad SURFACE_SYNC\n");
2360 			return -EINVAL;
2361 		}
2362 		/* 0xffffffff/0x0 is the flush-all-caches flag */
2363 		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2364 		    radeon_get_ib_value(p, idx + 2) != 0) {
2365 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2366 			if (r) {
2367 				DRM_ERROR("bad SURFACE_SYNC\n");
2368 				return -EINVAL;
2369 			}
2370 			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2371 		}
2372 		break;
2373 	case PACKET3_EVENT_WRITE:
2374 		if (pkt->count != 2 && pkt->count != 0) {
2375 			DRM_ERROR("bad EVENT_WRITE\n");
2376 			return -EINVAL;
2377 		}
2378 		if (pkt->count) {
2379 			uint64_t offset;
2380 
2381 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2382 			if (r) {
2383 				DRM_ERROR("bad EVENT_WRITE\n");
2384 				return -EINVAL;
2385 			}
2386 			offset = reloc->lobj.gpu_offset +
2387 			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2388 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2389 
2390 			ib[idx+1] = offset & 0xfffffff8;
2391 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2392 		}
2393 		break;
2394 	case PACKET3_EVENT_WRITE_EOP:
2395 	{
2396 		uint64_t offset;
2397 
2398 		if (pkt->count != 4) {
2399 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2400 			return -EINVAL;
2401 		}
2402 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2403 		if (r) {
2404 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2405 			return -EINVAL;
2406 		}
2407 
2408 		offset = reloc->lobj.gpu_offset +
2409 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2410 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2411 
2412 		ib[idx+1] = offset & 0xfffffffc;
2413 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2414 		break;
2415 	}
2416 	case PACKET3_EVENT_WRITE_EOS:
2417 	{
2418 		uint64_t offset;
2419 
2420 		if (pkt->count != 3) {
2421 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2422 			return -EINVAL;
2423 		}
2424 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2425 		if (r) {
2426 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2427 			return -EINVAL;
2428 		}
2429 
2430 		offset = reloc->lobj.gpu_offset +
2431 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2432 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2433 
2434 		ib[idx+1] = offset & 0xfffffffc;
2435 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2436 		break;
2437 	}
2438 	case PACKET3_SET_CONFIG_REG:
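		/* the first payload dword stores (reg - START) >> 2, so rebuild
		 * the absolute register range before checking each write */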
2439 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2440 		end_reg = 4 * pkt->count + start_reg - 4;
2441 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2442 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2443 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2444 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2445 			return -EINVAL;
2446 		}
2447 		for (i = 0; i < pkt->count; i++) {
2448 			reg = start_reg + (4 * i);
2449 			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2450 			if (r)
2451 				return r;
2452 		}
2453 		break;
2454 	case PACKET3_SET_CONTEXT_REG:
2455 		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2456 		end_reg = 4 * pkt->count + start_reg - 4;
2457 		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2458 		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2459 		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2460 			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2461 			return -EINVAL;
2462 		}
2463 		for (i = 0; i < pkt->count; i++) {
2464 			reg = start_reg + (4 * i);
2465 			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2466 			if (r)
2467 				return r;
2468 		}
2469 		break;
2470 	case PACKET3_SET_RESOURCE:
2471 		if (pkt->count % 8) {
2472 			DRM_ERROR("bad SET_RESOURCE\n");
2473 			return -EINVAL;
2474 		}
2475 		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2476 		end_reg = 4 * pkt->count + start_reg - 4;
2477 		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2478 		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2479 		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2480 			DRM_ERROR("bad SET_RESOURCE\n");
2481 			return -EINVAL;
2482 		}
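		/* each resource descriptor is eight dwords long (enforced by
		 * the pkt->count % 8 check above); validate them one by one */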
2483 		for (i = 0; i < (pkt->count / 8); i++) {
2484 			struct radeon_bo *texture, *mipmap;
2485 			u32 toffset, moffset;
2486 			u32 size, offset, mip_address, tex_dim;
2487 
2488 			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2489 			case SQ_TEX_VTX_VALID_TEXTURE:
2490 				/* tex base */
2491 				r = evergreen_cs_packet_next_reloc(p, &reloc);
2492 				if (r) {
2493 					DRM_ERROR("bad SET_RESOURCE (tex)\n");
2494 					return -EINVAL;
2495 				}
2496 				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2497 					ib[idx+1+(i*8)+1] |=
2498 						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
2499 					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
2500 						unsigned bankw, bankh, mtaspect, tile_split;
2501 
2502 						evergreen_tiling_fields(reloc->lobj.tiling_flags,
2503 									&bankw, &bankh, &mtaspect,
2504 									&tile_split);
2505 						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2506 						ib[idx+1+(i*8)+7] |=
2507 							TEX_BANK_WIDTH(bankw) |
2508 							TEX_BANK_HEIGHT(bankh) |
2509 							MACRO_TILE_ASPECT(mtaspect) |
2510 							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2511 					}
2512 				}
2513 				texture = reloc->robj;
2514 				toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2515 
2516 				/* tex mip base */
2517 				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2518 				mip_address = ib[idx+1+(i*8)+3];
2519 
2520 				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2521 				    !mip_address &&
2522 				    !evergreen_cs_packet_next_is_pkt3_nop(p)) {
2523 					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
2524 					 * It should be 0 if FMASK is disabled. */
2525 					moffset = 0;
2526 					mipmap = NULL;
2527 				} else {
2528 					r = evergreen_cs_packet_next_reloc(p, &reloc);
2529 					if (r) {
2530 						DRM_ERROR("bad SET_RESOURCE (tex)\n");
2531 						return -EINVAL;
2532 					}
2533 					moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
2534 					mipmap = reloc->robj;
2535 				}
2536 
2537 				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2538 				if (r)
2539 					return r;
2540 				ib[idx+1+(i*8)+2] += toffset;
2541 				ib[idx+1+(i*8)+3] += moffset;
2542 				break;
2543 			case SQ_TEX_VTX_VALID_BUFFER:
2544 			{
2545 				uint64_t offset64;
2546 				/* vtx base */
2547 				r = evergreen_cs_packet_next_reloc(p, &reloc);
2548 				if (r) {
2549 					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2550 					return -EINVAL;
2551 				}
2552 				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2553 				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2554 				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2555 					/* force size to size of the buffer */
2556 					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2557 					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2558 				}
2559 
2560 				offset64 = reloc->lobj.gpu_offset + offset;
2561 				ib[idx+1+(i*8)+0] = offset64;
2562 				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2563 						    (upper_32_bits(offset64) & 0xff);
2564 				break;
2565 			}
2566 			case SQ_TEX_VTX_INVALID_TEXTURE:
2567 			case SQ_TEX_VTX_INVALID_BUFFER:
2568 			default:
2569 				DRM_ERROR("bad SET_RESOURCE\n");
2570 				return -EINVAL;
2571 			}
2572 		}
2573 		break;
2574 	case PACKET3_SET_ALU_CONST:
2575 		/* XXX fix me ALU const buffers only */
2576 		break;
2577 	case PACKET3_SET_BOOL_CONST:
2578 		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2579 		end_reg = 4 * pkt->count + start_reg - 4;
2580 		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2581 		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2582 		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2583 			DRM_ERROR("bad SET_BOOL_CONST\n");
2584 			return -EINVAL;
2585 		}
2586 		break;
2587 	case PACKET3_SET_LOOP_CONST:
2588 		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2589 		end_reg = 4 * pkt->count + start_reg - 4;
2590 		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2591 		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2592 		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2593 			DRM_ERROR("bad SET_LOOP_CONST\n");
2594 			return -EINVAL;
2595 		}
2596 		break;
2597 	case PACKET3_SET_CTL_CONST:
2598 		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2599 		end_reg = 4 * pkt->count + start_reg - 4;
2600 		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2601 		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2602 		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2603 			DRM_ERROR("bad SET_CTL_CONST\n");
2604 			return -EINVAL;
2605 		}
2606 		break;
2607 	case PACKET3_SET_SAMPLER:
2608 		if (pkt->count % 3) {
2609 			DRM_ERROR("bad SET_SAMPLER\n");
2610 			return -EINVAL;
2611 		}
2612 		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2613 		end_reg = 4 * pkt->count + start_reg - 4;
2614 		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2615 		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2616 		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2617 			DRM_ERROR("bad SET_SAMPLER\n");
2618 			return -EINVAL;
2619 		}
2620 		break;
2621 	case PACKET3_STRMOUT_BUFFER_UPDATE:
2622 		if (pkt->count != 4) {
2623 			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2624 			return -EINVAL;
2625 		}
2626 		/* Updating memory at DST_ADDRESS. */
2627 		if (idx_value & 0x1) {
2628 			u64 offset;
2629 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2630 			if (r) {
2631 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2632 				return -EINVAL;
2633 			}
2634 			offset = radeon_get_ib_value(p, idx+1);
2635 			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2636 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2637 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2638 					  offset + 4, radeon_bo_size(reloc->robj));
2639 				return -EINVAL;
2640 			}
2641 			offset += reloc->lobj.gpu_offset;
2642 			ib[idx+1] = offset;
2643 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2644 		}
2645 		/* Reading data from SRC_ADDRESS. */
2646 		if (((idx_value >> 1) & 0x3) == 2) {
2647 			u64 offset;
2648 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2649 			if (r) {
2650 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2651 				return -EINVAL;
2652 			}
2653 			offset = radeon_get_ib_value(p, idx+3);
2654 			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2655 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2656 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2657 					  offset + 4, radeon_bo_size(reloc->robj));
2658 				return -EINVAL;
2659 			}
2660 			offset += reloc->lobj.gpu_offset;
2661 			ib[idx+3] = offset;
2662 			ib[idx+4] = upper_32_bits(offset) & 0xff;
2663 		}
2664 		break;
2665 	case PACKET3_MEM_WRITE:
2666 	{
2667 		u64 offset;
2668 
2669 		if (pkt->count != 3) {
2670 			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2671 			return -EINVAL;
2672 		}
2673 		r = evergreen_cs_packet_next_reloc(p, &reloc);
2674 		if (r) {
2675 			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2676 			return -EINVAL;
2677 		}
2678 		offset = radeon_get_ib_value(p, idx+0);
2679 		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2680 		if (offset & 0x7) {
2681 			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2682 			return -EINVAL;
2683 		}
2684 		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2685 			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2686 				  offset + 8, radeon_bo_size(reloc->robj));
2687 			return -EINVAL;
2688 		}
2689 		offset += reloc->lobj.gpu_offset;
2690 		ib[idx+0] = offset;
2691 		ib[idx+1] = upper_32_bits(offset) & 0xff;
2692 		break;
2693 	}
2694 	case PACKET3_COPY_DW:
2695 		if (pkt->count != 4) {
2696 			DRM_ERROR("bad COPY_DW (invalid count)\n");
2697 			return -EINVAL;
2698 		}
2699 		if (idx_value & 0x1) {
2700 			u64 offset;
2701 			/* SRC is memory. */
2702 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2703 			if (r) {
2704 				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2705 				return -EINVAL;
2706 			}
2707 			offset = radeon_get_ib_value(p, idx+1);
2708 			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2709 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2710 				DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2711 					  offset + 4, radeon_bo_size(reloc->robj));
2712 				return -EINVAL;
2713 			}
2714 			offset += reloc->lobj.gpu_offset;
2715 			ib[idx+1] = offset;
2716 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2717 		} else {
2718 			/* SRC is a reg. */
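			/* the packet stores a dword index: convert it to a byte
			 * offset before consulting the safe-register bitmap */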
2719 			reg = radeon_get_ib_value(p, idx+1) << 2;
2720 			if (!evergreen_is_safe_reg(p, reg, idx+1))
2721 				return -EINVAL;
2722 		}
2723 		if (idx_value & 0x2) {
2724 			u64 offset;
2725 			/* DST is memory. */
2726 			r = evergreen_cs_packet_next_reloc(p, &reloc);
2727 			if (r) {
2728 				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2729 				return -EINVAL;
2730 			}
2731 			offset = radeon_get_ib_value(p, idx+3);
2732 			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2733 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2734 				DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2735 					  offset + 4, radeon_bo_size(reloc->robj));
2736 				return -EINVAL;
2737 			}
2738 			offset += reloc->lobj.gpu_offset;
2739 			ib[idx+3] = offset;
2740 			ib[idx+4] = upper_32_bits(offset) & 0xff;
2741 		} else {
2742 			/* DST is a reg. */
2743 			reg = radeon_get_ib_value(p, idx+3) << 2;
2744 			if (!evergreen_is_safe_reg(p, reg, idx+3))
2745 				return -EINVAL;
2746 		}
2747 		break;
2748 	case PACKET3_NOP:
2749 		break;
2750 	default:
2751 		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2752 		return -EINVAL;
2753 	}
2754 	return 0;
2755 }
2756 
2757 int evergreen_cs_parse(struct radeon_cs_parser *p)
2758 {
2759 	struct radeon_cs_packet pkt;
2760 	struct evergreen_cs_track *track;
2761 	u32 tmp;
2762 	int r;
2763 
2764 	if (p->track == NULL) {
2765 		/* initialize the tracker; we are in KMS */
2766 		track = kzalloc(sizeof(*track), GFP_KERNEL);
2767 		if (track == NULL)
2768 			return -ENOMEM;
2769 		evergreen_cs_track_init(track);
2770 		if (p->rdev->family >= CHIP_CAYMAN)
2771 			tmp = p->rdev->config.cayman.tile_config;
2772 		else
2773 			tmp = p->rdev->config.evergreen.tile_config;
2774 
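		/* tile_config packs the GPU topology into nibbles: [3:0] pipe
		 * count, [7:4] bank count, [11:8] group size and [15:12] row
		 * size codes, decoded by the switches below */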
2775 		switch (tmp & 0xf) {
2776 		case 0:
2777 			track->npipes = 1;
2778 			break;
2779 		case 1:
2780 		default:
2781 			track->npipes = 2;
2782 			break;
2783 		case 2:
2784 			track->npipes = 4;
2785 			break;
2786 		case 3:
2787 			track->npipes = 8;
2788 			break;
2789 		}
2790 
2791 		switch ((tmp & 0xf0) >> 4) {
2792 		case 0:
2793 			track->nbanks = 4;
2794 			break;
2795 		case 1:
2796 		default:
2797 			track->nbanks = 8;
2798 			break;
2799 		case 2:
2800 			track->nbanks = 16;
2801 			break;
2802 		}
2803 
2804 		switch ((tmp & 0xf00) >> 8) {
2805 		case 0:
2806 			track->group_size = 256;
2807 			break;
2808 		case 1:
2809 		default:
2810 			track->group_size = 512;
2811 			break;
2812 		}
2813 
2814 		switch ((tmp & 0xf000) >> 12) {
2815 		case 0:
2816 			track->row_size = 1;
2817 			break;
2818 		case 1:
2819 		default:
2820 			track->row_size = 2;
2821 			break;
2822 		case 2:
2823 			track->row_size = 4;
2824 			break;
2825 		}
2826 
2827 		p->track = track;
2828 	}
2829 	do {
2830 		r = evergreen_cs_packet_parse(p, &pkt, p->idx);
2831 		if (r) {
2832 			kfree(p->track);
2833 			p->track = NULL;
2834 			return r;
2835 		}
2836 		p->idx += pkt.count + 2;
2837 		switch (pkt.type) {
2838 		case PACKET_TYPE0:
2839 			r = evergreen_cs_parse_packet0(p, &pkt);
2840 			break;
2841 		case PACKET_TYPE2:
2842 			break;
2843 		case PACKET_TYPE3:
2844 			r = evergreen_packet3_check(p, &pkt);
2845 			break;
2846 		default:
2847 			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2848 			kfree(p->track);
2849 			p->track = NULL;
2850 			return -EINVAL;
2851 		}
2852 		if (r) {
2853 			kfree(p->track);
2854 			p->track = NULL;
2855 			return r;
2856 		}
2857 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2858 #if 0
2859 	for (r = 0; r < p->ib.length_dw; r++) {
2860 		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2861 		mdelay(1);
2862 	}
2863 #endif
2864 	kfree(p->track);
2865 	p->track = NULL;
2866 	return 0;
2867 }
2868 
2869 /*
2870  *  DMA
2871  */
2872 
2873 #define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2874 #define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2875 #define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2876 #define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2877 #define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
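
/* Each DMA packet starts with one header dword laid out as above: the
 * command in bits 31:28, the dword count in 19:0, the tiled flag in
 * bit 23, the extended-packet flag in bit 26 and a misc sub-opcode in
 * bits 22:20. As a worked example, a header of 0x30000010 decodes to
 * cmd = 3, count = 0x10 and tiled = new_cmd = misc = 0.
 */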
2878 
2879 /**
2880  * evergreen_dma_cs_parse() - parse the DMA IB
2881  * @p:		parser structure holding parsing context.
2882  *
2883  * Parses the DMA IB from the CS ioctl and updates
2884  * the GPU addresses based on the reloc information and
2885  * checks for errors. (Evergreen-Cayman)
2886  * Returns 0 for success and an error on failure.
2887  **/
2888 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2889 {
2890 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2891 	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2892 	u32 header, cmd, count, tiled, new_cmd, misc;
2893 	volatile u32 *ib = p->ib.ptr;
2894 	u32 idx, idx_value;
2895 	u64 src_offset, dst_offset, dst2_offset;
2896 	int r;
2897 
2898 	do {
2899 		if (p->idx >= ib_chunk->length_dw) {
2900 			DRM_ERROR("Cannot parse packet at %d after CS end %d !\n",
2901 				  p->idx, ib_chunk->length_dw);
2902 			return -EINVAL;
2903 		}
2904 		idx = p->idx;
2905 		header = radeon_get_ib_value(p, idx);
2906 		cmd = GET_DMA_CMD(header);
2907 		count = GET_DMA_COUNT(header);
2908 		tiled = GET_DMA_T(header);
2909 		new_cmd = GET_DMA_NEW(header);
2910 		misc = GET_DMA_MISC(header);
2911 
2912 		switch (cmd) {
2913 		case DMA_PACKET_WRITE:
2914 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2915 			if (r) {
2916 				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2917 				return -EINVAL;
2918 			}
2919 			if (tiled) {
2920 				dst_offset = radeon_get_ib_value(p, idx+1);
2921 				dst_offset <<= 8;
2922 
2923 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2924 				p->idx += count + 7;
2925 			} else {
2926 				dst_offset = radeon_get_ib_value(p, idx+1);
2927 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2928 
2929 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2930 				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2931 				p->idx += count + 3;
2932 			}
2933 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2934 				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2935 					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2936 				return -EINVAL;
2937 			}
2938 			break;
2939 		case DMA_PACKET_COPY:
2940 			r = r600_dma_cs_next_reloc(p, &src_reloc);
2941 			if (r) {
2942 				DRM_ERROR("bad DMA_PACKET_COPY\n");
2943 				return -EINVAL;
2944 			}
2945 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2946 			if (r) {
2947 				DRM_ERROR("bad DMA_PACKET_COPY\n");
2948 				return -EINVAL;
2949 			}
2950 			if (tiled) {
2951 				idx_value = radeon_get_ib_value(p, idx + 2);
2952 				if (new_cmd) {
2953 					switch (misc) {
2954 					case 0:
2955 						/* L2T, frame to fields */
2956 						if (idx_value & (1 << 31)) {
2957 							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2958 							return -EINVAL;
2959 						}
2960 						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2961 						if (r) {
2962 							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2963 							return -EINVAL;
2964 						}
2965 						dst_offset = radeon_get_ib_value(p, idx+1);
2966 						dst_offset <<= 8;
2967 						dst2_offset = radeon_get_ib_value(p, idx+2);
2968 						dst2_offset <<= 8;
2969 						src_offset = radeon_get_ib_value(p, idx+8);
2970 						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2971 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2972 							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2973 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2974 							return -EINVAL;
2975 						}
2976 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2977 							dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
2978 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2979 							return -EINVAL;
2980 						}
2981 						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2982 							dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
2983 								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2984 							return -EINVAL;
2985 						}
2986 						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2987 						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2988 						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2989 						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2990 						p->idx += 10;
2991 						break;
2992 					case 1:
2993 						/* L2T, T2L partial */
2994 						if (p->family < CHIP_CAYMAN) {
2995 							DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2996 							return -EINVAL;
2997 						}
2998 						/* detile bit */
2999 						if (idx_value & (1 << 31)) {
3000 							/* tiled src, linear dst */
3001 							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3002 
3003 							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3004 							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3005 						} else {
3006 							/* linear src, tiled dst */
3007 							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3008 							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3009 
3010 							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3011 						}
3012 						p->idx += 12;
3013 						break;
3014 					case 3:
3015 						/* L2T, broadcast */
3016 						if (idx_value & (1 << 31)) {
3017 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3018 							return -EINVAL;
3019 						}
3020 						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3021 						if (r) {
3022 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3023 							return -EINVAL;
3024 						}
3025 						dst_offset = radeon_get_ib_value(p, idx+1);
3026 						dst_offset <<= 8;
3027 						dst2_offset = radeon_get_ib_value(p, idx+2);
3028 						dst2_offset <<= 8;
3029 						src_offset = radeon_get_ib_value(p, idx+8);
3030 						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3031 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3032 							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3033 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3034 							return -EINVAL;
3035 						}
3036 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3037 							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3038 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3039 							return -EINVAL;
3040 						}
3041 						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3042 							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3043 								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3044 							return -EINVAL;
3045 						}
3046 						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3047 						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3048 						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3049 						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3050 						p->idx += 10;
3051 						break;
3052 					case 4:
3053 						/* L2T, T2L */
3054 						/* detile bit */
3055 						if (idx_value & (1 << 31)) {
3056 							/* tiled src, linear dst */
3057 							src_offset = radeon_get_ib_value(p, idx+1);
3058 							src_offset <<= 8;
3059 							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3060 
3061 							dst_offset = radeon_get_ib_value(p, idx+7);
3062 							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3063 							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3064 							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3065 						} else {
3066 							/* linear src, tiled dst */
3067 							src_offset = radeon_get_ib_value(p, idx+7);
3068 							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3069 							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3070 							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3071 
3072 							dst_offset = radeon_get_ib_value(p, idx+1);
3073 							dst_offset <<= 8;
3074 							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3075 						}
3076 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3077 							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3078 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3079 							return -EINVAL;
3080 						}
3081 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3082 							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3083 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3084 							return -EINVAL;
3085 						}
3086 						p->idx += 9;
3087 						break;
3088 					case 5:
3089 						/* T2T partial */
3090 						if (p->family < CHIP_CAYMAN) {
3091 							DRM_ERROR("T2T Partial is cayman only !\n");
3092 							return -EINVAL;
3093 						}
3094 						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3095 						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3096 						p->idx += 13;
3097 						break;
3098 					case 7:
3099 						/* L2T, broadcast */
3100 						if (idx_value & (1 << 31)) {
3101 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3102 							return -EINVAL;
3103 						}
3104 						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3105 						if (r) {
3106 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3107 							return -EINVAL;
3108 						}
3109 						dst_offset = radeon_get_ib_value(p, idx+1);
3110 						dst_offset <<= 8;
3111 						dst2_offset = radeon_get_ib_value(p, idx+2);
3112 						dst2_offset <<= 8;
3113 						src_offset = radeon_get_ib_value(p, idx+8);
3114 						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3115 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3116 							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3117 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3118 							return -EINVAL;
3119 						}
3120 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3121 							dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3122 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3123 							return -EINVAL;
3124 						}
3125 						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3126 							dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3127 								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3128 							return -EINVAL;
3129 						}
3130 						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3131 						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3132 						ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3133 						ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3134 						p->idx += 10;
3135 						break;
3136 					default:
3137 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3138 						return -EINVAL;
3139 					}
3140 				} else {
3141 					switch (misc) {
3142 					case 0:
3143 						/* detile bit */
3144 						if (idx_value & (1 << 31)) {
3145 							/* tiled src, linear dst */
3146 							src_offset = radeon_get_ib_value(p, idx+1);
3147 							src_offset <<= 8;
3148 							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3149 
3150 							dst_offset = radeon_get_ib_value(p, idx+7);
3151 							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3152 							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3153 							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3154 						} else {
3155 							/* linear src, tiled dst */
3156 							src_offset = radeon_get_ib_value(p, idx+7);
3157 							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3158 							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3159 							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3160 
3161 							dst_offset = radeon_get_ib_value(p, idx+1);
3162 							dst_offset <<= 8;
3163 							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3164 						}
3165 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3166 							dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3167 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3168 							return -EINVAL;
3169 						}
3170 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3171 							dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3172 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3173 							return -EINVAL;
3174 						}
3175 						p->idx += 9;
3176 						break;
3177 					default:
3178 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3179 						return -EINVAL;
3180 					}
3181 				}
3182 			} else {
3183 				if (new_cmd) {
3184 					switch (misc) {
3185 					case 0:
3186 						/* L2L, byte */
3187 						src_offset = radeon_get_ib_value(p, idx+2);
3188 						src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3189 						dst_offset = radeon_get_ib_value(p, idx+1);
3190 						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3191 						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3192 							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3193 								 src_offset + count, radeon_bo_size(src_reloc->robj));
3194 							return -EINVAL;
3195 						}
3196 						if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3197 							dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3198 								 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3199 							return -EINVAL;
3200 						}
3201 						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3202 						ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3203 						ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3204 						ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3205 						p->idx += 5;
3206 						break;
3207 					case 1:
3208 						/* L2L, partial */
3209 						if (p->family < CHIP_CAYMAN) {
3210 							DRM_ERROR("L2L Partial is cayman only !\n");
3211 							return -EINVAL;
3212 						}
3213 						ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3214 						ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3215 						ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3216 						ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3217 
3218 						p->idx += 9;
3219 						break;
3220 					case 4:
3221 						/* L2L, dw, broadcast */
3222 						r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3223 						if (r) {
3224 							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3225 							return -EINVAL;
3226 						}
3227 						dst_offset = radeon_get_ib_value(p, idx+1);
3228 						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3229 						dst2_offset = radeon_get_ib_value(p, idx+2);
3230 						dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
3231 						src_offset = radeon_get_ib_value(p, idx+3);
3232 						src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
3233 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3234 							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3235 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3236 							return -EINVAL;
3237 						}
3238 						if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3239 							dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3240 								 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3241 							return -EINVAL;
3242 						}
3243 						if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3244 							dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3245 								 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3246 							return -EINVAL;
3247 						}
3248 						ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3249 						ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3250 						ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3251 						ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3252 						ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3253 						ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3254 						p->idx += 7;
3255 						break;
3256 					default:
3257 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3258 						return -EINVAL;
3259 					}
3260 				} else {
3261 					/* L2L, dw */
3262 					src_offset = radeon_get_ib_value(p, idx+2);
3263 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3264 					dst_offset = radeon_get_ib_value(p, idx+1);
3265 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3266 					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3267 						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3268 							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3269 						return -EINVAL;
3270 					}
3271 					if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3272 						dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3273 							 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3274 						return -EINVAL;
3275 					}
3276 					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3277 					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3278 					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3279 					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3280 					p->idx += 5;
3281 				}
3282 			}
3283 			break;
3284 		case DMA_PACKET_CONSTANT_FILL:
3285 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
3286 			if (r) {
3287 				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3288 				return -EINVAL;
3289 			}
3290 			dst_offset = radeon_get_ib_value(p, idx+1);
3291 			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3292 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3293 				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3294 					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3295 				return -EINVAL;
3296 			}
3297 			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3298 			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3299 			p->idx += 4;
3300 			break;
3301 		case DMA_PACKET_NOP:
3302 			p->idx += 1;
3303 			break;
3304 		default:
3305 			DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3306 			return -EINVAL;
3307 		}
3308 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3309 #if 0
3310 	for (r = 0; r < p->ib.length_dw; r++) {
3311 		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3312 		mdelay(1);
3313 	}
3314 #endif
3315 	return 0;
3316 }
3317 
3318 /* vm parser */
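/* With the GPU VM in use, buffer access is already confined by the
 * per-client page tables, so the VM parsers below only have to vet
 * register writes: anything in the context-register range is allowed
 * outright, config registers are checked against a whitelist.
 */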
3319 static bool evergreen_vm_reg_valid(u32 reg)
3320 {
3321 	/* context regs are fine */
3322 	if (reg >= 0x28000)
3323 		return true;
3324 
3325 	/* check config regs */
3326 	switch (reg) {
3327 	case WAIT_UNTIL:
3328 	case GRBM_GFX_INDEX:
3329 	case CP_STRMOUT_CNTL:
3330 	case CP_COHER_CNTL:
3331 	case CP_COHER_SIZE:
3332 	case VGT_VTX_VECT_EJECT_REG:
3333 	case VGT_CACHE_INVALIDATION:
3334 	case VGT_GS_VERTEX_REUSE:
3335 	case VGT_PRIMITIVE_TYPE:
3336 	case VGT_INDEX_TYPE:
3337 	case VGT_NUM_INDICES:
3338 	case VGT_NUM_INSTANCES:
3339 	case VGT_COMPUTE_DIM_X:
3340 	case VGT_COMPUTE_DIM_Y:
3341 	case VGT_COMPUTE_DIM_Z:
3342 	case VGT_COMPUTE_START_X:
3343 	case VGT_COMPUTE_START_Y:
3344 	case VGT_COMPUTE_START_Z:
3345 	case VGT_COMPUTE_INDEX:
3346 	case VGT_COMPUTE_THREAD_GROUP_SIZE:
3347 	case VGT_HS_OFFCHIP_PARAM:
3348 	case PA_CL_ENHANCE:
3349 	case PA_SU_LINE_STIPPLE_VALUE:
3350 	case PA_SC_LINE_STIPPLE_STATE:
3351 	case PA_SC_ENHANCE:
3352 	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3353 	case SQ_DYN_GPR_SIMD_LOCK_EN:
3354 	case SQ_CONFIG:
3355 	case SQ_GPR_RESOURCE_MGMT_1:
3356 	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3357 	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3358 	case SQ_CONST_MEM_BASE:
3359 	case SQ_STATIC_THREAD_MGMT_1:
3360 	case SQ_STATIC_THREAD_MGMT_2:
3361 	case SQ_STATIC_THREAD_MGMT_3:
3362 	case SPI_CONFIG_CNTL:
3363 	case SPI_CONFIG_CNTL_1:
3364 	case TA_CNTL_AUX:
3365 	case DB_DEBUG:
3366 	case DB_DEBUG2:
3367 	case DB_DEBUG3:
3368 	case DB_DEBUG4:
3369 	case DB_WATERMARKS:
3370 	case TD_PS_BORDER_COLOR_INDEX:
3371 	case TD_PS_BORDER_COLOR_RED:
3372 	case TD_PS_BORDER_COLOR_GREEN:
3373 	case TD_PS_BORDER_COLOR_BLUE:
3374 	case TD_PS_BORDER_COLOR_ALPHA:
3375 	case TD_VS_BORDER_COLOR_INDEX:
3376 	case TD_VS_BORDER_COLOR_RED:
3377 	case TD_VS_BORDER_COLOR_GREEN:
3378 	case TD_VS_BORDER_COLOR_BLUE:
3379 	case TD_VS_BORDER_COLOR_ALPHA:
3380 	case TD_GS_BORDER_COLOR_INDEX:
3381 	case TD_GS_BORDER_COLOR_RED:
3382 	case TD_GS_BORDER_COLOR_GREEN:
3383 	case TD_GS_BORDER_COLOR_BLUE:
3384 	case TD_GS_BORDER_COLOR_ALPHA:
3385 	case TD_HS_BORDER_COLOR_INDEX:
3386 	case TD_HS_BORDER_COLOR_RED:
3387 	case TD_HS_BORDER_COLOR_GREEN:
3388 	case TD_HS_BORDER_COLOR_BLUE:
3389 	case TD_HS_BORDER_COLOR_ALPHA:
3390 	case TD_LS_BORDER_COLOR_INDEX:
3391 	case TD_LS_BORDER_COLOR_RED:
3392 	case TD_LS_BORDER_COLOR_GREEN:
3393 	case TD_LS_BORDER_COLOR_BLUE:
3394 	case TD_LS_BORDER_COLOR_ALPHA:
3395 	case TD_CS_BORDER_COLOR_INDEX:
3396 	case TD_CS_BORDER_COLOR_RED:
3397 	case TD_CS_BORDER_COLOR_GREEN:
3398 	case TD_CS_BORDER_COLOR_BLUE:
3399 	case TD_CS_BORDER_COLOR_ALPHA:
3400 	case SQ_ESGS_RING_SIZE:
3401 	case SQ_GSVS_RING_SIZE:
3402 	case SQ_ESTMP_RING_SIZE:
3403 	case SQ_GSTMP_RING_SIZE:
3404 	case SQ_HSTMP_RING_SIZE:
3405 	case SQ_LSTMP_RING_SIZE:
3406 	case SQ_PSTMP_RING_SIZE:
3407 	case SQ_VSTMP_RING_SIZE:
3408 	case SQ_ESGS_RING_ITEMSIZE:
3409 	case SQ_ESTMP_RING_ITEMSIZE:
3410 	case SQ_GSTMP_RING_ITEMSIZE:
3411 	case SQ_GSVS_RING_ITEMSIZE:
3412 	case SQ_GS_VERT_ITEMSIZE:
3413 	case SQ_GS_VERT_ITEMSIZE_1:
3414 	case SQ_GS_VERT_ITEMSIZE_2:
3415 	case SQ_GS_VERT_ITEMSIZE_3:
3416 	case SQ_GSVS_RING_OFFSET_1:
3417 	case SQ_GSVS_RING_OFFSET_2:
3418 	case SQ_GSVS_RING_OFFSET_3:
3419 	case SQ_HSTMP_RING_ITEMSIZE:
3420 	case SQ_LSTMP_RING_ITEMSIZE:
3421 	case SQ_PSTMP_RING_ITEMSIZE:
3422 	case SQ_VSTMP_RING_ITEMSIZE:
3423 	case VGT_TF_RING_SIZE:
3424 	case SQ_ESGS_RING_BASE:
3425 	case SQ_GSVS_RING_BASE:
3426 	case SQ_ESTMP_RING_BASE:
3427 	case SQ_GSTMP_RING_BASE:
3428 	case SQ_HSTMP_RING_BASE:
3429 	case SQ_LSTMP_RING_BASE:
3430 	case SQ_PSTMP_RING_BASE:
3431 	case SQ_VSTMP_RING_BASE:
3432 	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3433 	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3434 		return true;
3435 	default:
3436 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3437 		return false;
3438 	}
3439 }
3440 
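/**
 * evergreen_vm_packet3_check() - validate one PACKET3 from a VM IB
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Most opcodes are allowed through as-is; packets that can write
 * registers (COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA) have their
 * targets run through evergreen_vm_reg_valid().
 * Returns 0 for success and an error on failure.
 **/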
3441 static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3442 				      u32 *ib, struct radeon_cs_packet *pkt)
3443 {
3444 	u32 idx = pkt->idx + 1;
3445 	u32 idx_value = ib[idx];
3446 	u32 start_reg, end_reg, reg, i;
3447 	u32 command, info;
3448 
3449 	switch (pkt->opcode) {
3450 	case PACKET3_NOP:
3451 	case PACKET3_SET_BASE:
3452 	case PACKET3_CLEAR_STATE:
3453 	case PACKET3_INDEX_BUFFER_SIZE:
3454 	case PACKET3_DISPATCH_DIRECT:
3455 	case PACKET3_DISPATCH_INDIRECT:
3456 	case PACKET3_MODE_CONTROL:
3457 	case PACKET3_SET_PREDICATION:
3458 	case PACKET3_COND_EXEC:
3459 	case PACKET3_PRED_EXEC:
3460 	case PACKET3_DRAW_INDIRECT:
3461 	case PACKET3_DRAW_INDEX_INDIRECT:
3462 	case PACKET3_INDEX_BASE:
3463 	case PACKET3_DRAW_INDEX_2:
3464 	case PACKET3_CONTEXT_CONTROL:
3465 	case PACKET3_DRAW_INDEX_OFFSET:
3466 	case PACKET3_INDEX_TYPE:
3467 	case PACKET3_DRAW_INDEX:
3468 	case PACKET3_DRAW_INDEX_AUTO:
3469 	case PACKET3_DRAW_INDEX_IMMD:
3470 	case PACKET3_NUM_INSTANCES:
3471 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
3472 	case PACKET3_STRMOUT_BUFFER_UPDATE:
3473 	case PACKET3_DRAW_INDEX_OFFSET_2:
3474 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3475 	case PACKET3_MPEG_INDEX:
3476 	case PACKET3_WAIT_REG_MEM:
3477 	case PACKET3_MEM_WRITE:
3478 	case PACKET3_SURFACE_SYNC:
3479 	case PACKET3_EVENT_WRITE:
3480 	case PACKET3_EVENT_WRITE_EOP:
3481 	case PACKET3_EVENT_WRITE_EOS:
3482 	case PACKET3_SET_CONTEXT_REG:
3483 	case PACKET3_SET_BOOL_CONST:
3484 	case PACKET3_SET_LOOP_CONST:
3485 	case PACKET3_SET_RESOURCE:
3486 	case PACKET3_SET_SAMPLER:
3487 	case PACKET3_SET_CTL_CONST:
3488 	case PACKET3_SET_RESOURCE_OFFSET:
3489 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3490 	case PACKET3_SET_RESOURCE_INDIRECT:
3491 	case CAYMAN_PACKET3_DEALLOC_STATE:
3492 		break;
3493 	case PACKET3_COND_WRITE:
3494 		if (idx_value & 0x100) {
3495 			reg = ib[idx + 5] * 4;
3496 			if (!evergreen_vm_reg_valid(reg))
3497 				return -EINVAL;
3498 		}
3499 		break;
3500 	case PACKET3_COPY_DW:
3501 		if (idx_value & 0x2) {
3502 			reg = ib[idx + 3] * 4;
3503 			if (!evergreen_vm_reg_valid(reg))
3504 				return -EINVAL;
3505 		}
3506 		break;
3507 	case PACKET3_SET_CONFIG_REG:
3508 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3509 		end_reg = 4 * pkt->count + start_reg - 4;
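		/* idx_value is a dword offset into the config-reg aperture, so,
		 * e.g., idx_value = 4 with pkt->count = 2 covers the registers at
		 * START + 0x10 and START + 0x14; end_reg is the last one written.
		 */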
3510 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3511 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3512 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3513 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3514 			return -EINVAL;
3515 		}
3516 		for (i = 0; i < pkt->count; i++) {
3517 			reg = start_reg + (4 * i);
3518 			if (!evergreen_vm_reg_valid(reg))
3519 				return -EINVAL;
3520 		}
3521 		break;
3522 	case PACKET3_CP_DMA:
3523 		command = ib[idx + 4];
3524 		info = ib[idx + 1];
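		/* Per the checks below: bits [30:29] of dword 1 select the source
		 * address space and bits [21:20] the destination (0 = memory), and
		 * the low 21 bits of the command dword hold the byte count.  SAS/DAS
		 * flag register-space transfers; SAIC/DAIC suppress the address
		 * increment so a single register is hit repeatedly.
		 */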
3525 		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3526 		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3527 		    ((((info & 0x00300000) >> 20) == 0) &&
3528 		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3529 		    ((((info & 0x60000000) >> 29) == 0) &&
3530 		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3531 			/* non mem-to-mem copies require a dword-aligned count */
3532 			if ((command & 0x1fffff) % 4) {
3533 				DRM_ERROR("CP DMA command requires dw count alignment\n");
3534 				return -EINVAL;
3535 			}
3536 		}
3537 		if (command & PACKET3_CP_DMA_CMD_SAS) {
3538 			/* src address space is register */
3539 			if (((info & 0x60000000) >> 29) == 0) {
3540 				start_reg = idx_value << 2;
3541 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
3542 					reg = start_reg;
3543 					if (!evergreen_vm_reg_valid(reg)) {
3544 						DRM_ERROR("CP DMA Bad SRC register\n");
3545 						return -EINVAL;
3546 					}
3547 				} else {
3548 					for (i = 0; i < (command & 0x1fffff); i++) {
3549 						reg = start_reg + (4 * i);
3550 						if (!evergreen_vm_reg_valid(reg)) {
3551 							DRM_ERROR("CP DMA Bad SRC register\n");
3552 							return -EINVAL;
3553 						}
3554 					}
3555 				}
3556 			}
3557 		}
3558 		if (command & PACKET3_CP_DMA_CMD_DAS) {
3559 			/* dst address space is register */
3560 			if (((info & 0x00300000) >> 20) == 0) {
3561 				start_reg = ib[idx + 2];
3562 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
3563 					reg = start_reg;
3564 					if (!evergreen_vm_reg_valid(reg)) {
3565 						DRM_ERROR("CP DMA Bad DST register\n");
3566 						return -EINVAL;
3567 					}
3568 				} else {
3569 					for (i = 0; i < (command & 0x1fffff); i++) {
3570 						reg = start_reg + (4 * i);
3571 						if (!evergreen_vm_reg_valid(reg)) {
3572 							DRM_ERROR("CP DMA Bad DST register\n");
3573 							return -EINVAL;
3574 						}
3575 					}
3576 				}
3577 			}
3578 		}
3579 		break;
3580 	default:
3581 		return -EINVAL;
3582 	}
3583 	return 0;
3584 }
3585 
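/**
 * evergreen_ib_parse() - parse the GFX IB for VM
 * @rdev: radeon_device pointer
 * @ib:	radeon_ib pointer
 *
 * Parses the CP IB from the VM CS ioctl and
 * checks it for errors. (Evergreen-Cayman)
 * Returns 0 for success and an error on failure.
 **/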
3586 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3587 {
3588 	int ret = 0;
3589 	u32 idx = 0;
3590 	struct radeon_cs_packet pkt;
3591 
3592 	do {
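	/* Each CP packet starts with a header dword encoding its type and
	 * payload length (plus, for type 3, the opcode); only type 2 filler
	 * and type 3 packets are legal in a VM IB.
	 */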
3593 		pkt.idx = idx;
3594 		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
3595 		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
3596 		pkt.one_reg_wr = 0;
3597 		switch (pkt.type) {
3598 		case PACKET_TYPE0:
3599 			dev_err(rdev->dev, "Packet0 not allowed!\n");
3600 			ret = -EINVAL;
3601 			break;
3602 		case PACKET_TYPE2:
3603 			idx += 1;
3604 			break;
3605 		case PACKET_TYPE3:
3606 			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3607 			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3608 			idx += pkt.count + 2;
3609 			break;
3610 		default:
3611 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
3612 			ret = -EINVAL;
3613 			break;
3614 		}
3615 		if (ret)
3616 			break;
3617 	} while (idx < ib->length_dw);
3618 
3619 	return ret;
3620 }
3621 
3622 /**
3623  * evergreen_dma_ib_parse() - parse the DMA IB for VM
3624  * @rdev: radeon_device pointer
3625  * @ib:	radeon_ib pointer
3626  *
3627  * Parses the DMA IB from the VM CS ioctl and
3628  * checks it for errors. (Cayman-SI)
3629  * Returns 0 for success and an error on failure.
3630  **/
3631 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3632 {
3633 	u32 idx = 0;
3634 	u32 header, cmd, count, tiled, new_cmd, misc;
3635 
3636 	do {
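	/* Every DMA packet likewise begins with a header dword; the
	 * GET_DMA_* macros from evergreend.h extract the command, the
	 * count, the tiled and new-packet flags, and the misc sub-op that
	 * together determine how many dwords the packet occupies.
	 */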
3637 		header = ib->ptr[idx];
3638 		cmd = GET_DMA_CMD(header);
3639 		count = GET_DMA_COUNT(header);
3640 		tiled = GET_DMA_T(header);
3641 		new_cmd = GET_DMA_NEW(header);
3642 		misc = GET_DMA_MISC(header);
3643 
3644 		switch (cmd) {
3645 		case DMA_PACKET_WRITE:
3646 			if (tiled)
3647 				idx += count + 7;
3648 			else
3649 				idx += count + 3;
3650 			break;
3651 		case DMA_PACKET_COPY:
3652 			if (tiled) {
3653 				if (new_cmd) {
3654 					switch (misc) {
3655 					case 0:
3656 						/* L2T, frame to fields */
3657 						idx += 10;
3658 						break;
3659 					case 1:
3660 						/* L2T, T2L partial */
3661 						idx += 12;
3662 						break;
3663 					case 3:
3664 						/* L2T, broadcast */
3665 						idx += 10;
3666 						break;
3667 					case 4:
3668 						/* L2T, T2L */
3669 						idx += 9;
3670 						break;
3671 					case 5:
3672 						/* T2T partial */
3673 						idx += 13;
3674 						break;
3675 					case 7:
3676 						/* L2T, broadcast */
3677 						idx += 10;
3678 						break;
3679 					default:
3680 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3681 						return -EINVAL;
3682 					}
3683 				} else {
3684 					switch (misc) {
3685 					case 0:
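						/* L2T/T2L, legacy packet (likely steered by a detile
						 * bit in the body, as in the non-VM parser above) */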
3686 						idx += 9;
3687 						break;
3688 					default:
3689 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3690 						return -EINVAL;
3691 					}
3692 				}
3693 			} else {
3694 				if (new_cmd) {
3695 					switch (misc) {
3696 					case 0:
3697 						/* L2L, byte */
3698 						idx += 5;
3699 						break;
3700 					case 1:
3701 						/* L2L, partial */
3702 						idx += 9;
3703 						break;
3704 					case 4:
3705 						/* L2L, dw, broadcast */
3706 						idx += 7;
3707 						break;
3708 					default:
3709 						DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3710 						return -EINVAL;
3711 					}
3712 				} else {
3713 					/* L2L, dw */
3714 					idx += 5;
3715 				}
3716 			}
3717 			break;
3718 		case DMA_PACKET_CONSTANT_FILL:
3719 			idx += 4;
3720 			break;
3721 		case DMA_PACKET_NOP:
3722 			idx += 1;
3723 			break;
3724 		default:
3725 			DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
3726 			return -EINVAL;
3727 		}
3728 	} while (idx < ib->length_dw);
3729 
3730 	return 0;
3731 }
3732