xref: /netbsd-src/sys/external/bsd/drm/dist/shared-core/r300_cmdbuf.c (revision d7792ba7fe789bc3d5f4d57a355db3f8d54d5c21)
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33 
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39 
/* Number of cliprects emitted to the hardware in a single batch. */
#define R300_SIMULTANEOUS_CLIPRECTS		4

/* Values written to R300_RE_CLIPRECT_CNTL, indexed by
 * (number of cliprects in the batch - 1).
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
	0xAAAA,		/* 1 cliprect */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE		/* 4 cliprects */
};
50 
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
r300_emit_cliprects(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,int n)55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56 			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58 	struct drm_clip_rect box;
59 	int nr;
60 	int i;
61 	RING_LOCALS;
62 
63 	nr = cmdbuf->nbox - n;
64 	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65 		nr = R300_SIMULTANEOUS_CLIPRECTS;
66 
67 	DRM_DEBUG("%i cliprects\n", nr);
68 
69 	if (nr) {
70 		BEGIN_RING(6 + nr * 2);
71 		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72 
73 		for (i = 0; i < nr; ++i) {
74 			if (DRM_COPY_FROM_USER_UNCHECKED
75 			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76 				DRM_ERROR("copy cliprect faulted\n");
77 				return -EFAULT;
78 			}
79 
80 			box.x2--; /* Hardware expects inclusive bottom-right corner */
81 			box.y2--;
82 
83 			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
84 				box.x1 = (box.x1) &
85 					R300_CLIPRECT_MASK;
86 				box.y1 = (box.y1) &
87 					R300_CLIPRECT_MASK;
88 				box.x2 = (box.x2) &
89 					R300_CLIPRECT_MASK;
90 				box.y2 = (box.y2) &
91 					R300_CLIPRECT_MASK;
92 			} else {
93 				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
94 					R300_CLIPRECT_MASK;
95 				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
96 					R300_CLIPRECT_MASK;
97 				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
98 					R300_CLIPRECT_MASK;
99 				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
100 					R300_CLIPRECT_MASK;
101 			}
102 
103 			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
104 				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
105 			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
106 				 (box.y2 << R300_CLIPRECT_Y_SHIFT));
107 
108 		}
109 
110 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
111 
112 		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
113 		 * client might be able to trample over memory.
114 		 * The impact should be very limited, but I'd rather be safe than
115 		 * sorry.
116 		 */
117 		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
118 		OUT_RING(0);
119 		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
120 		ADVANCE_RING();
121 	} else {
122 		/* Why we allow zero cliprect rendering:
123 		 * There are some commands in a command buffer that must be submitted
124 		 * even when there are no cliprects, e.g. DMA buffer discard
125 		 * or state setting (though state setting could be avoided by
126 		 * simulating a loss of context).
127 		 *
128 		 * Now since the cmdbuf interface is so chaotic right now (and is
129 		 * bound to remain that way for a bit until things settle down),
130 		 * it is basically impossible to filter out the commands that are
131 		 * necessary and those that aren't.
132 		 *
133 		 * So I choose the safe way and don't do any filtering at all;
134 		 * instead, I simply set up the engine so that all rendering
135 		 * can't produce any fragments.
136 		 */
137 		BEGIN_RING(2);
138 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
139 		ADVANCE_RING();
140 	}
141 
142 	/* flus cache and wait idle clean after cliprect change */
143 	BEGIN_RING(2);
144 	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
145 	OUT_RING(R300_RB3D_DC_FLUSH);
146 	ADVANCE_RING();
147 	BEGIN_RING(2);
148 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
149 	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
150 	ADVANCE_RING();
151 	/* set flush flag */
152 	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
153 
154 	return 0;
155 }
156 
157 static u8 r300_reg_flags[0x10000 >> 2];
158 
r300_init_reg_flags(struct drm_device * dev)159 void r300_init_reg_flags(struct drm_device *dev)
160 {
161 	int i;
162 	drm_radeon_private_t *dev_priv = dev->dev_private;
163 
164 	memset(r300_reg_flags, 0, 0x10000 >> 2);
165 #define ADD_RANGE_MARK(reg, count,mark) \
166 		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
167 			r300_reg_flags[i]|=(mark);
168 
169 #define MARK_SAFE		1
170 #define MARK_CHECK_OFFSET	2
171 
172 #define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
173 
174 	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
175 	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
176 	ADD_RANGE(R300_VAP_CNTL, 1);
177 	ADD_RANGE(R300_SE_VTE_CNTL, 2);
178 	ADD_RANGE(0x2134, 2);
179 	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
180 	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
181 	ADD_RANGE(0x21DC, 1);
182 	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
183 	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
184 	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
185 	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
186 	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
187 	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
188 	ADD_RANGE(R300_GB_ENABLE, 1);
189 	ADD_RANGE(R300_GB_MSPOS0, 5);
190 	ADD_RANGE(R300_TX_INVALTAGS, 1);
191 	ADD_RANGE(R300_TX_ENABLE, 1);
192 	ADD_RANGE(0x4200, 4);
193 	ADD_RANGE(0x4214, 1);
194 	ADD_RANGE(R300_RE_POINTSIZE, 1);
195 	ADD_RANGE(0x4230, 3);
196 	ADD_RANGE(R300_RE_LINE_CNT, 1);
197 	ADD_RANGE(R300_RE_UNK4238, 1);
198 	ADD_RANGE(0x4260, 3);
199 	ADD_RANGE(R300_RE_SHADE, 4);
200 	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
201 	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
202 	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
203 	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
204 	ADD_RANGE(R300_RE_CULL_CNTL, 1);
205 	ADD_RANGE(0x42C0, 2);
206 	ADD_RANGE(R300_RS_CNTL_0, 2);
207 
208 	ADD_RANGE(R300_SC_HYPERZ, 2);
209 	ADD_RANGE(0x43E8, 1);
210 
211 	ADD_RANGE(0x46A4, 5);
212 
213 	ADD_RANGE(R300_RE_FOG_STATE, 1);
214 	ADD_RANGE(R300_FOG_COLOR_R, 3);
215 	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
216 	ADD_RANGE(0x4BD8, 1);
217 	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
218 	ADD_RANGE(0x4E00, 1);
219 	ADD_RANGE(R300_RB3D_CBLEND, 2);
220 	ADD_RANGE(R300_RB3D_COLORMASK, 1);
221 	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
222 	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
223 	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
224 	ADD_RANGE(0x4E50, 9);
225 	ADD_RANGE(0x4E88, 1);
226 	ADD_RANGE(0x4EA0, 2);
227 	ADD_RANGE(R300_ZB_CNTL, 3);
228 	ADD_RANGE(R300_ZB_FORMAT, 4);
229 	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
230 	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
231 	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
232 	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
233 
234 	ADD_RANGE(R300_TX_FILTER_0, 16);
235 	ADD_RANGE(R300_TX_FILTER1_0, 16);
236 	ADD_RANGE(R300_TX_SIZE_0, 16);
237 	ADD_RANGE(R300_TX_FORMAT_0, 16);
238 	ADD_RANGE(R300_TX_PITCH_0, 16);
239 	/* Texture offset is dangerous and needs more checking */
240 	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
241 	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
242 	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
243 
244 	/* Sporadic registers used as primitives are emitted */
245 	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
246 	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
247 	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
248 	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
249 
250 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
251 		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
252 		ADD_RANGE(R500_US_CONFIG, 2);
253 		ADD_RANGE(R500_US_CODE_ADDR, 3);
254 		ADD_RANGE(R500_US_FC_CTRL, 1);
255 		ADD_RANGE(R500_RS_IP_0, 16);
256 		ADD_RANGE(R500_RS_INST_0, 16);
257 		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
258 		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
259 		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
260 	} else {
261 		ADD_RANGE(R300_PFS_CNTL_0, 3);
262 		ADD_RANGE(R300_PFS_NODE_0, 4);
263 		ADD_RANGE(R300_PFS_TEXI_0, 64);
264 		ADD_RANGE(R300_PFS_INSTR0_0, 64);
265 		ADD_RANGE(R300_PFS_INSTR1_0, 64);
266 		ADD_RANGE(R300_PFS_INSTR2_0, 64);
267 		ADD_RANGE(R300_PFS_INSTR3_0, 64);
268 		ADD_RANGE(R300_RS_INTERP_0, 8);
269 		ADD_RANGE(R300_RS_ROUTE_0, 8);
270 
271 	}
272 }
273 
r300_check_range(unsigned reg,int count)274 static __inline__ int r300_check_range(unsigned reg, int count)
275 {
276 	int i;
277 	if (reg & ~0xffff)
278 		return -1;
279 	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
280 		if (r300_reg_flags[i] != MARK_SAFE)
281 			return 1;
282 	return 0;
283 }
284 
/**
 * Emit a packet0 after validating each destination register on its own.
 *
 * Every value is copied out of cmdbuf->buf; a register flagged
 * MARK_CHECK_OFFSET must carry a value accepted by radeon_check_offset,
 * a register flagged MARK_SAFE is accepted as is, and any other flag
 * rejects the whole packet.  On success the packet is written to the ring
 * and cmdbuf is advanced past the consumed values.
 *
 * Returns 0 on success or -EINVAL if the count exceeds 64 or a check fails.
 */
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  drm_radeon_kcmd_buffer_t
							  * cmdbuf,
							  drm_r300_cmd_header_t
							  header)
{
	int values[64];
	int reg;
	int sz;
	int n;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	if ((sz < 0) || (sz > 64)) {
		DRM_ERROR
		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
		     reg, sz);
		return -EINVAL;
	}

	for (n = 0; n < sz; n++) {
		const int flag_idx = (reg >> 2) + n;

		values[n] = ((int *)cmdbuf->buf)[n];

		if (r300_reg_flags[flag_idx] == MARK_SAFE)
			continue;

		if (r300_reg_flags[flag_idx] != MARK_CHECK_OFFSET) {
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				  reg + n * 4, r300_reg_flags[flag_idx]);
			return -EINVAL;
		}

		/* register holds a memory offset: validate the value */
		if (!radeon_check_offset(dev_priv, (u32) values[n])) {
			DRM_ERROR
			    ("Offset failed range check (reg=%04x sz=%d)\n",
			     reg, sz);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_TABLE(values, sz);
	ADVANCE_RING();

	cmdbuf->bufsz -= sz * 4;
	cmdbuf->buf += sz * 4;

	return 0;
}
337 
338 /**
339  * Emits a packet0 setting arbitrary registers.
340  * Called by r300_do_cp_cmdbuf.
341  *
342  * Note that checks are performed on contents and addresses of the registers
343  */
r300_emit_packet0(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,drm_r300_cmd_header_t header)344 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
345 					drm_radeon_kcmd_buffer_t *cmdbuf,
346 					drm_r300_cmd_header_t header)
347 {
348 	int reg;
349 	int sz;
350 	RING_LOCALS;
351 
352 	sz = header.packet0.count;
353 	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
354 
355 	DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
356 	if (!sz)
357 		return 0;
358 
359 	if (sz * 4 > cmdbuf->bufsz)
360 		return -EINVAL;
361 
362 	if (reg + sz * 4 >= 0x10000) {
363 		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
364 			  sz);
365 		return -EINVAL;
366 	}
367 
368 	if (r300_check_range(reg, sz)) {
369 		/* go and check everything */
370 		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
371 							   header);
372 	}
373 	/* the rest of the data is safe to emit, whatever the values the user passed */
374 
375 	BEGIN_RING(1 + sz);
376 	OUT_RING(CP_PACKET0(reg, sz - 1));
377 	OUT_RING_TABLE((int *)cmdbuf->buf, sz);
378 	ADVANCE_RING();
379 
380 	cmdbuf->buf += sz * 4;
381 	cmdbuf->bufsz -= sz * 4;
382 
383 	return 0;
384 }
385 
386 /**
387  * Uploads user-supplied vertex program instructions or parameters onto
388  * the graphics card.
389  * Called by r300_do_cp_cmdbuf.
390  */
r300_emit_vpu(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,drm_r300_cmd_header_t header)391 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
392 				    drm_radeon_kcmd_buffer_t *cmdbuf,
393 				    drm_r300_cmd_header_t header)
394 {
395 	int sz;
396 	int addr;
397 	RING_LOCALS;
398 
399 	sz = header.vpu.count;
400 	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
401 
402 	if (!sz)
403 		return 0;
404 	if (sz * 16 > cmdbuf->bufsz)
405 		return -EINVAL;
406 
407 	/* VAP is very sensitive so we purge cache before we program it
408 	 * and we also flush its state before & after */
409 	BEGIN_RING(6);
410 	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
411 	OUT_RING(R300_RB3D_DC_FLUSH);
412 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
413 	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
414 	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
415 	OUT_RING(0);
416 	ADVANCE_RING();
417 	/* set flush flag */
418 	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
419 
420 	BEGIN_RING(3 + sz * 4);
421 	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
422 	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
423 	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
424 	ADVANCE_RING();
425 
426 	BEGIN_RING(2);
427 	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
428 	OUT_RING(0);
429 	ADVANCE_RING();
430 
431 	cmdbuf->buf += sz * 16;
432 	cmdbuf->bufsz -= sz * 16;
433 
434 	return 0;
435 }
436 
437 /**
438  * Emit a clear packet from userspace.
439  * Called by r300_emit_packet3.
440  */
r300_emit_clear(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf)441 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
442 				      drm_radeon_kcmd_buffer_t *cmdbuf)
443 {
444 	RING_LOCALS;
445 
446 	if (8 * 4 > cmdbuf->bufsz)
447 		return -EINVAL;
448 
449 	BEGIN_RING(10);
450 	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
451 	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
452 		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
453 	OUT_RING_TABLE((int *)cmdbuf->buf, 8);
454 	ADVANCE_RING();
455 
456 	BEGIN_RING(4);
457 	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
458 	OUT_RING(R300_RB3D_DC_FLUSH);
459 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
460 	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
461 	ADVANCE_RING();
462 	/* set flush flag */
463 	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
464 
465 	cmdbuf->buf += 8 * 4;
466 	cmdbuf->bufsz -= 8 * 4;
467 
468 	return 0;
469 }
470 
/**
 * Validate and emit a 3D_LOAD_VBPNTR packet3.
 *
 * `header` is the packet3 header word already read from cmdbuf->buf; its
 * count field gives the payload length (count + 1 dwords following the
 * header).  payload[0] is the number of arrays; the arrays then come in
 * groups of one attribute dword followed by up to two offsets, and every
 * offset must be accepted by radeon_check_offset.
 *
 * Returns 0 on success and -EINVAL if the payload is too large, an offset
 * fails its check, or the array count does not match the payload length.
 */
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
					       drm_radeon_kcmd_buffer_t *cmdbuf,
					       u32 header)
{
	int count, i, k;
#define MAX_ARRAY_PACKET  64
	u32 payload[MAX_ARRAY_PACKET];
	u32 narrays;
	RING_LOCALS;

	count = (header >> 16) & 0x3fff;

	if ((count + 1) > MAX_ARRAY_PACKET) {
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
			  count);
		return -EINVAL;
	}
	memset(payload, 0, sizeof(payload));
	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

	/* carefully check packet contents */

	narrays = payload[0];
	k = 0;
	i = 1;
	while ((k < narrays) && (i < (count + 1))) {
		i++;		/* skip attribute field */
		/* The payload holds indices 0..count only.  Running out here
		 * means the packet is truncated; stop before reading past the
		 * copied data (and, for a full-size packet, past payload[]).
		 * The mismatch is reported by the consistency check below.
		 */
		if (i > count)
			break;
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
		if (k == narrays)
			break;
		/* have one more to process, they come in pairs */
		if (i > count)
			break;	/* truncated, see above */
		if (!radeon_check_offset(dev_priv, payload[i])) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
	}
	/* do the counts match what we expect ? */
	if ((k != narrays) || (i != (count + 1))) {
		DRM_ERROR
		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
		     k, i, narrays, count + 1);
		return -EINVAL;
	}

	/* all clear, output packet */

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE(payload, count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	return 0;
}
538 
/**
 * Validate and emit a CNTL_BITBLT_MULTI packet3 found at cmdbuf->buf.
 *
 * When bit 0x8000 of the header is set, the pitch/offset words selected by
 * the RADEON_GMC_*_PITCH_OFFSET_CNTL bits of the second dword are shifted
 * left by 10 and passed through radeon_check_offset.
 *
 * Returns 0 on success, -EINVAL if an offset is rejected.
 */
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	int count = (cmd[0] >> 16) & 0x3fff;
	int pkt_bytes = (count + 2) * 4;
	RING_LOCALS;

	if (cmd[0] & 0x8000) {
		u32 offset;

		/* at least one of src/dst pitch-offset present */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (!radeon_check_offset(dev_priv, offset)) {
				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
				return -EINVAL;
			}
		}

		/* both present: a second pitch-offset word follows */
		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (!radeon_check_offset(dev_priv, offset)) {
				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
				return -EINVAL;
			}
		}
	}

	BEGIN_RING(count + 2);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->bufsz -= pkt_bytes;
	cmdbuf->buf += pkt_bytes;

	return 0;
}
583 
/**
 * Validate and emit a 3D_DRAW_INDX_2 packet3 found at cmdbuf->buf.
 *
 * The expected payload size is derived from the upper 16 bits of the
 * second dword, halved (rounding up) unless the 32-bit index size bit is
 * set.  A non-zero packet count must match it.  A zero count means the
 * indices live in a separate buffer: the next command in the stream must
 * then be a raw packet3 INDX_BUFFER, which is validated and emitted here
 * as well.
 *
 * Returns 0 on success, -EINVAL on any mismatch or short stream.
 */
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_r300_cmd_header_t header;
	u32 *cmd = (u32 *) cmdbuf->buf;
	int count = (cmd[0] >> 16) & 0x3fff;
	int expected_count = cmd[1] >> 16;
	RING_LOCALS;

	if ((cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit) == 0)
		expected_count = (expected_count + 1) / 2;

	if (count != 0 && count != expected_count) {
		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
			count, expected_count);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->buf += (count + 2) * 4;
	cmdbuf->bufsz -= (count + 2) * 4;

	/* indices were inline: nothing more to do */
	if (count != 0)
		return 0;

	/* need a command header plus a four-dword INDX_BUFFER packet */
	if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
		DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
		return -EINVAL;
	}

	header.u = *(unsigned int *)cmdbuf->buf;

	cmdbuf->buf += sizeof(header);
	cmdbuf->bufsz -= sizeof(header);
	cmd = (u32 *) cmdbuf->buf;

	if (header.header.cmd_type != R300_CMD_PACKET3 ||
	    header.packet3.packet != R300_CMD_PACKET3_RAW ||
	    cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
		DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
		return -EINVAL;
	}

	if ((cmd[1] & 0x8000ffff) != 0x80000810) {
		DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
		return -EINVAL;
	}
	if (!radeon_check_offset(dev_priv, cmd[2])) {
		DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
		return -EINVAL;
	}
	if (cmd[3] != expected_count) {
		DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
			cmd[3], expected_count);
		return -EINVAL;
	}

	BEGIN_RING(4);
	OUT_RING(cmd[0]);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
	ADVANCE_RING();

	cmdbuf->buf += 4*4;
	cmdbuf->bufsz -= 4*4;

	return 0;
}
658 
/**
 * Emit one raw packet3 from cmdbuf after minimal validation.
 *
 * The header must be a type-3 packet that fits completely in the remaining
 * buffer.  VBPNTR, BITBLT_MULTI and DRAW_INDX_2 packets are delegated to
 * their own checkers; a bare INDX_BUFFER and any unknown opcode are
 * rejected; the remaining known packets are copied to the ring unchanged.
 * Draw packets clear the flush/purge tracking bits.
 *
 * Returns 0 on success or a negative errno value.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 header;
	int count;
	int pkt_bytes;
	RING_LOCALS;

	if (cmdbuf->bufsz < 4)
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* first word is the actual packet3 header */
	header = *(u32 *) cmdbuf->buf;

	/* top two bits must say "type 3" */
	if ((header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	count = (header >> 16) & 0x3fff;
	pkt_bytes = (count + 2) * 4;

	/* Check again now that we know how much data to expect */
	if (pkt_bytes > cmdbuf->bufsz) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     pkt_bytes, cmdbuf->bufsz);
		return -EINVAL;
	}

	/* Dispatch on the opcode byte */
	switch (header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:
		/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

	case RADEON_CNTL_BITBLT_MULTI:
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	case RADEON_CP_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
		/* whenever we send vertex we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		return r300_emit_draw_indx_2(dev_priv, cmdbuf);

	case RADEON_CP_INDX_BUFFER:
		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
		return -EINVAL;

	case RADEON_CP_3D_DRAW_IMMD_2:	/* in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:	/* vertex buffers set up elsewhere */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		break;

	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;

	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
	ADVANCE_RING();

	cmdbuf->bufsz -= pkt_bytes;
	cmdbuf->buf += pkt_bytes;

	return 0;
}
734 
735 /**
736  * Emit a rendering packet3 from userspace.
737  * Called by r300_do_cp_cmdbuf.
738  */
r300_emit_packet3(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,drm_r300_cmd_header_t header)739 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
740 					drm_radeon_kcmd_buffer_t *cmdbuf,
741 					drm_r300_cmd_header_t header)
742 {
743 	int n;
744 	int ret;
745 	char *orig_buf = cmdbuf->buf;
746 	int orig_bufsz = cmdbuf->bufsz;
747 
748 	/* This is a do-while-loop so that we run the interior at least once,
749 	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
750 	 */
751 	n = 0;
752 	do {
753 		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
754 			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
755 			if (ret)
756 				return ret;
757 
758 			cmdbuf->buf = orig_buf;
759 			cmdbuf->bufsz = orig_bufsz;
760 		}
761 
762 		switch (header.packet3.packet) {
763 		case R300_CMD_PACKET3_CLEAR:
764 			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
765 			ret = r300_emit_clear(dev_priv, cmdbuf);
766 			if (ret) {
767 				DRM_ERROR("r300_emit_clear failed\n");
768 				return ret;
769 			}
770 			break;
771 
772 		case R300_CMD_PACKET3_RAW:
773 			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
774 			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
775 			if (ret) {
776 				DRM_ERROR("r300_emit_raw_packet3 failed\n");
777 				return ret;
778 			}
779 			break;
780 
781 		default:
782 			DRM_ERROR("bad packet3 type %i at %p\n",
783 				  header.packet3.packet,
784 				  cmdbuf->buf - sizeof(header));
785 			return -EINVAL;
786 		}
787 
788 		n += R300_SIMULTANEOUS_CLIPRECTS;
789 	} while (n < cmdbuf->nbox);
790 
791 	return 0;
792 }
793 
794 /* Some of the R300 chips seem to be extremely touchy about the two registers
795  * that are configured in r300_pacify.
796  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
797  * sends a command buffer that contains only state setting commands and a
798  * vertex program/parameter upload sequence, this will eventually lead to a
799  * lockup, unless the sequence is bracketed by calls to r300_pacify.
800  * So we should take great care to *always* call r300_pacify before
801  * *anything* 3D related, and again afterwards. This is what the
802  * call bracket in r300_do_cp_cmdbuf is for.
803  */
804 
805 /**
806  * Emit the sequence to pacify R300.
807  */
r300_pacify(drm_radeon_private_t * dev_priv)808 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
809 {
810 	uint32_t cache_z, cache_3d, cache_2d;
811 	RING_LOCALS;
812 
813 	cache_z = R300_ZC_FLUSH;
814 	cache_2d = R300_RB2D_DC_FLUSH;
815 	cache_3d = R300_RB3D_DC_FLUSH;
816 	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
817 		/* we can purge, primitive where draw since last purge */
818 		cache_z |= R300_ZC_FREE;
819 		cache_2d |= R300_RB2D_DC_FREE;
820 		cache_3d |= R300_RB3D_DC_FREE;
821 	}
822 
823 	/* flush & purge zbuffer */
824 	BEGIN_RING(2);
825 	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
826 	OUT_RING(cache_z);
827 	ADVANCE_RING();
828 	/* flush & purge 3d */
829 	BEGIN_RING(2);
830 	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
831 	OUT_RING(cache_3d);
832 	ADVANCE_RING();
833 	/* flush & purge texture */
834 	BEGIN_RING(2);
835 	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
836 	OUT_RING(0);
837 	ADVANCE_RING();
838 	/* FIXME: is this one really needed ? */
839 	BEGIN_RING(2);
840 	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
841 	OUT_RING(0);
842 	ADVANCE_RING();
843 	BEGIN_RING(2);
844 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
845 	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
846 	ADVANCE_RING();
847 	/* flush & purge 2d through E2 as RB2D will trigger lockup */
848 	BEGIN_RING(4);
849 	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
850 	OUT_RING(cache_2d);
851 	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
852 	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
853 		 RADEON_WAIT_HOST_IDLECLEAN);
854 	ADVANCE_RING();
855 	/* set flush & purge flags */
856 	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
857 }
858 
859 /**
860  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
861  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
862  * be careful about how this function is called.
863  */
r300_discard_buffer(struct drm_device * dev,struct drm_buf * buf)864 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
865 {
866 	drm_radeon_private_t *dev_priv = dev->dev_private;
867 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
868 
869 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
870 	buf->pending = 1;
871 	buf->used = 0;
872 }
873 
/**
 * Emit a RADEON_WAIT_UNTIL for the wait request encoded in header.wait.flags.
 *
 * Nothing is emitted when the flags are zero or do not match one of the
 * known R300_WAIT_* / R300_NEW_WAIT_* combinations.
 */
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
			  drm_r300_cmd_header_t header)
{
	u32 mask = 0;
	RING_LOCALS;

	if (header.wait.flags == 0)
		return;

	switch (header.wait.flags) {
	case R300_WAIT_2D:
		mask = RADEON_WAIT_2D_IDLE;
		break;
	case R300_WAIT_3D:
		mask = RADEON_WAIT_3D_IDLE;
		break;
	case R300_NEW_WAIT_2D_3D:
		mask = RADEON_WAIT_2D_IDLE | RADEON_WAIT_3D_IDLE;
		break;
	case R300_NEW_WAIT_2D_2D_CLEAN:
		mask = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
		break;
	case R300_NEW_WAIT_3D_3D_CLEAN:
		mask = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
		break;
	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
		mask = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN |
		       RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
		break;
	default:
		/* unknown request: silently ignored */
		return;
	}

	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(mask);
	ADVANCE_RING();
}
914 
/**
 * Handle a scratch command: bump a scratch register age and publish it.
 *
 * The command payload is a 64-bit user pointer followed by
 * header.scratch.n_bufs 32-bit buffer indices.  Each index selects an
 * 8-byte record at the user pointer: the new age is written to its first
 * dword and its second dword (which must be non-zero) is decremented.
 * Finally the new age is written to the selected scratch register through
 * the ring.
 *
 * Returns 0 on success and -EINVAL on a short buffer, a register index of
 * 5 or more, a failed user copy, or a record whose second dword is zero.
 */
static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *ref_age_base;
	u32 *record;
	u32 n, buf_idx, h_pending;
	unsigned int reg;
	RING_LOCALS;

	if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) )
		return -EINVAL;

	if (header.scratch.reg >= 5)
		return -EINVAL;

	reg = header.scratch.reg;
	dev_priv->scratch_ages[reg]++;

	/* first payload item: user-space base address of the records */
	ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);

	cmdbuf->buf += sizeof(uint64_t);
	cmdbuf->bufsz -= sizeof(uint64_t);

	for (n = 0; n < header.scratch.n_bufs; n++) {
		buf_idx = *(u32 *)cmdbuf->buf;
		buf_idx *= 2; /* 8 bytes per buf */
		record = ref_age_base + buf_idx;

		/* dword 0 of the record: the new age */
		if (DRM_COPY_TO_USER(record, &dev_priv->scratch_ages[reg], sizeof(u32)))
			return -EINVAL;

		/* dword 1 of the record: read, require non-zero, decrement */
		if (DRM_COPY_FROM_USER(&h_pending, record + 1, sizeof(u32)))
			return -EINVAL;

		if (h_pending == 0)
			return -EINVAL;

		h_pending--;

		if (DRM_COPY_TO_USER(record + 1, &h_pending, sizeof(u32)))
			return -EINVAL;

		cmdbuf->buf += sizeof(buf_idx);
		cmdbuf->bufsz -= sizeof(buf_idx);
	}

	BEGIN_RING(2);
	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + reg * 4, 0 ) );
	OUT_RING( dev_priv->scratch_ages[reg] );
	ADVANCE_RING();

	return 0;
}
971 
972 /**
973  * Uploads user-supplied vertex program instructions or parameters onto
974  * the graphics card.
975  * Called by r300_do_cp_cmdbuf.
976  */
r300_emit_r500fp(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,drm_r300_cmd_header_t header)977 static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
978 				       drm_radeon_kcmd_buffer_t *cmdbuf,
979 				       drm_r300_cmd_header_t header)
980 {
981 	int sz;
982 	int addr;
983 	int type;
984 	int clamp;
985 	int stride;
986 	RING_LOCALS;
987 
988 	sz = header.r500fp.count;
989 	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
990 	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
991 
992 	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
993 	clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
994 
995 	addr |= (type << 16);
996 	addr |= (clamp << 17);
997 
998 	stride = type ? 4 : 6;
999 
1000 	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
1001 	if (!sz)
1002 		return 0;
1003 	if (sz * stride * 4 > cmdbuf->bufsz)
1004 		return -EINVAL;
1005 
1006 	BEGIN_RING(3 + sz * stride);
1007 	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
1008 	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
1009 	OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
1010 
1011 	ADVANCE_RING();
1012 
1013 	cmdbuf->buf += sz * stride * 4;
1014 	cmdbuf->bufsz -= sz * stride * 4;
1015 
1016 	return 0;
1017 }
1018 
1019 
1020 /**
1021  * Parses and validates a user-supplied command buffer and emits appropriate
1022  * commands on the DMA ring buffer.
1023  * Called by the ioctl handler function radeon_cp_cmdbuf.
1024  */
r300_do_cp_cmdbuf(struct drm_device * dev,struct drm_file * file_priv,drm_radeon_kcmd_buffer_t * cmdbuf)1025 int r300_do_cp_cmdbuf(struct drm_device *dev,
1026 		      struct drm_file *file_priv,
1027 		      drm_radeon_kcmd_buffer_t *cmdbuf)
1028 {
1029 	drm_radeon_private_t *dev_priv = dev->dev_private;
1030 	struct drm_device_dma *dma = dev->dma;
1031 	struct drm_buf *buf = NULL;
1032 	int emit_dispatch_age = 0;
1033 	int ret = 0;
1034 
1035 	DRM_DEBUG("\n");
1036 
1037 	/* pacify */
1038 	r300_pacify(dev_priv);
1039 
1040 	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1041 		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1042 		if (ret)
1043 			goto cleanup;
1044 	}
1045 
1046 	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
1047 		int idx;
1048 		drm_r300_cmd_header_t header;
1049 
1050 		header.u = *(unsigned int *)cmdbuf->buf;
1051 
1052 		cmdbuf->buf += sizeof(header);
1053 		cmdbuf->bufsz -= sizeof(header);
1054 
1055 		switch (header.header.cmd_type) {
1056 		case R300_CMD_PACKET0:
1057 			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
1058 			if (ret) {
1059 				DRM_ERROR("r300_emit_packet0 failed\n");
1060 				goto cleanup;
1061 			}
1062 			break;
1063 
1064 		case R300_CMD_VPU:
1065 			DRM_DEBUG("R300_CMD_VPU\n");
1066 			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
1067 			if (ret) {
1068 				DRM_ERROR("r300_emit_vpu failed\n");
1069 				goto cleanup;
1070 			}
1071 			break;
1072 
1073 		case R300_CMD_PACKET3:
1074 			DRM_DEBUG("R300_CMD_PACKET3\n");
1075 			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
1076 			if (ret) {
1077 				DRM_ERROR("r300_emit_packet3 failed\n");
1078 				goto cleanup;
1079 			}
1080 			break;
1081 
1082 		case R300_CMD_END3D:
1083 			DRM_DEBUG("R300_CMD_END3D\n");
1084 			/* TODO:
1085 			   Ideally userspace driver should not need to issue this call,
1086 			   i.e. the drm driver should issue it automatically and prevent
1087 			   lockups.
1088 
1089 			   In practice, we do not understand why this call is needed and what
1090 			   it does (except for some vague guesses that it has to do with cache
1091 			   coherence) and so the user space driver does it.
1092 
1093 			   Once we are sure which uses prevent lockups the code could be moved
1094 			   into the kernel and the userspace driver will not
1095 			   need to use this command.
1096 
1097 			   Note that issuing this command does not hurt anything
1098 			   except, possibly, performance */
1099 			r300_pacify(dev_priv);
1100 			break;
1101 
1102 		case R300_CMD_CP_DELAY:
1103 			/* simple enough, we can do it here */
1104 			DRM_DEBUG("R300_CMD_CP_DELAY\n");
1105 			{
1106 				int i;
1107 				RING_LOCALS;
1108 
1109 				BEGIN_RING(header.delay.count);
1110 				for (i = 0; i < header.delay.count; i++)
1111 					OUT_RING(RADEON_CP_PACKET2);
1112 				ADVANCE_RING();
1113 			}
1114 			break;
1115 
1116 		case R300_CMD_DMA_DISCARD:
1117 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1118 			idx = header.dma.buf_idx;
1119 			if (idx < 0 || idx >= dma->buf_count) {
1120 				DRM_ERROR("buffer index %d (of %d max)\n",
1121 					  idx, dma->buf_count - 1);
1122 				ret = -EINVAL;
1123 				goto cleanup;
1124 			}
1125 
1126 			buf = dma->buflist[idx];
1127 			if (buf->file_priv != file_priv || buf->pending) {
1128 				DRM_ERROR("bad buffer %p %p %d\n",
1129 					  buf->file_priv, file_priv,
1130 					  buf->pending);
1131 				ret = -EINVAL;
1132 				goto cleanup;
1133 			}
1134 
1135 			emit_dispatch_age = 1;
1136 			r300_discard_buffer(dev, buf);
1137 			break;
1138 
1139 		case R300_CMD_WAIT:
1140 			DRM_DEBUG("R300_CMD_WAIT\n");
1141 			r300_cmd_wait(dev_priv, header);
1142 			break;
1143 
1144 		case R300_CMD_SCRATCH:
1145 			DRM_DEBUG("R300_CMD_SCRATCH\n");
1146 			ret = r300_scratch(dev_priv, cmdbuf, header);
1147 			if (ret) {
1148 				DRM_ERROR("r300_scratch failed\n");
1149 				goto cleanup;
1150 			}
1151 			break;
1152 
1153 		case R300_CMD_R500FP:
1154 			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1155 				DRM_ERROR("Calling r500 command on r300 card\n");
1156 				ret = -EINVAL;
1157 				goto cleanup;
1158 			}
1159 			DRM_DEBUG("R300_CMD_R500FP\n");
1160 			ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
1161 			if (ret) {
1162 				DRM_ERROR("r300_emit_r500fp failed\n");
1163 				goto cleanup;
1164 			}
1165 			break;
1166 		default:
1167 			DRM_ERROR("bad cmd_type %i at %p\n",
1168 				  header.header.cmd_type,
1169 				  cmdbuf->buf - sizeof(header));
1170 			ret = -EINVAL;
1171 			goto cleanup;
1172 		}
1173 	}
1174 
1175 	DRM_DEBUG("END\n");
1176 
1177       cleanup:
1178 	r300_pacify(dev_priv);
1179 
1180 	/* We emit the vertex buffer age here, outside the pacifier "brackets"
1181 	 * for two reasons:
1182 	 *  (1) This may coalesce multiple age emissions into a single one and
1183 	 *  (2) more importantly, some chips lock up hard when scratch registers
1184 	 *      are written inside the pacifier bracket.
1185 	 */
1186 	if (emit_dispatch_age) {
1187 		RING_LOCALS;
1188 
1189 		/* Emit the vertex buffer age */
1190 		BEGIN_RING(2);
1191 		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1192 		ADVANCE_RING();
1193 	}
1194 
1195 	COMMIT_RING();
1196 
1197 	return ret;
1198 }
1199