1 /* $NetBSD: savage_state.c,v 1.3 2021/12/18 23:45:43 riastradh Exp $ */
2
3 /* savage_state.c -- State and drawing support for Savage
4 *
5 * Copyright 2004 Felix Kuehling
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sub license,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
24 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: savage_state.c,v 1.3 2021/12/18 23:45:43 riastradh Exp $");
30
31 #include <linux/slab.h>
32 #include <linux/uaccess.h>
33
34 #include <drm/drm_device.h>
35 #include <drm/drm_file.h>
36 #include <drm/drm_print.h>
37 #include <drm/savage_drm.h>
38
39 #include "savage_drv.h"
40
/*
 * Emit a scissor-rectangle update for Savage3D-class hardware.
 *
 * Folds the clip rectangle *pbox into the pending SCSTART/SCEND
 * values and, when the result differs from what the hardware
 * currently holds, emits a 3D-wait followed by the two scissor
 * registers through the DMA stream.  Updates the shadowed scstart/
 * scend and sets dev_priv->waiting when anything was emitted.
 */
void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
			       const struct drm_clip_rect * pbox)
{
	uint32_t sc_start, sc_end;

	/* Merge box coordinates into the pending scissor state:
	 * x in bits 0-10, y in bits 16-26; end coords are inclusive. */
	sc_start = dev_priv->state.s3d.new_scstart & ~SAVAGE_SCISSOR_MASK_S3D;
	sc_start |= (uint32_t) pbox->x1 & 0x000007ff;
	sc_start |= ((uint32_t) pbox->y1 << 16) & 0x07ff0000;

	sc_end = dev_priv->state.s3d.new_scend & ~SAVAGE_SCISSOR_MASK_S3D;
	sc_end |= ((uint32_t) pbox->x2 - 1) & 0x000007ff;
	sc_end |= (((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000;

	/* Skip the hardware write when the scissor did not change. */
	if (sc_start == dev_priv->state.s3d.scstart &&
	    sc_end == dev_priv->state.s3d.scend)
		return;

	{
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
		DMA_WRITE(sc_start);
		DMA_WRITE(sc_end);
		dev_priv->state.s3d.scstart = sc_start;
		dev_priv->state.s3d.scend = sc_end;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}
66
/*
 * Emit a scissor-rectangle update for Savage4-class hardware.
 *
 * Folds the clip rectangle *pbox into the pending DRAWCTRL0/1 values
 * and, when the result differs from the current hardware state,
 * emits a 3D-wait followed by the two registers through the DMA
 * stream.  Updates the shadowed drawctrl0/1 and sets
 * dev_priv->waiting when anything was emitted.
 */
void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
			      const struct drm_clip_rect * pbox)
{
	uint32_t ctrl0, ctrl1;

	/* Merge box coordinates into the pending draw-control state:
	 * x in bits 0-10, y in bits 12-23; end coords are inclusive. */
	ctrl0 = dev_priv->state.s4.new_drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4;
	ctrl0 |= (uint32_t) pbox->x1 & 0x000007ff;
	ctrl0 |= ((uint32_t) pbox->y1 << 12) & 0x00fff000;

	ctrl1 = dev_priv->state.s4.new_drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4;
	ctrl1 |= ((uint32_t) pbox->x2 - 1) & 0x000007ff;
	ctrl1 |= (((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000;

	/* Skip the hardware write when the scissor did not change. */
	if (ctrl0 == dev_priv->state.s4.drawctrl0 &&
	    ctrl1 == dev_priv->state.s4.drawctrl1)
		return;

	{
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
		DMA_WRITE(ctrl0);
		DMA_WRITE(ctrl1);
		dev_priv->state.s4.drawctrl0 = ctrl0;
		dev_priv->state.s4.drawctrl1 = ctrl1;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}
92
/*
 * Validate a user-supplied texture base address.
 *
 * Bit 0 selects the memory space (1 = AGP, 0 = local video memory);
 * bits 1-2 are reserved and must read as binary 01.  After masking
 * off the low three bits, the address must lie inside the selected
 * texture heap.
 *
 * Returns 0 if the address is acceptable, -EINVAL otherwise.
 */
static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
				 uint32_t addr)
{
	const int is_agp = addr & 1;

	/* Reserved bits 1-2 must be exactly 0b01. */
	if ((addr & 6) != 2) {
		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
		return -EINVAL;
	}

	if (is_agp) {
		if (!dev_priv->agp_textures) {
			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
				  unit, addr);
			return -EINVAL;
		}
		addr &= ~7;
		if (addr < dev_priv->agp_textures->offset ||
		    addr >= (dev_priv->agp_textures->offset +
			     dev_priv->agp_textures->size)) {
			DRM_ERROR
			    ("bad texAddr%d %08x (AGP addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	} else {
		addr &= ~7;
		if (addr < dev_priv->texture_offset ||
		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
			DRM_ERROR
			    ("bad texAddr%d %08x (local addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	}

	return 0;
}
127
/* Copy a register value from the incoming command stream into the
 * driver's shadow state, but only when the register lies inside the
 * range [start, start+count) currently being verified.  NOTE: the
 * names 'start', 'count', 'regs' and 'dev_priv' are captured from
 * the calling function's scope. */
#define SAVE_STATE(reg,where) \
if(start <= reg && start+count > reg) \
	dev_priv->state.where = regs[reg - start]
/* Like SAVE_STATE, but only updates the bits selected by 'mask',
 * preserving the remaining bits of the shadowed value. */
#define SAVE_STATE_MASK(reg,where,mask) do { \
	if(start <= reg && start+count > reg) { \
		uint32_t tmp; \
		tmp = regs[reg - start]; \
		dev_priv->state.where = (tmp & (mask)) | \
			(dev_priv->state.where & ~(mask)); \
	} \
} while (0)
139
/*
 * Validate and shadow a Savage3D state-register update from user space.
 *
 * Rejects ranges outside the writable S3D register window.  The
 * non-scissor bits of SCSTART/SCEND are captured into new_scstart/
 * new_scend; the scissor bits are driver-owned and emitted per clip
 * rect by savage_emit_clip_rect_s3d.  If the texture registers were
 * touched, the enabled texture address is range-checked.
 *
 * Returns 0 on success or -EINVAL on a bad range or texture address.
 */
static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
				   unsigned int start, unsigned int count,
				   const uint32_t *regs)
{
	if (start < SAVAGE_TEXPALADDR_S3D ||
	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	/* Shadow only the non-scissor bits supplied by user space. */
	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
			~SAVAGE_SCISSOR_MASK_S3D);
	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
			~SAVAGE_SCISSOR_MASK_S3D);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXCTRL_S3D &&
	    start + count > SAVAGE_TEXPALADDR_S3D) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
			return savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s3d.texaddr);
	}

	return 0;
}
169
/*
 * Validate and shadow a Savage4 state-register update from user space.
 *
 * Rejects ranges outside the writable S4 register window.  The
 * non-scissor bits of DRAWCTRL0/1 are captured into new_drawctrl0/1;
 * the scissor bits are driver-owned and emitted per clip rect by
 * savage_emit_clip_rect_s4.  If the texture registers were touched,
 * the addresses of all enabled texture units are range-checked.
 *
 * Returns 0 on success or -EINVAL on a bad range or texture address.
 */
static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
				  unsigned int start, unsigned int count,
				  const uint32_t *regs)
{
	int ret = 0;

	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	/* Shadow only the non-scissor bits supplied by user space. */
	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
			~SAVAGE_SCISSOR_MASK_S4);
	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
			~SAVAGE_SCISSOR_MASK_S4);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXDESCR_S4 &&
	    start + count > SAVAGE_TEXPALADDR_S4) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s4.texaddr0);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 1,
					dev_priv->state.s4.texaddr1);
	}

	return ret;
}
205
206 #undef SAVE_STATE
207 #undef SAVE_STATE_MASK
208
/*
 * Dispatch a SAVAGE_CMD_STATE command: verify a block of state
 * registers from the command buffer and emit them through DMA.
 *
 * The scissor registers (SCSTART/SCEND on Savage3D, DRAWCTRL0/1 on
 * Savage4) are deliberately carved out of the emitted range -- they
 * are shadowed by the verify functions and emitted per clip rect in
 * savage_dispatch_draw.  Cutting out that hole may split the range
 * in two pieces: 'count' registers before it and 'count2' after it.
 *
 * Returns 0 on success or a negative errno from verification.
 */
static int savage_dispatch_state(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const uint32_t *regs)
{
	unsigned int count = cmd_header->state.count;
	unsigned int start = cmd_header->state.start;
	unsigned int count2 = 0;	/* piece after the scissor hole */
	unsigned int bci_size;
	int ret;
	DMA_LOCALS;

	if (!count)
		return 0;

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_SCSTART_S3D) {
			/* range starts below the hole: clip it, keep what
			 * extends past the hole in count2 */
			if (start + count > SAVAGE_SCEND_S3D + 1)
				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
			if (start + count > SAVAGE_SCSTART_S3D)
				count = SAVAGE_SCSTART_S3D - start;
		} else if (start <= SAVAGE_SCEND_S3D) {
			/* range starts inside the hole: keep only what
			 * lies beyond it, or nothing at all */
			if (start + count > SAVAGE_SCEND_S3D + 1) {
				count -= SAVAGE_SCEND_S3D + 1 - start;
				start = SAVAGE_SCEND_S3D + 1;
			} else
				return 0;
		}
	} else {
		ret = savage_verify_state_s4(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_DRAWCTRL0_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
				count2 = count -
					(SAVAGE_DRAWCTRL1_S4 + 1 - start);
			if (start + count > SAVAGE_DRAWCTRL0_S4)
				count = SAVAGE_DRAWCTRL0_S4 - start;
		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
				start = SAVAGE_DRAWCTRL1_S4 + 1;
			} else
				return 0;
		}
	}

	/* Each run of up to 255 registers needs one SET_REGISTERS header. */
	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

	if (cmd_header->state.global) {
		/* Global state change: wait for the 3D engine first. */
		BEGIN_DMA(bci_size + 1);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		dev_priv->waiting = 1;
	} else {
		BEGIN_DMA(bci_size);
	}

	do {
		while (count > 0) {
			unsigned int n = count < 255 ? count : 255;
			DMA_SET_REGISTERS(start, n);
			DMA_COPY(regs, n);
			count -= n;
			start += n;
			regs += n;
		}
		/* skip the two scissor registers between the pieces */
		start += 2;
		regs += 2;
		count = count2;
		count2 = 0;
	} while (count);

	DMA_COMMIT();

	return 0;
}
289
/*
 * Dispatch a SAVAGE_CMD_DMA_PRIM command: draw a non-indexed
 * primitive whose vertices live in a vertex DMA buffer.
 *
 * Vertex DMA cannot be mixed with command DMA, so the buffered fake
 * DMA is flushed and the drawing commands are submitted directly
 * through the BCI.  Returns 0 on success or -EINVAL on invalid
 * parameters.
 */
static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
				    const drm_savage_cmd_header_t * cmd_header,
				    const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	/* Validate the primitive type and its vertex count. */
	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		/* Savage3D vertex DMA uses the full vertex format only. */
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		/* Savage4 vertex DMA requires an 8-dword vertex: exactly
		 * two of the ten components must be skipped. */
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	/* Vertices occupy 32 bytes each in the DMA buffer. */
	if (start + n > dmabuf->total / 32) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, dmabuf->total / 32);
		return -EINVAL;
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		/* Point the hardware at this vertex buffer. */
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start + 2);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE((i + reorder[i % 3]) |
					  ((i + 1 +
					    reorder[(i + 1) % 3]) << 16));
			if (i < start + count)
				BCI_WRITE(i + reorder[i % 3]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = start; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		}

		start += count;
		n -= count;

		/* Subsequent batches continue the same primitive. */
		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}
430
/*
 * Dispatch a SAVAGE_CMD_VB_PRIM command: draw a non-indexed primitive
 * whose vertex data is copied inline from a user-supplied vertex
 * buffer into the (fake) command DMA stream.
 *
 * vb_size is in bytes, vb_stride in dwords.  Returns 0 on success or
 * -EINVAL on invalid parameters.
 */
static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint32_t *vtxbuf, unsigned int vb_size,
				   unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int vtx_size;	/* dwords actually sent per vertex */
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	/* Validate the primitive type and its vertex count. */
	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	/* Each set skip bit removes one dword from the vertex. */
	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	/* NOTE(review): if vb_stride == 0 and vtx_size == 0 this divides
	 * by zero -- presumably callers guarantee vb_stride > 0 for any
	 * reachable skip mask; verify against savage_bci_cmdbuf. */
	if (start + n > vb_size / (vb_stride * 4)) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, vb_size / (vb_stride * 4));
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = start; i < start + count; ++i) {
				unsigned int j = i + reorder[i % 3];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			if (vb_stride == vtx_size) {
				/* densely packed: one contiguous copy */
				DMA_COPY(&vtxbuf[vb_stride * start],
					 vtx_size * count);
			} else {
				for (i = start; i < start + count; ++i) {
					DMA_COPY(&vtxbuf [vb_stride * i],
						 vtx_size);
				}
			}

			DMA_COMMIT();
		}

		start += count;
		n -= count;

		/* Subsequent batches continue the same primitive. */
		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}
549
/*
 * Dispatch a SAVAGE_CMD_DMA_IDX command: draw an indexed primitive
 * whose vertices live in a vertex DMA buffer, with the index list
 * taken from the command buffer.
 *
 * Vertex DMA cannot be mixed with command DMA, so the buffered fake
 * DMA is flushed and the drawing commands are submitted directly
 * through the BCI.  Returns 0 on success or -EINVAL on invalid
 * parameters or out-of-range indices.
 */
static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint16_t *idx,
				   const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	/* Validate the primitive type and its index count. */
	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		/* Savage3D vertex DMA uses the full vertex format only. */
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		/* Savage4 vertex DMA requires an 8-dword vertex: exactly
		 * two of the ten components must be skipped. */
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		/* Point the hardware at this vertex buffer. */
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* Check indices.  Vertices occupy 32 bytes each, so
		 * valid indices are 0 .. total/32 - 1.  The bound must
		 * be exclusive (was '>', an off-by-one that admitted an
		 * index one past the end, inconsistent with the range
		 * check in savage_dispatch_dma_prim). */
		for (i = 0; i < count; ++i) {
			if (idx[i] >= dmabuf->total / 32) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], dmabuf->total / 32);
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i + reorder[i % 3]] |
					  (idx[i + 1 +
					       reorder[(i + 1) % 3]] << 16));
			if (i < count)
				BCI_WRITE(idx[i + reorder[i % 3]]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = 0; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		}

		idx += count;
		n -= count;

		/* Subsequent batches continue the same primitive. */
		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}
691
/*
 * Dispatch a SAVAGE_CMD_VB_IDX command: draw an indexed primitive
 * whose vertex data is copied inline from a user-supplied vertex
 * buffer into the (fake) command DMA stream, with the index list
 * taken from the command buffer.
 *
 * vb_size is in bytes, vb_stride in dwords.  Returns 0 on success or
 * -EINVAL on invalid parameters or out-of-range indices.
 */
static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
				  const drm_savage_cmd_header_t * cmd_header,
				  const uint16_t *idx,
				  const uint32_t *vtxbuf,
				  unsigned int vb_size, unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int vtx_size;	/* dwords actually sent per vertex */
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	/* Validate the primitive type and its index count. */
	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	/* Each set skip bit removes one dword from the vertex. */
	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* Check indices.  Valid indices address vertices
		 * 0 .. vb_size/(vb_stride*4) - 1, so the bound must be
		 * exclusive (was '>', an off-by-one that admitted an
		 * index one past the end, inconsistent with the range
		 * check in savage_dispatch_vb_prim).
		 * NOTE(review): vb_stride == 0 with vtx_size == 0 would
		 * divide by zero here -- verify callers reject that. */
		for (i = 0; i < count; ++i) {
			if (idx[i] >= vb_size / (vb_stride * 4)) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], vb_size / (vb_stride * 4));
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i + reorder[i % 3]];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		}

		idx += count;
		n -= count;

		/* Subsequent batches continue the same primitive. */
		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}
806
/*
 * Dispatch a SAVAGE_CMD_CLEAR command: fill the selected buffers
 * (front/back/depth, chosen by cmd_header->clear0.flags) with
 * data->clear1.value inside each clip rectangle.
 *
 * 'data' is the command-buffer entry following the header, holding
 * the plane write mask and clear value.  A non-trivial mask is
 * programmed before the fills and reset afterwards.  Returns 0.
 */
static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const drm_savage_cmd_header_t *data,
				 unsigned int nbox,
				 const struct drm_clip_rect *boxes)
{
	unsigned int flags = cmd_header->clear0.flags;
	unsigned int clear_cmd;
	unsigned int i, nbufs;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	/* Solid rectangle fill with ROP 0xCC. */
	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
	BCI_CMD_SET_ROP(clear_cmd, 0xCC);

	/* Number of buffers cleared per box (sizes the DMA batch below). */
	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
	if (nbufs == 0)
		return 0;

	if (data->clear1.mask != 0xffffffff) {
		/* set mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(data->clear1.mask);
		DMA_COMMIT();
	}
	for (i = 0; i < nbox; ++i) {
		unsigned int x, y, w, h;
		unsigned int buf;
		x = boxes[i].x1, y = boxes[i].y1;
		w = boxes[i].x2 - boxes[i].x1;
		h = boxes[i].y2 - boxes[i].y1;
		BEGIN_DMA(nbufs * 6);
		/* Walk the buffer flag bits: FRONT, BACK, DEPTH. */
		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
			if (!(flags & buf))
				continue;
			/* 6 dwords per buffer: cmd, offset, descriptor,
			 * color, position, size. */
			DMA_WRITE(clear_cmd);
			switch (buf) {
			case SAVAGE_FRONT:
				DMA_WRITE(dev_priv->front_offset);
				DMA_WRITE(dev_priv->front_bd);
				break;
			case SAVAGE_BACK:
				DMA_WRITE(dev_priv->back_offset);
				DMA_WRITE(dev_priv->back_bd);
				break;
			case SAVAGE_DEPTH:
				DMA_WRITE(dev_priv->depth_offset);
				DMA_WRITE(dev_priv->depth_bd);
				break;
			}
			DMA_WRITE(data->clear1.value);
			DMA_WRITE(BCI_X_Y(x, y));
			DMA_WRITE(BCI_W_H(w, h));
		}
		DMA_COMMIT();
	}
	if (data->clear1.mask != 0xffffffff) {
		/* reset mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(0xffffffff);
		DMA_COMMIT();
	}

	return 0;
}
878
/*
 * Dispatch a SAVAGE_CMD_SWAP command: copy the back buffer to the
 * front (global bitmap destination) for every clip rectangle.
 *
 * Emits one 6-dword rectangle blit (ROP 0xCC) per box through the
 * DMA stream.  Returns 0.
 */
static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
				unsigned int nbox, const struct drm_clip_rect *boxes)
{
	unsigned int blit_cmd;
	unsigned int i;
	DMA_LOCALS;

	if (!nbox)
		return 0;

	blit_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
	BCI_CMD_SET_ROP(blit_cmd, 0xCC);

	for (i = 0; i < nbox; ++i) {
		const struct drm_clip_rect *box = &boxes[i];
		unsigned int w = box->x2 - box->x1;
		unsigned int h = box->y2 - box->y1;

		BEGIN_DMA(6);
		DMA_WRITE(blit_cmd);
		DMA_WRITE(dev_priv->back_offset);
		DMA_WRITE(dev_priv->back_bd);
		/* Source and destination use the same coordinates. */
		DMA_WRITE(BCI_X_Y(box->x1, box->y1));
		DMA_WRITE(BCI_X_Y(box->x1, box->y1));
		DMA_WRITE(BCI_W_H(w, h));
		DMA_COMMIT();
	}

	return 0;
}
907
/*
 * Dispatch a run of consecutive drawing commands [start, end) once
 * per clip rectangle.
 *
 * For each box the scissor is emitted first via
 * dev_priv->emit_clip_rect, then every drawing command in the run is
 * re-dispatched.  Index data embedded in the command stream
 * (DMA_IDX/VB_IDX) follows its header and is skipped over by 'j'
 * header-sized units.
 *
 * Returns 0 on success or the first dispatcher error.
 */
static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
				const drm_savage_cmd_header_t *start,
				const drm_savage_cmd_header_t *end,
				const struct drm_buf * dmabuf,
				const unsigned int *vtxbuf,
				unsigned int vb_size, unsigned int vb_stride,
				unsigned int nbox,
				const struct drm_clip_rect *boxes)
{
	unsigned int i, j;
	int ret;

	for (i = 0; i < nbox; ++i) {
		const drm_savage_cmd_header_t *cmdbuf;
		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

		cmdbuf = start;
		while (cmdbuf < end) {
			drm_savage_cmd_header_t cmd_header;
			cmd_header = *cmdbuf;
			cmdbuf++;
			switch (cmd_header.cmd.cmd) {
			case SAVAGE_CMD_DMA_PRIM:
				ret = savage_dispatch_dma_prim(
					dev_priv, &cmd_header, dmabuf);
				break;
			case SAVAGE_CMD_VB_PRIM:
				ret = savage_dispatch_vb_prim(
					dev_priv, &cmd_header,
					vtxbuf, vb_size, vb_stride);
				break;
			case SAVAGE_CMD_DMA_IDX:
				/* indices are packed 4 per 8-byte unit */
				j = (cmd_header.idx.count + 3) / 4;
				/* j was check in savage_bci_cmdbuf */
				ret = savage_dispatch_dma_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					dmabuf);
				cmdbuf += j;
				break;
			case SAVAGE_CMD_VB_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was check in savage_bci_cmdbuf */
				ret = savage_dispatch_vb_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					(const uint32_t *)vtxbuf, vb_size,
					vb_stride);
				cmdbuf += j;
				break;
			default:
				/* What's the best return code? EFAULT? */
				DRM_ERROR("IMPLEMENTATION ERROR: "
					  "non-drawing-command %d\n",
					  cmd_header.cmd.cmd);
				return -EINVAL;
			}

			if (ret != 0)
				return ret;
		}
	}

	return 0;
}
971
/*
 * SAVAGE_BCI_CMDBUF ioctl handler: copy the user command buffer (and
 * optional vertex buffer and clip rects) into kernel memory, then
 * walk it dispatching state, clear, swap and drawing commands.
 *
 * cmdbuf->size is counted in 8-byte command-header units.  Runs of
 * consecutive drawing commands are batched between first_draw_cmd and
 * the first non-drawing command, so savage_dispatch_draw iterates the
 * clip rects once per run rather than once per draw.
 *
 * Returns 0 on success or a negative errno.
 */
int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_savage_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *dmabuf;
	drm_savage_cmdbuf_t *cmdbuf = data;
	drm_savage_cmd_header_t *kcmd_addr = NULL;
	drm_savage_cmd_header_t *first_draw_cmd;	/* start of pending draw run */
	unsigned int *kvb_addr = NULL;
	struct drm_clip_rect *kbox_addr = NULL;
	unsigned int i, j;
	int ret = 0;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	/* Look up the optional vertex DMA buffer. */
	if (dma && dma->buflist) {
		if (cmdbuf->dma_idx >= dma->buf_count) {
			DRM_ERROR
			    ("vertex buffer index %u out of range (0-%u)\n",
			     cmdbuf->dma_idx, dma->buf_count - 1);
			return -EINVAL;
		}
		dmabuf = dma->buflist[cmdbuf->dma_idx];
	} else {
		dmabuf = NULL;
	}

	/* Copy the user buffers into kernel temporary areas. This hasn't been
	 * a performance loss compared to VERIFYAREA_READ/
	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
	 * for locking on FreeBSD.
	 */
	if (cmdbuf->size) {
		/* size is in 8-byte command-header units */
		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
		if (kcmd_addr == NULL)
			return -ENOMEM;

		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
				   cmdbuf->size * 8))
		{
			kfree(kcmd_addr);
			return -EFAULT;
		}
		/* from here on cmd_addr points at the kernel copy */
		cmdbuf->cmd_addr = kcmd_addr;
	}
	if (cmdbuf->vb_size) {
		kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size);
		if (IS_ERR(kvb_addr)) {
			ret = PTR_ERR(kvb_addr);
			kvb_addr = NULL;
			goto done;
		}
		cmdbuf->vb_addr = kvb_addr;
	}
	if (cmdbuf->nbox) {
		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
					  GFP_KERNEL);
		if (kbox_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
				   cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->box_addr = kbox_addr;
	}

	/* Make sure writes to DMA buffers are finished before sending
	 * DMA commands to the graphics hardware. */
	mb();

	/* Coming from user space. Don't know if the Xserver has
	 * emitted wait commands. Assuming the worst. */
	dev_priv->waiting = 1;

	i = 0;
	first_draw_cmd = NULL;
	while (i < cmdbuf->size) {
		drm_savage_cmd_header_t cmd_header;
		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
		cmdbuf->cmd_addr++;
		i++;

		/* Group drawing commands with same state to minimize
		 * iterations over clip rects. */
		j = 0;
		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_DMA_IDX:
		case SAVAGE_CMD_VB_IDX:
			/* indices are packed 4 per 8-byte unit */
			j = (cmd_header.idx.count + 3) / 4;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("indexed drawing command extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			/* fall through */
		case SAVAGE_CMD_DMA_PRIM:
		case SAVAGE_CMD_VB_PRIM:
			/* Start or extend the current draw run. */
			if (!first_draw_cmd)
				first_draw_cmd = cmdbuf->cmd_addr - 1;
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		default:
			/* Non-drawing command: flush any pending draw run
			 * through the clip rects first. */
			if (first_draw_cmd) {
				ret = savage_dispatch_draw(
				    dev_priv, first_draw_cmd,
				    cmdbuf->cmd_addr - 1,
				    dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
				    cmdbuf->vb_stride,
				    cmdbuf->nbox, cmdbuf->box_addr);
				if (ret != 0)
					goto done;
				first_draw_cmd = NULL;
			}
		}
		/* Drawing commands are handled when the run is flushed. */
		if (first_draw_cmd)
			continue;

		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_STATE:
			/* state registers are packed 2 per 8-byte unit */
			j = (cmd_header.state.count + 1) / 2;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_STATE extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_state(dev_priv, &cmd_header,
				(const uint32_t *)cmdbuf->cmd_addr);
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		case SAVAGE_CMD_CLEAR:
			/* CLEAR consumes one extra unit (mask/value). */
			if (i + 1 > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_clear(dev_priv, &cmd_header,
						    cmdbuf->cmd_addr,
						    cmdbuf->nbox,
						    cmdbuf->box_addr);
			cmdbuf->cmd_addr++;
			i++;
			break;
		case SAVAGE_CMD_SWAP:
			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
						   cmdbuf->box_addr);
			break;
		default:
			DRM_ERROR("invalid command 0x%x\n",
				  cmd_header.cmd.cmd);
			DMA_FLUSH();
			ret = -EINVAL;
			goto done;
		}

		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	/* Flush a draw run that reached the end of the buffer. */
	if (first_draw_cmd) {
		ret = savage_dispatch_draw (
			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
			cmdbuf->nbox, cmdbuf->box_addr);
		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	DMA_FLUSH();

	/* Age and recycle the vertex buffer if user space discards it. */
	if (dmabuf && cmdbuf->discard) {
		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
		uint16_t event;
		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
		savage_freelist_put(dev, dmabuf);
	}

done:
	/* If we didn't need to allocate them, these'll be NULL */
	kfree(kcmd_addr);
	kfree(kvb_addr);
	kfree(kbox_addr);

	return ret;
}
1175