1 /* $NetBSD: savage_state.c,v 1.3 2021/12/18 23:45:43 riastradh Exp $ */ 2 3 /* savage_state.c -- State and drawing support for Savage 4 * 5 * Copyright 2004 Felix Kuehling 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sub license, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 24 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: savage_state.c,v 1.3 2021/12/18 23:45:43 riastradh Exp $"); 30 31 #include <linux/slab.h> 32 #include <linux/uaccess.h> 33 34 #include <drm/drm_device.h> 35 #include <drm/drm_file.h> 36 #include <drm/drm_print.h> 37 #include <drm/savage_drm.h> 38 39 #include "savage_drv.h" 40 41 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv, 42 const struct drm_clip_rect * pbox) 43 { 44 uint32_t scstart = dev_priv->state.s3d.new_scstart; 45 uint32_t scend = dev_priv->state.s3d.new_scend; 46 scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) | 47 ((uint32_t) pbox->x1 & 0x000007ff) | 48 (((uint32_t) pbox->y1 << 16) & 0x07ff0000); 49 scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) | 50 (((uint32_t) pbox->x2 - 1) & 0x000007ff) | 51 ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000); 52 if (scstart != dev_priv->state.s3d.scstart || 53 scend != dev_priv->state.s3d.scend) { 54 DMA_LOCALS; 55 BEGIN_DMA(4); 56 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 57 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); 58 DMA_WRITE(scstart); 59 DMA_WRITE(scend); 60 dev_priv->state.s3d.scstart = scstart; 61 dev_priv->state.s3d.scend = scend; 62 dev_priv->waiting = 1; 63 DMA_COMMIT(); 64 } 65 } 66 67 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv, 68 const struct drm_clip_rect * pbox) 69 { 70 uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0; 71 uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1; 72 drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) | 73 ((uint32_t) pbox->x1 & 0x000007ff) | 74 (((uint32_t) pbox->y1 << 12) & 0x00fff000); 75 drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) | 76 (((uint32_t) pbox->x2 - 1) & 0x000007ff) | 77 ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000); 78 if (drawctrl0 != dev_priv->state.s4.drawctrl0 || 79 drawctrl1 != dev_priv->state.s4.drawctrl1) { 80 DMA_LOCALS; 81 BEGIN_DMA(4); 82 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 83 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); 84 DMA_WRITE(drawctrl0); 85 DMA_WRITE(drawctrl1); 86 dev_priv->state.s4.drawctrl0 = drawctrl0; 87 dev_priv->state.s4.drawctrl1 = drawctrl1; 88 dev_priv->waiting = 1; 89 DMA_COMMIT(); 90 } 91 } 92 93 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit, 94 uint32_t addr) 95 { 96 if ((addr & 6) != 2) { /* reserved bits */ 97 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr); 98 return -EINVAL; 99 } 100 if (!(addr & 1)) { /* local */ 101 addr &= ~7; 102 if (addr < dev_priv->texture_offset || 103 addr >= dev_priv->texture_offset + dev_priv->texture_size) { 104 DRM_ERROR 105 ("bad texAddr%d %08x (local addr out of range)\n", 106 unit, addr); 107 return -EINVAL; 108 } 109 } else { /* AGP */ 110 if (!dev_priv->agp_textures) { 111 DRM_ERROR("bad texAddr%d %08x (AGP not available)\n", 112 unit, addr); 113 return -EINVAL; 114 } 115 addr &= ~7; 116 if (addr < dev_priv->agp_textures->offset || 117 addr >= (dev_priv->agp_textures->offset + 118 dev_priv->agp_textures->size)) { 119 DRM_ERROR 120 ("bad texAddr%d %08x (AGP addr out of range)\n", 121 unit, addr); 122 return -EINVAL; 123 } 124 } 125 return 0; 126 } 127 128 #define SAVE_STATE(reg,where) \ 129 if(start <= reg && start+count > reg) \ 130 dev_priv->state.where = regs[reg - start] 131 #define SAVE_STATE_MASK(reg,where,mask) do { \ 132 if(start <= reg && start+count > reg) { \ 133 uint32_t tmp; \ 134 tmp = regs[reg - start]; \ 135 dev_priv->state.where = (tmp & (mask)) | \ 136 (dev_priv->state.where & ~(mask)); \ 137 } \ 138 } while (0) 139 140 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv, 141 unsigned int start, unsigned int count, 142 const uint32_t *regs) 143 { 144 if (start < SAVAGE_TEXPALADDR_S3D || 145 start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) { 146 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n", 147 start, start + count - 1); 148 return -EINVAL; 149 } 150 151 SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart, 152 ~SAVAGE_SCISSOR_MASK_S3D); 153 SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend, 154 ~SAVAGE_SCISSOR_MASK_S3D); 155 156 /* if any texture regs were changed ... */ 157 if (start <= SAVAGE_TEXCTRL_S3D && 158 start + count > SAVAGE_TEXPALADDR_S3D) { 159 /* ... check texture state */ 160 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl); 161 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr); 162 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK) 163 return savage_verify_texaddr(dev_priv, 0, 164 dev_priv->state.s3d.texaddr); 165 } 166 167 return 0; 168 } 169 170 static int savage_verify_state_s4(drm_savage_private_t * dev_priv, 171 unsigned int start, unsigned int count, 172 const uint32_t *regs) 173 { 174 int ret = 0; 175 176 if (start < SAVAGE_DRAWLOCALCTRL_S4 || 177 start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) { 178 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n", 179 start, start + count - 1); 180 return -EINVAL; 181 } 182 183 SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0, 184 ~SAVAGE_SCISSOR_MASK_S4); 185 SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1, 186 ~SAVAGE_SCISSOR_MASK_S4); 187 188 /* if any texture regs were changed ... */ 189 if (start <= SAVAGE_TEXDESCR_S4 && 190 start + count > SAVAGE_TEXPALADDR_S4) { 191 /* ... check texture state */ 192 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr); 193 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0); 194 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1); 195 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK) 196 ret |= savage_verify_texaddr(dev_priv, 0, 197 dev_priv->state.s4.texaddr0); 198 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK) 199 ret |= savage_verify_texaddr(dev_priv, 1, 200 dev_priv->state.s4.texaddr1); 201 } 202 203 return ret; 204 } 205 206 #undef SAVE_STATE 207 #undef SAVE_STATE_MASK 208 209 static int savage_dispatch_state(drm_savage_private_t * dev_priv, 210 const drm_savage_cmd_header_t * cmd_header, 211 const uint32_t *regs) 212 { 213 unsigned int count = cmd_header->state.count; 214 unsigned int start = cmd_header->state.start; 215 unsigned int count2 = 0; 216 unsigned int bci_size; 217 int ret; 218 DMA_LOCALS; 219 220 if (!count) 221 return 0; 222 223 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 224 ret = savage_verify_state_s3d(dev_priv, start, count, regs); 225 if (ret != 0) 226 return ret; 227 /* scissor regs are emitted in savage_dispatch_draw */ 228 if (start < SAVAGE_SCSTART_S3D) { 229 if (start + count > SAVAGE_SCEND_S3D + 1) 230 count2 = count - (SAVAGE_SCEND_S3D + 1 - start); 231 if (start + count > SAVAGE_SCSTART_S3D) 232 count = SAVAGE_SCSTART_S3D - start; 233 } else if (start <= SAVAGE_SCEND_S3D) { 234 if (start + count > SAVAGE_SCEND_S3D + 1) { 235 count -= SAVAGE_SCEND_S3D + 1 - start; 236 start = SAVAGE_SCEND_S3D + 1; 237 } else 238 return 0; 239 } 240 } else { 241 ret = savage_verify_state_s4(dev_priv, start, count, regs); 242 if (ret != 0) 243 return ret; 244 /* scissor regs are emitted in savage_dispatch_draw */ 245 if (start < SAVAGE_DRAWCTRL0_S4) { 246 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) 247 count2 = count - 248 (SAVAGE_DRAWCTRL1_S4 + 1 - start); 249 if (start + count > SAVAGE_DRAWCTRL0_S4) 250 count = SAVAGE_DRAWCTRL0_S4 - start; 251 } else if (start <= SAVAGE_DRAWCTRL1_S4) { 252 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) { 253 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start; 254 start = SAVAGE_DRAWCTRL1_S4 + 1; 255 } else 256 return 0; 257 } 258 } 259 260 bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255; 261 262 if (cmd_header->state.global) { 263 BEGIN_DMA(bci_size + 1); 264 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 265 dev_priv->waiting = 1; 266 } else { 267 BEGIN_DMA(bci_size); 268 } 269 270 do { 271 while (count > 0) { 272 unsigned int n = count < 255 ? count : 255; 273 DMA_SET_REGISTERS(start, n); 274 DMA_COPY(regs, n); 275 count -= n; 276 start += n; 277 regs += n; 278 } 279 start += 2; 280 regs += 2; 281 count = count2; 282 count2 = 0; 283 } while (count); 284 285 DMA_COMMIT(); 286 287 return 0; 288 } 289 290 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv, 291 const drm_savage_cmd_header_t * cmd_header, 292 const struct drm_buf * dmabuf) 293 { 294 unsigned char reorder = 0; 295 unsigned int prim = cmd_header->prim.prim; 296 unsigned int skip = cmd_header->prim.skip; 297 unsigned int n = cmd_header->prim.count; 298 unsigned int start = cmd_header->prim.start; 299 unsigned int i; 300 BCI_LOCALS; 301 302 if (!dmabuf) { 303 DRM_ERROR("called without dma buffers!\n"); 304 return -EINVAL; 305 } 306 307 if (!n) 308 return 0; 309 310 switch (prim) { 311 case SAVAGE_PRIM_TRILIST_201: 312 reorder = 1; 313 prim = SAVAGE_PRIM_TRILIST; 314 /* fall through */ 315 case SAVAGE_PRIM_TRILIST: 316 if (n % 3 != 0) { 317 DRM_ERROR("wrong number of vertices %u in TRILIST\n", 318 n); 319 return -EINVAL; 320 } 321 break; 322 case SAVAGE_PRIM_TRISTRIP: 323 case SAVAGE_PRIM_TRIFAN: 324 if (n < 3) { 325 DRM_ERROR 326 ("wrong number of vertices %u in TRIFAN/STRIP\n", 327 n); 328 return -EINVAL; 329 } 330 break; 331 default: 332 DRM_ERROR("invalid primitive type %u\n", prim); 333 return -EINVAL; 334 } 335 336 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 337 if (skip != 0) { 338 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 339 return -EINVAL; 340 } 341 } else { 342 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - 343 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - 344 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); 345 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { 346 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 347 return -EINVAL; 348 } 349 if (reorder) { 350 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); 351 return -EINVAL; 352 } 353 } 354 355 if (start + n > dmabuf->total / 32) { 356 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", 357 start, start + n - 1, dmabuf->total / 32); 358 return -EINVAL; 359 } 360 361 /* Vertex DMA doesn't work with command DMA at the same time, 362 * so we use BCI_... to submit commands here. Flush buffered 363 * faked DMA first. */ 364 DMA_FLUSH(); 365 366 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { 367 BEGIN_BCI(2); 368 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); 369 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); 370 dev_priv->state.common.vbaddr = dmabuf->bus_address; 371 } 372 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { 373 /* Workaround for what looks like a hardware bug. If a 374 * WAIT_3D_IDLE was emitted some time before the 375 * indexed drawing command then the engine will lock 376 * up. There are two known workarounds: 377 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ 378 BEGIN_BCI(63); 379 for (i = 0; i < 63; ++i) 380 BCI_WRITE(BCI_CMD_WAIT); 381 dev_priv->waiting = 0; 382 } 383 384 prim <<= 25; 385 while (n != 0) { 386 /* Can emit up to 255 indices (85 triangles) at once. */ 387 unsigned int count = n > 255 ? 255 : n; 388 if (reorder) { 389 /* Need to reorder indices for correct flat 390 * shading while preserving the clock sense 391 * for correct culling. Only on Savage3D. */ 392 int reorder[3] = { -1, -1, -1 }; 393 reorder[start % 3] = 2; 394 395 BEGIN_BCI((count + 1 + 1) / 2); 396 BCI_DRAW_INDICES_S3D(count, prim, start + 2); 397 398 for (i = start + 1; i + 1 < start + count; i += 2) 399 BCI_WRITE((i + reorder[i % 3]) | 400 ((i + 1 + 401 reorder[(i + 1) % 3]) << 16)); 402 if (i < start + count) 403 BCI_WRITE(i + reorder[i % 3]); 404 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 405 BEGIN_BCI((count + 1 + 1) / 2); 406 BCI_DRAW_INDICES_S3D(count, prim, start); 407 408 for (i = start + 1; i + 1 < start + count; i += 2) 409 BCI_WRITE(i | ((i + 1) << 16)); 410 if (i < start + count) 411 BCI_WRITE(i); 412 } else { 413 BEGIN_BCI((count + 2 + 1) / 2); 414 BCI_DRAW_INDICES_S4(count, prim, skip); 415 416 for (i = start; i + 1 < start + count; i += 2) 417 BCI_WRITE(i | ((i + 1) << 16)); 418 if (i < start + count) 419 BCI_WRITE(i); 420 } 421 422 start += count; 423 n -= count; 424 425 prim |= BCI_CMD_DRAW_CONT; 426 } 427 428 return 0; 429 } 430 431 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv, 432 const drm_savage_cmd_header_t * cmd_header, 433 const uint32_t *vtxbuf, unsigned int vb_size, 434 unsigned int vb_stride) 435 { 436 unsigned char reorder = 0; 437 unsigned int prim = cmd_header->prim.prim; 438 unsigned int skip = cmd_header->prim.skip; 439 unsigned int n = cmd_header->prim.count; 440 unsigned int start = cmd_header->prim.start; 441 unsigned int vtx_size; 442 unsigned int i; 443 DMA_LOCALS; 444 445 if (!n) 446 return 0; 447 448 switch (prim) { 449 case SAVAGE_PRIM_TRILIST_201: 450 reorder = 1; 451 prim = SAVAGE_PRIM_TRILIST; 452 /* fall through */ 453 case SAVAGE_PRIM_TRILIST: 454 if (n % 3 != 0) { 455 DRM_ERROR("wrong number of vertices %u in TRILIST\n", 456 n); 457 return -EINVAL; 458 } 459 break; 460 case SAVAGE_PRIM_TRISTRIP: 461 case SAVAGE_PRIM_TRIFAN: 462 if (n < 3) { 463 DRM_ERROR 464 ("wrong number of vertices %u in TRIFAN/STRIP\n", 465 n); 466 return -EINVAL; 467 } 468 break; 469 default: 470 DRM_ERROR("invalid primitive type %u\n", prim); 471 return -EINVAL; 472 } 473 474 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 475 if (skip > SAVAGE_SKIP_ALL_S3D) { 476 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 477 return -EINVAL; 478 } 479 vtx_size = 8; /* full vertex */ 480 } else { 481 if (skip > SAVAGE_SKIP_ALL_S4) { 482 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 483 return -EINVAL; 484 } 485 vtx_size = 10; /* full vertex */ 486 } 487 488 vtx_size -= (skip & 1) + (skip >> 1 & 1) + 489 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + 490 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); 491 492 if (vtx_size > vb_stride) { 493 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", 494 vtx_size, vb_stride); 495 return -EINVAL; 496 } 497 498 if (start + n > vb_size / (vb_stride * 4)) { 499 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", 500 start, start + n - 1, vb_size / (vb_stride * 4)); 501 return -EINVAL; 502 } 503 504 prim <<= 25; 505 while (n != 0) { 506 /* Can emit up to 255 vertices (85 triangles) at once. */ 507 unsigned int count = n > 255 ? 255 : n; 508 if (reorder) { 509 /* Need to reorder vertices for correct flat 510 * shading while preserving the clock sense 511 * for correct culling. Only on Savage3D. */ 512 int reorder[3] = { -1, -1, -1 }; 513 reorder[start % 3] = 2; 514 515 BEGIN_DMA(count * vtx_size + 1); 516 DMA_DRAW_PRIMITIVE(count, prim, skip); 517 518 for (i = start; i < start + count; ++i) { 519 unsigned int j = i + reorder[i % 3]; 520 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 521 } 522 523 DMA_COMMIT(); 524 } else { 525 BEGIN_DMA(count * vtx_size + 1); 526 DMA_DRAW_PRIMITIVE(count, prim, skip); 527 528 if (vb_stride == vtx_size) { 529 DMA_COPY(&vtxbuf[vb_stride * start], 530 vtx_size * count); 531 } else { 532 for (i = start; i < start + count; ++i) { 533 DMA_COPY(&vtxbuf [vb_stride * i], 534 vtx_size); 535 } 536 } 537 538 DMA_COMMIT(); 539 } 540 541 start += count; 542 n -= count; 543 544 prim |= BCI_CMD_DRAW_CONT; 545 } 546 547 return 0; 548 } 549 550 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv, 551 const drm_savage_cmd_header_t * cmd_header, 552 const uint16_t *idx, 553 const struct drm_buf * dmabuf) 554 { 555 unsigned char reorder = 0; 556 unsigned int prim = cmd_header->idx.prim; 557 unsigned int skip = cmd_header->idx.skip; 558 unsigned int n = cmd_header->idx.count; 559 unsigned int i; 560 BCI_LOCALS; 561 562 if (!dmabuf) { 563 DRM_ERROR("called without dma buffers!\n"); 564 return -EINVAL; 565 } 566 567 if (!n) 568 return 0; 569 570 switch (prim) { 571 case SAVAGE_PRIM_TRILIST_201: 572 reorder = 1; 573 prim = SAVAGE_PRIM_TRILIST; 574 /* fall through */ 575 case SAVAGE_PRIM_TRILIST: 576 if (n % 3 != 0) { 577 DRM_ERROR("wrong number of indices %u in TRILIST\n", n); 578 return -EINVAL; 579 } 580 break; 581 case SAVAGE_PRIM_TRISTRIP: 582 case SAVAGE_PRIM_TRIFAN: 583 if (n < 3) { 584 DRM_ERROR 585 ("wrong number of indices %u in TRIFAN/STRIP\n", n); 586 return -EINVAL; 587 } 588 break; 589 default: 590 DRM_ERROR("invalid primitive type %u\n", prim); 591 return -EINVAL; 592 } 593 594 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 595 if (skip != 0) { 596 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 597 return -EINVAL; 598 } 599 } else { 600 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - 601 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - 602 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); 603 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { 604 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 605 return -EINVAL; 606 } 607 if (reorder) { 608 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); 609 return -EINVAL; 610 } 611 } 612 613 /* Vertex DMA doesn't work with command DMA at the same time, 614 * so we use BCI_... to submit commands here. Flush buffered 615 * faked DMA first. */ 616 DMA_FLUSH(); 617 618 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { 619 BEGIN_BCI(2); 620 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); 621 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); 622 dev_priv->state.common.vbaddr = dmabuf->bus_address; 623 } 624 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { 625 /* Workaround for what looks like a hardware bug. If a 626 * WAIT_3D_IDLE was emitted some time before the 627 * indexed drawing command then the engine will lock 628 * up. There are two known workarounds: 629 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ 630 BEGIN_BCI(63); 631 for (i = 0; i < 63; ++i) 632 BCI_WRITE(BCI_CMD_WAIT); 633 dev_priv->waiting = 0; 634 } 635 636 prim <<= 25; 637 while (n != 0) { 638 /* Can emit up to 255 indices (85 triangles) at once. */ 639 unsigned int count = n > 255 ? 255 : n; 640 641 /* check indices */ 642 for (i = 0; i < count; ++i) { 643 if (idx[i] > dmabuf->total / 32) { 644 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", 645 i, idx[i], dmabuf->total / 32); 646 return -EINVAL; 647 } 648 } 649 650 if (reorder) { 651 /* Need to reorder indices for correct flat 652 * shading while preserving the clock sense 653 * for correct culling. Only on Savage3D. */ 654 int reorder[3] = { 2, -1, -1 }; 655 656 BEGIN_BCI((count + 1 + 1) / 2); 657 BCI_DRAW_INDICES_S3D(count, prim, idx[2]); 658 659 for (i = 1; i + 1 < count; i += 2) 660 BCI_WRITE(idx[i + reorder[i % 3]] | 661 (idx[i + 1 + 662 reorder[(i + 1) % 3]] << 16)); 663 if (i < count) 664 BCI_WRITE(idx[i + reorder[i % 3]]); 665 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 666 BEGIN_BCI((count + 1 + 1) / 2); 667 BCI_DRAW_INDICES_S3D(count, prim, idx[0]); 668 669 for (i = 1; i + 1 < count; i += 2) 670 BCI_WRITE(idx[i] | (idx[i + 1] << 16)); 671 if (i < count) 672 BCI_WRITE(idx[i]); 673 } else { 674 BEGIN_BCI((count + 2 + 1) / 2); 675 BCI_DRAW_INDICES_S4(count, prim, skip); 676 677 for (i = 0; i + 1 < count; i += 2) 678 BCI_WRITE(idx[i] | (idx[i + 1] << 16)); 679 if (i < count) 680 BCI_WRITE(idx[i]); 681 } 682 683 idx += count; 684 n -= count; 685 686 prim |= BCI_CMD_DRAW_CONT; 687 } 688 689 return 0; 690 } 691 692 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv, 693 const drm_savage_cmd_header_t * cmd_header, 694 const uint16_t *idx, 695 const uint32_t *vtxbuf, 696 unsigned int vb_size, unsigned int vb_stride) 697 { 698 unsigned char reorder = 0; 699 unsigned int prim = cmd_header->idx.prim; 700 unsigned int skip = cmd_header->idx.skip; 701 unsigned int n = cmd_header->idx.count; 702 unsigned int vtx_size; 703 unsigned int i; 704 DMA_LOCALS; 705 706 if (!n) 707 return 0; 708 709 switch (prim) { 710 case SAVAGE_PRIM_TRILIST_201: 711 reorder = 1; 712 prim = SAVAGE_PRIM_TRILIST; 713 /* fall through */ 714 case SAVAGE_PRIM_TRILIST: 715 if (n % 3 != 0) { 716 DRM_ERROR("wrong number of indices %u in TRILIST\n", n); 717 return -EINVAL; 718 } 719 break; 720 case SAVAGE_PRIM_TRISTRIP: 721 case SAVAGE_PRIM_TRIFAN: 722 if (n < 3) { 723 DRM_ERROR 724 ("wrong number of indices %u in TRIFAN/STRIP\n", n); 725 return -EINVAL; 726 } 727 break; 728 default: 729 DRM_ERROR("invalid primitive type %u\n", prim); 730 return -EINVAL; 731 } 732 733 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 734 if (skip > SAVAGE_SKIP_ALL_S3D) { 735 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 736 return -EINVAL; 737 } 738 vtx_size = 8; /* full vertex */ 739 } else { 740 if (skip > SAVAGE_SKIP_ALL_S4) { 741 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 742 return -EINVAL; 743 } 744 vtx_size = 10; /* full vertex */ 745 } 746 747 vtx_size -= (skip & 1) + (skip >> 1 & 1) + 748 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + 749 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); 750 751 if (vtx_size > vb_stride) { 752 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", 753 vtx_size, vb_stride); 754 return -EINVAL; 755 } 756 757 prim <<= 25; 758 while (n != 0) { 759 /* Can emit up to 255 vertices (85 triangles) at once. */ 760 unsigned int count = n > 255 ? 255 : n; 761 762 /* Check indices */ 763 for (i = 0; i < count; ++i) { 764 if (idx[i] > vb_size / (vb_stride * 4)) { 765 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", 766 i, idx[i], vb_size / (vb_stride * 4)); 767 return -EINVAL; 768 } 769 } 770 771 if (reorder) { 772 /* Need to reorder vertices for correct flat 773 * shading while preserving the clock sense 774 * for correct culling. Only on Savage3D. */ 775 int reorder[3] = { 2, -1, -1 }; 776 777 BEGIN_DMA(count * vtx_size + 1); 778 DMA_DRAW_PRIMITIVE(count, prim, skip); 779 780 for (i = 0; i < count; ++i) { 781 unsigned int j = idx[i + reorder[i % 3]]; 782 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 783 } 784 785 DMA_COMMIT(); 786 } else { 787 BEGIN_DMA(count * vtx_size + 1); 788 DMA_DRAW_PRIMITIVE(count, prim, skip); 789 790 for (i = 0; i < count; ++i) { 791 unsigned int j = idx[i]; 792 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 793 } 794 795 DMA_COMMIT(); 796 } 797 798 idx += count; 799 n -= count; 800 801 prim |= BCI_CMD_DRAW_CONT; 802 } 803 804 return 0; 805 } 806 807 static int savage_dispatch_clear(drm_savage_private_t * dev_priv, 808 const drm_savage_cmd_header_t * cmd_header, 809 const drm_savage_cmd_header_t *data, 810 unsigned int nbox, 811 const struct drm_clip_rect *boxes) 812 { 813 unsigned int flags = cmd_header->clear0.flags; 814 unsigned int clear_cmd; 815 unsigned int i, nbufs; 816 DMA_LOCALS; 817 818 if (nbox == 0) 819 return 0; 820 821 clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | 822 BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW; 823 BCI_CMD_SET_ROP(clear_cmd, 0xCC); 824 825 nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) + 826 ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0); 827 if (nbufs == 0) 828 return 0; 829 830 if (data->clear1.mask != 0xffffffff) { 831 /* set mask */ 832 BEGIN_DMA(2); 833 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); 834 DMA_WRITE(data->clear1.mask); 835 DMA_COMMIT(); 836 } 837 for (i = 0; i < nbox; ++i) { 838 unsigned int x, y, w, h; 839 unsigned int buf; 840 x = boxes[i].x1, y = boxes[i].y1; 841 w = boxes[i].x2 - boxes[i].x1; 842 h = boxes[i].y2 - boxes[i].y1; 843 BEGIN_DMA(nbufs * 6); 844 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) { 845 if (!(flags & buf)) 846 continue; 847 DMA_WRITE(clear_cmd); 848 switch (buf) { 849 case SAVAGE_FRONT: 850 DMA_WRITE(dev_priv->front_offset); 851 DMA_WRITE(dev_priv->front_bd); 852 break; 853 case SAVAGE_BACK: 854 DMA_WRITE(dev_priv->back_offset); 855 DMA_WRITE(dev_priv->back_bd); 856 break; 857 case SAVAGE_DEPTH: 858 DMA_WRITE(dev_priv->depth_offset); 859 DMA_WRITE(dev_priv->depth_bd); 860 break; 861 } 862 DMA_WRITE(data->clear1.value); 863 DMA_WRITE(BCI_X_Y(x, y)); 864 DMA_WRITE(BCI_W_H(w, h)); 865 } 866 DMA_COMMIT(); 867 } 868 if (data->clear1.mask != 0xffffffff) { 869 /* reset mask */ 870 BEGIN_DMA(2); 871 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); 872 DMA_WRITE(0xffffffff); 873 DMA_COMMIT(); 874 } 875 876 return 0; 877 } 878 879 static int savage_dispatch_swap(drm_savage_private_t * dev_priv, 880 unsigned int nbox, const struct drm_clip_rect *boxes) 881 { 882 unsigned int swap_cmd; 883 unsigned int i; 884 DMA_LOCALS; 885 886 if (nbox == 0) 887 return 0; 888 889 swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | 890 BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD; 891 BCI_CMD_SET_ROP(swap_cmd, 0xCC); 892 893 for (i = 0; i < nbox; ++i) { 894 BEGIN_DMA(6); 895 DMA_WRITE(swap_cmd); 896 DMA_WRITE(dev_priv->back_offset); 897 DMA_WRITE(dev_priv->back_bd); 898 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1)); 899 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1)); 900 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1, 901 boxes[i].y2 - boxes[i].y1)); 902 DMA_COMMIT(); 903 } 904 905 return 0; 906 } 907 908 static int savage_dispatch_draw(drm_savage_private_t * dev_priv, 909 const drm_savage_cmd_header_t *start, 910 const drm_savage_cmd_header_t *end, 911 const struct drm_buf * dmabuf, 912 const unsigned int *vtxbuf, 913 unsigned int vb_size, unsigned int vb_stride, 914 unsigned int nbox, 915 const struct drm_clip_rect *boxes) 916 { 917 unsigned int i, j; 918 int ret; 919 920 for (i = 0; i < nbox; ++i) { 921 const drm_savage_cmd_header_t *cmdbuf; 922 dev_priv->emit_clip_rect(dev_priv, &boxes[i]); 923 924 cmdbuf = start; 925 while (cmdbuf < end) { 926 drm_savage_cmd_header_t cmd_header; 927 cmd_header = *cmdbuf; 928 cmdbuf++; 929 switch (cmd_header.cmd.cmd) { 930 case SAVAGE_CMD_DMA_PRIM: 931 ret = savage_dispatch_dma_prim( 932 dev_priv, &cmd_header, dmabuf); 933 break; 934 case SAVAGE_CMD_VB_PRIM: 935 ret = savage_dispatch_vb_prim( 936 dev_priv, &cmd_header, 937 vtxbuf, vb_size, vb_stride); 938 break; 939 case SAVAGE_CMD_DMA_IDX: 940 j = (cmd_header.idx.count + 3) / 4; 941 /* j was check in savage_bci_cmdbuf */ 942 ret = savage_dispatch_dma_idx(dev_priv, 943 &cmd_header, (const uint16_t *)cmdbuf, 944 dmabuf); 945 cmdbuf += j; 946 break; 947 case SAVAGE_CMD_VB_IDX: 948 j = (cmd_header.idx.count + 3) / 4; 949 /* j was check in savage_bci_cmdbuf */ 950 ret = savage_dispatch_vb_idx(dev_priv, 951 &cmd_header, (const uint16_t *)cmdbuf, 952 (const uint32_t *)vtxbuf, vb_size, 953 vb_stride); 954 cmdbuf += j; 955 break; 956 default: 957 /* What's the best return code? EFAULT? */ 958 DRM_ERROR("IMPLEMENTATION ERROR: " 959 "non-drawing-command %d\n", 960 cmd_header.cmd.cmd); 961 return -EINVAL; 962 } 963 964 if (ret != 0) 965 return ret; 966 } 967 } 968 969 return 0; 970 } 971 972 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) 973 { 974 drm_savage_private_t *dev_priv = dev->dev_private; 975 struct drm_device_dma *dma = dev->dma; 976 struct drm_buf *dmabuf; 977 drm_savage_cmdbuf_t *cmdbuf = data; 978 drm_savage_cmd_header_t *kcmd_addr = NULL; 979 drm_savage_cmd_header_t *first_draw_cmd; 980 unsigned int *kvb_addr = NULL; 981 struct drm_clip_rect *kbox_addr = NULL; 982 unsigned int i, j; 983 int ret = 0; 984 985 DRM_DEBUG("\n"); 986 987 LOCK_TEST_WITH_RETURN(dev, file_priv); 988 989 if (dma && dma->buflist) { 990 if (cmdbuf->dma_idx >= dma->buf_count) { 991 DRM_ERROR 992 ("vertex buffer index %u out of range (0-%u)\n", 993 cmdbuf->dma_idx, dma->buf_count - 1); 994 return -EINVAL; 995 } 996 dmabuf = dma->buflist[cmdbuf->dma_idx]; 997 } else { 998 dmabuf = NULL; 999 } 1000 1001 /* Copy the user buffers into kernel temporary areas. This hasn't been 1002 * a performance loss compared to VERIFYAREA_READ/ 1003 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct 1004 * for locking on FreeBSD. 1005 */ 1006 if (cmdbuf->size) { 1007 kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL); 1008 if (kcmd_addr == NULL) 1009 return -ENOMEM; 1010 1011 if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr, 1012 cmdbuf->size * 8)) 1013 { 1014 kfree(kcmd_addr); 1015 return -EFAULT; 1016 } 1017 cmdbuf->cmd_addr = kcmd_addr; 1018 } 1019 if (cmdbuf->vb_size) { 1020 kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size); 1021 if (IS_ERR(kvb_addr)) { 1022 ret = PTR_ERR(kvb_addr); 1023 kvb_addr = NULL; 1024 goto done; 1025 } 1026 cmdbuf->vb_addr = kvb_addr; 1027 } 1028 if (cmdbuf->nbox) { 1029 kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect), 1030 GFP_KERNEL); 1031 if (kbox_addr == NULL) { 1032 ret = -ENOMEM; 1033 goto done; 1034 } 1035 1036 if (copy_from_user(kbox_addr, cmdbuf->box_addr, 1037 cmdbuf->nbox * sizeof(struct drm_clip_rect))) { 1038 ret = -EFAULT; 1039 goto done; 1040 } 1041 cmdbuf->box_addr = kbox_addr; 1042 } 1043 1044 /* Make sure writes to DMA buffers are finished before sending 1045 * DMA commands to the graphics hardware. */ 1046 mb(); 1047 1048 /* Coming from user space. Don't know if the Xserver has 1049 * emitted wait commands. Assuming the worst. */ 1050 dev_priv->waiting = 1; 1051 1052 i = 0; 1053 first_draw_cmd = NULL; 1054 while (i < cmdbuf->size) { 1055 drm_savage_cmd_header_t cmd_header; 1056 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr; 1057 cmdbuf->cmd_addr++; 1058 i++; 1059 1060 /* Group drawing commands with same state to minimize 1061 * iterations over clip rects. */ 1062 j = 0; 1063 switch (cmd_header.cmd.cmd) { 1064 case SAVAGE_CMD_DMA_IDX: 1065 case SAVAGE_CMD_VB_IDX: 1066 j = (cmd_header.idx.count + 3) / 4; 1067 if (i + j > cmdbuf->size) { 1068 DRM_ERROR("indexed drawing command extends " 1069 "beyond end of command buffer\n"); 1070 DMA_FLUSH(); 1071 ret = -EINVAL; 1072 goto done; 1073 } 1074 /* fall through */ 1075 case SAVAGE_CMD_DMA_PRIM: 1076 case SAVAGE_CMD_VB_PRIM: 1077 if (!first_draw_cmd) 1078 first_draw_cmd = cmdbuf->cmd_addr - 1; 1079 cmdbuf->cmd_addr += j; 1080 i += j; 1081 break; 1082 default: 1083 if (first_draw_cmd) { 1084 ret = savage_dispatch_draw( 1085 dev_priv, first_draw_cmd, 1086 cmdbuf->cmd_addr - 1, 1087 dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size, 1088 cmdbuf->vb_stride, 1089 cmdbuf->nbox, cmdbuf->box_addr); 1090 if (ret != 0) 1091 goto done; 1092 first_draw_cmd = NULL; 1093 } 1094 } 1095 if (first_draw_cmd) 1096 continue; 1097 1098 switch (cmd_header.cmd.cmd) { 1099 case SAVAGE_CMD_STATE: 1100 j = (cmd_header.state.count + 1) / 2; 1101 if (i + j > cmdbuf->size) { 1102 DRM_ERROR("command SAVAGE_CMD_STATE extends " 1103 "beyond end of command buffer\n"); 1104 DMA_FLUSH(); 1105 ret = -EINVAL; 1106 goto done; 1107 } 1108 ret = savage_dispatch_state(dev_priv, &cmd_header, 1109 (const uint32_t *)cmdbuf->cmd_addr); 1110 cmdbuf->cmd_addr += j; 1111 i += j; 1112 break; 1113 case SAVAGE_CMD_CLEAR: 1114 if (i + 1 > cmdbuf->size) { 1115 DRM_ERROR("command SAVAGE_CMD_CLEAR extends " 1116 "beyond end of command buffer\n"); 1117 DMA_FLUSH(); 1118 ret = -EINVAL; 1119 goto done; 1120 } 1121 ret = savage_dispatch_clear(dev_priv, &cmd_header, 1122 cmdbuf->cmd_addr, 1123 cmdbuf->nbox, 1124 cmdbuf->box_addr); 1125 cmdbuf->cmd_addr++; 1126 i++; 1127 break; 1128 case SAVAGE_CMD_SWAP: 1129 ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox, 1130 cmdbuf->box_addr); 1131 break; 1132 default: 1133 DRM_ERROR("invalid command 0x%x\n", 1134 cmd_header.cmd.cmd); 1135 DMA_FLUSH(); 1136 ret = -EINVAL; 1137 goto done; 1138 } 1139 1140 if (ret != 0) { 1141 DMA_FLUSH(); 1142 goto done; 1143 } 1144 } 1145 1146 if (first_draw_cmd) { 1147 ret = savage_dispatch_draw ( 1148 dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf, 1149 cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride, 1150 cmdbuf->nbox, cmdbuf->box_addr); 1151 if (ret != 0) { 1152 DMA_FLUSH(); 1153 goto done; 1154 } 1155 } 1156 1157 DMA_FLUSH(); 1158 1159 if (dmabuf && cmdbuf->discard) { 1160 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; 1161 uint16_t event; 1162 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); 1163 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap); 1164 savage_freelist_put(dev, dmabuf); 1165 } 1166 1167 done: 1168 /* If we didn't need to allocate them, these'll be NULL */ 1169 kfree(kcmd_addr); 1170 kfree(kvb_addr); 1171 kfree(kbox_addr); 1172 1173 return ret; 1174 } 1175