/*	$NetBSD: savage_state.c,v 1.2 2018/08/27 04:58:36 riastradh Exp $	*/

/* savage_state.c -- State and drawing support for Savage
 *
 * Copyright 2004  Felix Kuehling
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: savage_state.c,v 1.2 2018/08/27 04:58:36 riastradh Exp $");

#include <drm/drmP.h>
#include <drm/savage_drm.h>
#include "savage_drv.h"

void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
			       const struct drm_clip_rect * pbox)
{
	uint32_t scstart = dev_priv->state.s3d.new_scstart;
	uint32_t scend = dev_priv->state.s3d.new_scend;
	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
	    ((uint32_t) pbox->x1 & 0x000007ff) |
	    (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
	scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
	    ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
	if (scstart != dev_priv->state.s3d.scstart ||
	    scend != dev_priv->state.s3d.scend) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
		DMA_WRITE(scstart);
		DMA_WRITE(scend);
		dev_priv->state.s3d.scstart = scstart;
		dev_priv->state.s3d.scend = scend;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
			      const struct drm_clip_rect * pbox)
{
	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
	    ((uint32_t) pbox->x1 & 0x000007ff) |
	    (((uint32_t) pbox->y1 << 12) & 0x00fff000);
	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
	    (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
	    ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
		DMA_LOCALS;
		BEGIN_DMA(4);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
		DMA_WRITE(drawctrl0);
		DMA_WRITE(drawctrl1);
		dev_priv->state.s4.drawctrl0 = drawctrl0;
		dev_priv->state.s4.drawctrl1 = drawctrl1;
		dev_priv->waiting = 1;
		DMA_COMMIT();
	}
}

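/*
 * Validate a texture address supplied by user space.  Going by the
 * checks below, the low three bits of the address carry flags rather
 * than address bits: bit 0 apparently selects AGP (set) versus local
 * video memory (clear), and bits 1-2 are reserved and must read back
 * as binary 01 (hence the (addr & 6) != 2 test).  The flag bits are
 * masked off before the address is checked against the texture heap.
 */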
static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
				 uint32_t addr)
{
	if ((addr & 6) != 2) {	/* reserved bits */
		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
		return -EINVAL;
	}
	if (!(addr & 1)) {	/* local */
		addr &= ~7;
		if (addr < dev_priv->texture_offset ||
		    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
			DRM_ERROR
			    ("bad texAddr%d %08x (local addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	} else {		/* AGP */
		if (!dev_priv->agp_textures) {
			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
				  unit, addr);
			return -EINVAL;
		}
		addr &= ~7;
		if (addr < dev_priv->agp_textures->offset ||
		    addr >= (dev_priv->agp_textures->offset +
			     dev_priv->agp_textures->size)) {
			DRM_ERROR
			    ("bad texAddr%d %08x (AGP addr out of range)\n",
			     unit, addr);
			return -EINVAL;
		}
	}
	return 0;
}

#define SAVE_STATE(reg,where)			\
	if(start <= reg && start+count > reg)	\
		dev_priv->state.where = regs[reg - start]
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if(start <= reg && start+count > reg) {			\
		uint32_t tmp;					\
		tmp = regs[reg - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)

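/*
 * The verify_state functions do two things with a register block
 * submitted by user space: they shadow selected registers into
 * dev_priv->state via SAVE_STATE/SAVE_STATE_MASK (the scissor bits
 * are masked out and only cached in new_scstart/new_scend resp.
 * new_drawctrl0/new_drawctrl1), and they reject the update if an
 * enabled texture unit points outside the texture heap.
 */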
static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
				   unsigned int start, unsigned int count,
				   const uint32_t *regs)
{
	if (start < SAVAGE_TEXPALADDR_S3D ||
	    start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
			~SAVAGE_SCISSOR_MASK_S3D);
	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
			~SAVAGE_SCISSOR_MASK_S3D);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXCTRL_S3D &&
	    start + count > SAVAGE_TEXPALADDR_S3D) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
			return savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s3d.texaddr);
	}

	return 0;
}

static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
				  unsigned int start, unsigned int count,
				  const uint32_t *regs)
{
	int ret = 0;

	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
	    start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
			  start, start + count - 1);
		return -EINVAL;
	}

	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
			~SAVAGE_SCISSOR_MASK_S4);
	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
			~SAVAGE_SCISSOR_MASK_S4);

	/* if any texture regs were changed ... */
	if (start <= SAVAGE_TEXDESCR_S4 &&
	    start + count > SAVAGE_TEXPALADDR_S4) {
		/* ... check texture state */
		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 0,
					dev_priv->state.s4.texaddr0);
		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
			ret |= savage_verify_texaddr(dev_priv, 1,
					dev_priv->state.s4.texaddr1);
	}

	return ret;
}

#undef SAVE_STATE
#undef SAVE_STATE_MASK

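/*
 * Emit a range of state registers through the faked command DMA
 * stream.  Each DMA_SET_REGISTERS command below carries at most 255
 * registers, so the BCI size is the payload plus one command word per
 * group of up to 255 registers: count + (count + 254) / 255.  The
 * scissor registers are carved out of the range (splitting it into
 * count/count2) because savage_dispatch_draw re-emits them for every
 * clip rect.
 */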
static int savage_dispatch_state(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const uint32_t *regs)
{
	unsigned int count = cmd_header->state.count;
	unsigned int start = cmd_header->state.start;
	unsigned int count2 = 0;
	unsigned int bci_size;
	int ret;
	DMA_LOCALS;

	if (!count)
		return 0;

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_SCSTART_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1)
				count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
			if (start + count > SAVAGE_SCSTART_S3D)
				count = SAVAGE_SCSTART_S3D - start;
		} else if (start <= SAVAGE_SCEND_S3D) {
			if (start + count > SAVAGE_SCEND_S3D + 1) {
				count -= SAVAGE_SCEND_S3D + 1 - start;
				start = SAVAGE_SCEND_S3D + 1;
			} else
				return 0;
		}
	} else {
		ret = savage_verify_state_s4(dev_priv, start, count, regs);
		if (ret != 0)
			return ret;
		/* scissor regs are emitted in savage_dispatch_draw */
		if (start < SAVAGE_DRAWCTRL0_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
				count2 = count -
					(SAVAGE_DRAWCTRL1_S4 + 1 - start);
			if (start + count > SAVAGE_DRAWCTRL0_S4)
				count = SAVAGE_DRAWCTRL0_S4 - start;
		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
			if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
				count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
				start = SAVAGE_DRAWCTRL1_S4 + 1;
			} else
				return 0;
		}
	}

	bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

	if (cmd_header->state.global) {
		BEGIN_DMA(bci_size + 1);
		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
		dev_priv->waiting = 1;
	} else {
		BEGIN_DMA(bci_size);
	}

	do {
		while (count > 0) {
			unsigned int n = count < 255 ? count : 255;
			DMA_SET_REGISTERS(start, n);
			DMA_COPY(regs, n);
			count -= n;
			start += n;
			regs += n;
		}
		start += 2;
		regs += 2;
		count = count2;
		count2 = 0;
	} while (count);

	DMA_COMMIT();

	return 0;
}

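/*
 * Dispatch a non-indexed primitive from a DMA vertex buffer.  A BCI
 * indexed-drawing command takes at most 255 indices (85 triangles), so
 * longer primitives are split and continued with BCI_CMD_DRAW_CONT.
 * SAVAGE_PRIM_TRILIST_201 is a Savage3D-only variant in which each
 * triangle (v0,v1,v2) is apparently emitted in the order (v2,v0,v1),
 * hence the "201" in the name, to get correct flat shading without
 * changing the winding; the reorder[] tables below implement that
 * rotation.
 */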
static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
				    const drm_savage_cmd_header_t * cmd_header,
				    const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	if (start + n > dmabuf->total / 32) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, dmabuf->total / 32);
		return -EINVAL;
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start + 2);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE((i + reorder[i % 3]) |
					  ((i + 1 +
					    reorder[(i + 1) % 3]) << 16));
			if (i < start + count)
				BCI_WRITE(i + reorder[i % 3]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, start);

			for (i = start + 1; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = start; i + 1 < start + count; i += 2)
				BCI_WRITE(i | ((i + 1) << 16));
			if (i < start + count)
				BCI_WRITE(i);
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

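/*
 * Dispatch a non-indexed primitive from a client-supplied vertex
 * buffer.  Each set bit in the low eight skip flags drops one dword
 * from the full vertex (8 dwords on Savage3D, 10 on Savage4), giving
 * vtx_size.  Vertices are copied inline into the faked command DMA
 * stream; when vb_stride equals vtx_size a whole run is copied with a
 * single DMA_COPY.
 */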
static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint32_t *vtxbuf, unsigned int vb_size,
				   unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->prim.prim;
	unsigned int skip = cmd_header->prim.skip;
	unsigned int n = cmd_header->prim.count;
	unsigned int start = cmd_header->prim.start;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
				  n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of vertices %u in TRIFAN/STRIP\n",
			     n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	if (start + n > vb_size / (vb_stride * 4)) {
		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
			  start, start + n - 1, vb_size / (vb_stride * 4));
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;
		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { -1, -1, -1 };
			reorder[start % 3] = 2;

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = start; i < start + count; ++i) {
				unsigned int j = i + reorder[i % 3];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			if (vb_stride == vtx_size) {
				DMA_COPY(&vtxbuf[vb_stride * start],
					 vtx_size * count);
			} else {
				for (i = start; i < start + count; ++i) {
					DMA_COPY(&vtxbuf[vb_stride * i],
						 vtx_size);
				}
			}

			DMA_COMMIT();
		}

		start += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

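/*
 * Indexed variant of savage_dispatch_dma_prim.  The indices come from
 * the command stream and are checked against the number of full
 * vertices that fit in the DMA buffer (dmabuf->total / 32, a full DMA
 * vertex being 8 dwords) before they are written to the BCI.
 */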
static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
				   const drm_savage_cmd_header_t * cmd_header,
				   const uint16_t *idx,
				   const struct drm_buf * dmabuf)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int i;
	BCI_LOCALS;

	if (!dmabuf) {
		DRM_ERROR("called without dma buffers!\n");
		return -EINVAL;
	}

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip != 0) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
	} else {
		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
		    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
		    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
			DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
			return -EINVAL;
		}
		if (reorder) {
			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
			return -EINVAL;
		}
	}

	/* Vertex DMA doesn't work with command DMA at the same time,
	 * so we use BCI_... to submit commands here. Flush buffered
	 * faked DMA first. */
	DMA_FLUSH();

	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
		BEGIN_BCI(2);
		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
		dev_priv->state.common.vbaddr = dmabuf->bus_address;
	}
	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
		/* Workaround for what looks like a hardware bug. If a
		 * WAIT_3D_IDLE was emitted some time before the
		 * indexed drawing command then the engine will lock
		 * up. There are two known workarounds:
		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
		BEGIN_BCI(63);
		for (i = 0; i < 63; ++i)
			BCI_WRITE(BCI_CMD_WAIT);
		dev_priv->waiting = 0;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 indices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > dmabuf->total / 32) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], dmabuf->total / 32);
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder indices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i + reorder[i % 3]] |
					  (idx[i + 1 +
					       reorder[(i + 1) % 3]] << 16));
			if (i < count)
				BCI_WRITE(idx[i + reorder[i % 3]]);
		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
			BEGIN_BCI((count + 1 + 1) / 2);
			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

			for (i = 1; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		} else {
			BEGIN_BCI((count + 2 + 1) / 2);
			BCI_DRAW_INDICES_S4(count, prim, skip);

			for (i = 0; i + 1 < count; i += 2)
				BCI_WRITE(idx[i] | (idx[i + 1] << 16));
			if (i < count)
				BCI_WRITE(idx[i]);
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
				  const drm_savage_cmd_header_t * cmd_header,
				  const uint16_t *idx,
				  const uint32_t *vtxbuf,
				  unsigned int vb_size, unsigned int vb_stride)
{
	unsigned char reorder = 0;
	unsigned int prim = cmd_header->idx.prim;
	unsigned int skip = cmd_header->idx.skip;
	unsigned int n = cmd_header->idx.count;
	unsigned int vtx_size;
	unsigned int i;
	DMA_LOCALS;

	if (!n)
		return 0;

	switch (prim) {
	case SAVAGE_PRIM_TRILIST_201:
		reorder = 1;
		prim = SAVAGE_PRIM_TRILIST;
		/* fall through */
	case SAVAGE_PRIM_TRILIST:
		if (n % 3 != 0) {
			DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
			return -EINVAL;
		}
		break;
	case SAVAGE_PRIM_TRISTRIP:
	case SAVAGE_PRIM_TRIFAN:
		if (n < 3) {
			DRM_ERROR
			    ("wrong number of indices %u in TRIFAN/STRIP\n", n);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("invalid primitive type %u\n", prim);
		return -EINVAL;
	}

	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
		if (skip > SAVAGE_SKIP_ALL_S3D) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 8;	/* full vertex */
	} else {
		if (skip > SAVAGE_SKIP_ALL_S4) {
			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
			return -EINVAL;
		}
		vtx_size = 10;	/* full vertex */
	}

	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
	    (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
	    (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

	if (vtx_size > vb_stride) {
		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
			  vtx_size, vb_stride);
		return -EINVAL;
	}

	prim <<= 25;
	while (n != 0) {
		/* Can emit up to 255 vertices (85 triangles) at once. */
		unsigned int count = n > 255 ? 255 : n;

		/* Check indices */
		for (i = 0; i < count; ++i) {
			if (idx[i] > vb_size / (vb_stride * 4)) {
				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
					  i, idx[i], vb_size / (vb_stride * 4));
				return -EINVAL;
			}
		}

		if (reorder) {
			/* Need to reorder vertices for correct flat
			 * shading while preserving the clock sense
			 * for correct culling. Only on Savage3D. */
			int reorder[3] = { 2, -1, -1 };

			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i + reorder[i % 3]];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		} else {
			BEGIN_DMA(count * vtx_size + 1);
			DMA_DRAW_PRIMITIVE(count, prim, skip);

			for (i = 0; i < count; ++i) {
				unsigned int j = idx[i];
				DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
			}

			DMA_COMMIT();
		}

		idx += count;
		n -= count;

		prim |= BCI_CMD_DRAW_CONT;
	}

	return 0;
}

static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
				 const drm_savage_cmd_header_t * cmd_header,
				 const drm_savage_cmd_header_t *data,
				 unsigned int nbox,
				 const struct drm_clip_rect *boxes)
{
	unsigned int flags = cmd_header->clear0.flags;
	unsigned int clear_cmd;
	unsigned int i, nbufs;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
	BCI_CMD_SET_ROP(clear_cmd, 0xCC);

	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
	    ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
	if (nbufs == 0)
		return 0;

	if (data->clear1.mask != 0xffffffff) {
		/* set mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(data->clear1.mask);
		DMA_COMMIT();
	}
	for (i = 0; i < nbox; ++i) {
		unsigned int x, y, w, h;
		unsigned int buf;
		x = boxes[i].x1, y = boxes[i].y1;
		w = boxes[i].x2 - boxes[i].x1;
		h = boxes[i].y2 - boxes[i].y1;
		BEGIN_DMA(nbufs * 6);
		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
			if (!(flags & buf))
				continue;
			DMA_WRITE(clear_cmd);
			switch (buf) {
			case SAVAGE_FRONT:
				DMA_WRITE(dev_priv->front_offset);
				DMA_WRITE(dev_priv->front_bd);
				break;
			case SAVAGE_BACK:
				DMA_WRITE(dev_priv->back_offset);
				DMA_WRITE(dev_priv->back_bd);
				break;
			case SAVAGE_DEPTH:
				DMA_WRITE(dev_priv->depth_offset);
				DMA_WRITE(dev_priv->depth_bd);
				break;
			}
			DMA_WRITE(data->clear1.value);
			DMA_WRITE(BCI_X_Y(x, y));
			DMA_WRITE(BCI_W_H(w, h));
		}
		DMA_COMMIT();
	}
	if (data->clear1.mask != 0xffffffff) {
		/* reset mask */
		BEGIN_DMA(2);
		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
		DMA_WRITE(0xffffffff);
		DMA_COMMIT();
	}

	return 0;
}

static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
				unsigned int nbox, const struct drm_clip_rect *boxes)
{
	unsigned int swap_cmd;
	unsigned int i;
	DMA_LOCALS;

	if (nbox == 0)
		return 0;

	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
	    BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
	BCI_CMD_SET_ROP(swap_cmd, 0xCC);

	for (i = 0; i < nbox; ++i) {
		BEGIN_DMA(6);
		DMA_WRITE(swap_cmd);
		DMA_WRITE(dev_priv->back_offset);
		DMA_WRITE(dev_priv->back_bd);
		/* destination and source coordinates are identical for
		 * a swap blit, hence the two identical writes */
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
		DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
				  boxes[i].y2 - boxes[i].y1));
		DMA_COMMIT();
	}

	return 0;
}

static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
				const drm_savage_cmd_header_t *start,
				const drm_savage_cmd_header_t *end,
				const struct drm_buf * dmabuf,
				const unsigned int *vtxbuf,
				unsigned int vb_size, unsigned int vb_stride,
				unsigned int nbox,
				const struct drm_clip_rect *boxes)
{
	unsigned int i, j;
	int ret;

	for (i = 0; i < nbox; ++i) {
		const drm_savage_cmd_header_t *cmdbuf;
		dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

		cmdbuf = start;
		while (cmdbuf < end) {
			drm_savage_cmd_header_t cmd_header;
			cmd_header = *cmdbuf;
			cmdbuf++;
			switch (cmd_header.cmd.cmd) {
			case SAVAGE_CMD_DMA_PRIM:
				ret = savage_dispatch_dma_prim(
					dev_priv, &cmd_header, dmabuf);
				break;
			case SAVAGE_CMD_VB_PRIM:
				ret = savage_dispatch_vb_prim(
					dev_priv, &cmd_header,
					vtxbuf, vb_size, vb_stride);
				break;
			case SAVAGE_CMD_DMA_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_dma_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					dmabuf);
				cmdbuf += j;
				break;
			case SAVAGE_CMD_VB_IDX:
				j = (cmd_header.idx.count + 3) / 4;
				/* j was checked in savage_bci_cmdbuf */
				ret = savage_dispatch_vb_idx(dev_priv,
					&cmd_header, (const uint16_t *)cmdbuf,
					(const uint32_t *)vtxbuf, vb_size,
					vb_stride);
				cmdbuf += j;
				break;
			default:
				/* What's the best return code? EFAULT? */
				DRM_ERROR("IMPLEMENTATION ERROR: "
					  "non-drawing-command %d\n",
					  cmd_header.cmd.cmd);
				return -EINVAL;
			}

			if (ret != 0)
				return ret;
		}
	}

	return 0;
}

int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_savage_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *dmabuf;
	drm_savage_cmdbuf_t *cmdbuf = data;
	drm_savage_cmd_header_t *kcmd_addr = NULL;
	drm_savage_cmd_header_t *first_draw_cmd;
	unsigned int *kvb_addr = NULL;
	struct drm_clip_rect *kbox_addr = NULL;
	unsigned int i, j;
	int ret = 0;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (dma && dma->buflist) {
		if (cmdbuf->dma_idx >= dma->buf_count) {
			DRM_ERROR
			    ("vertex buffer index %u out of range (0-%u)\n",
			     cmdbuf->dma_idx, dma->buf_count - 1);
			return -EINVAL;
		}
		dmabuf = dma->buflist[cmdbuf->dma_idx];
	} else {
		dmabuf = NULL;
	}

	/* Copy the user buffers into kernel temporary areas. This hasn't been
	 * a performance loss compared to VERIFYAREA_READ/
	 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
	 * for locking on FreeBSD.
	 */
	if (cmdbuf->size) {
		kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
		if (kcmd_addr == NULL)
			return -ENOMEM;

		if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
				   cmdbuf->size * 8))
		{
			kfree(kcmd_addr);
			return -EFAULT;
		}
		cmdbuf->cmd_addr = kcmd_addr;
	}
	if (cmdbuf->vb_size) {
		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
		if (kvb_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kvb_addr, cmdbuf->vb_addr,
				   cmdbuf->vb_size)) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->vb_addr = kvb_addr;
	}
	if (cmdbuf->nbox) {
		kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
					  GFP_KERNEL);
		if (kbox_addr == NULL) {
			ret = -ENOMEM;
			goto done;
		}

		if (copy_from_user(kbox_addr, cmdbuf->box_addr,
				   cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
			ret = -EFAULT;
			goto done;
		}
		cmdbuf->box_addr = kbox_addr;
	}

	/* Make sure writes to DMA buffers are finished before sending
	 * DMA commands to the graphics hardware. */
	mb();

	/* Coming from user space. Don't know if the Xserver has
	 * emitted wait commands. Assuming the worst. */
	dev_priv->waiting = 1;

	/* Walk the command stream. Runs of consecutive drawing commands
	 * are not dispatched immediately; first_draw_cmd marks the start
	 * of the current run, which is handed to savage_dispatch_draw
	 * (once per clip rect) as soon as a state change or other
	 * non-drawing command is encountered. */
	i = 0;
	first_draw_cmd = NULL;
	while (i < cmdbuf->size) {
		drm_savage_cmd_header_t cmd_header;
		cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
		cmdbuf->cmd_addr++;
		i++;

		/* Group drawing commands with same state to minimize
		 * iterations over clip rects. */
		j = 0;
		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_DMA_IDX:
		case SAVAGE_CMD_VB_IDX:
			j = (cmd_header.idx.count + 3) / 4;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("indexed drawing command extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			/* fall through */
		case SAVAGE_CMD_DMA_PRIM:
		case SAVAGE_CMD_VB_PRIM:
			if (!first_draw_cmd)
				first_draw_cmd = cmdbuf->cmd_addr - 1;
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		default:
			if (first_draw_cmd) {
				ret = savage_dispatch_draw(
					dev_priv, first_draw_cmd,
					cmdbuf->cmd_addr - 1,
					dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
					cmdbuf->vb_stride,
					cmdbuf->nbox, cmdbuf->box_addr);
				if (ret != 0)
					goto done;
				first_draw_cmd = NULL;
			}
		}
		if (first_draw_cmd)
			continue;

		switch (cmd_header.cmd.cmd) {
		case SAVAGE_CMD_STATE:
			j = (cmd_header.state.count + 1) / 2;
			if (i + j > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_STATE extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_state(dev_priv, &cmd_header,
				(const uint32_t *)cmdbuf->cmd_addr);
			cmdbuf->cmd_addr += j;
			i += j;
			break;
		case SAVAGE_CMD_CLEAR:
			if (i + 1 > cmdbuf->size) {
				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
					  "beyond end of command buffer\n");
				DMA_FLUSH();
				ret = -EINVAL;
				goto done;
			}
			ret = savage_dispatch_clear(dev_priv, &cmd_header,
						    cmdbuf->cmd_addr,
						    cmdbuf->nbox,
						    cmdbuf->box_addr);
			cmdbuf->cmd_addr++;
			i++;
			break;
		case SAVAGE_CMD_SWAP:
			ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
						   cmdbuf->box_addr);
			break;
		default:
			DRM_ERROR("invalid command 0x%x\n",
				  cmd_header.cmd.cmd);
			DMA_FLUSH();
			ret = -EINVAL;
			goto done;
		}

		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

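	/* Dispatch any drawing commands that were still being grouped
	 * when the end of the command buffer was reached. */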
	if (first_draw_cmd) {
		ret = savage_dispatch_draw(
			dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
			cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
			cmdbuf->nbox, cmdbuf->box_addr);
		if (ret != 0) {
			DMA_FLUSH();
			goto done;
		}
	}

	DMA_FLUSH();

	if (dmabuf && cmdbuf->discard) {
		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
		uint16_t event;
		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
		savage_freelist_put(dev, dmabuf);
	}

done:
	/* If we didn't need to allocate them, these'll be NULL */
	kfree(kcmd_addr);
	kfree(kvb_addr);
	kfree(kbox_addr);

	return ret;
}