1 #include "l.h" 2 3 enum 4 { 5 E_HILO = 1<<0, 6 E_FCR = 1<<1, 7 E_MCR = 1<<2, 8 E_MEM = 1<<3, 9 E_MEMSP = 1<<4, /* uses offset and size */ 10 E_MEMSB = 1<<5, /* uses offset and size */ 11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 12 DELAY = BRANCH|LOAD|FCMP, 13 }; 14 15 typedef struct Sch Sch; 16 typedef struct Dep Dep; 17 18 struct Dep 19 { 20 ulong ireg; 21 ulong freg; 22 ulong cc; 23 }; 24 struct Sch 25 { 26 Prog p; 27 Dep set; 28 Dep used; 29 long soffset; 30 char size; 31 char nop; 32 char comp; 33 }; 34 35 void markregused(Sch*, Prog*); 36 int depend(Sch*, Sch*); 37 int conflict(Sch*, Sch*); 38 int offoverlap(Sch*, Sch*); 39 void dumpbits(Sch*, Dep*); 40 41 void 42 sched(Prog *p0, Prog *pe) 43 { 44 Prog *p, *q; 45 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 46 47 /* 48 * build side structure 49 */ 50 s = sch; 51 for(p=p0;; p=p->link) { 52 memset(s, 0, sizeof(*s)); 53 s->p = *p; 54 markregused(s, p); 55 if(debug['X']) { 56 Bprint(&bso, "%P%|set", &s->p, 40); 57 dumpbits(s, &s->set); 58 Bprint(&bso, "; used"); 59 dumpbits(s, &s->used); 60 if(s->comp) 61 Bprint(&bso, "; compound"); 62 if(s->p.mark & LOAD) 63 Bprint(&bso, "; load"); 64 if(s->p.mark & BRANCH) 65 Bprint(&bso, "; branch"); 66 if(s->p.mark & FCMP) 67 Bprint(&bso, "; fcmp"); 68 Bprint(&bso, "\n"); 69 } 70 if(p == pe) 71 break; 72 s++; 73 } 74 se = s; 75 76 /* 77 * prepass to move things around 78 * does nothing, but tries to make 79 * the actual scheduler work better 80 */ 81 for(s=sch; s<=se; s++) { 82 if(!(s->p.mark & LOAD)) 83 continue; 84 /* always good to put nonconflict loads together */ 85 for(t=s+1; t<=se; t++) { 86 if(!(t->p.mark & LOAD)) 87 continue; 88 if(t->p.mark & BRANCH) 89 break; 90 if(conflict(s, t)) 91 break; 92 for(u=t-1; u>s; u--) 93 if(depend(u, t)) 94 goto no11; 95 u = s+1; 96 stmp = *t; 97 memmove(s+2, u, (uchar*)t - (uchar*)u); 98 *u = stmp; 99 break; 100 } 101 no11: 102 103 /* put schedule fodder above load */ 104 for(t=s+1; t<=se; t++) { 105 if(t->p.mark & BRANCH) 106 break; 107 if(s > sch && conflict(s-1, t)) 108 continue; 109 for(u=t-1; u>=s; u--) 110 if(depend(t, u)) 111 goto no1; 112 stmp = *t; 113 memmove(s+1, s, (uchar*)t - (uchar*)s); 114 *s = stmp; 115 if(!(s->p.mark & LOAD)) 116 break; 117 no1:; 118 } 119 } 120 121 for(s=se; s>=sch; s--) { 122 if(!(s->p.mark & DELAY)) 123 continue; 124 if(s < se) 125 if(!conflict(s, s+1)) 126 goto out3; 127 /* 128 * s is load, s+1 is immediate use of result or end of block 129 * t is the trial instruction to insert between s and s+1 130 */ 131 if(!debug['Y']) 132 for(t=s-1; t>=sch; t--) { 133 if(t->comp) 134 if(s->p.mark & BRANCH) 135 goto no2; 136 if(t->p.mark & DELAY) 137 if(s >= se || conflict(t, s+1)) 138 goto no2; 139 for(u=t+1; u<=s; u++) 140 if(depend(u, t)) 141 goto no2; 142 goto out2; 143 no2:; 144 } 145 if(debug['X']) 146 Bprint(&bso, "?l%P\n", &s->p); 147 if(s->p.mark & BRANCH) 148 s->nop = 1; 149 if(debug['v']) { 150 if(s->p.mark & LOAD) { 151 nop.load.count++; 152 nop.load.outof++; 153 } 154 if(s->p.mark & BRANCH) { 155 nop.branch.count++; 156 nop.branch.outof++; 157 } 158 if(s->p.mark & FCMP) { 159 nop.fcmp.count++; 160 nop.fcmp.outof++; 161 } 162 } 163 continue; 164 165 out2: 166 if(debug['X']) { 167 Bprint(&bso, "!l%P\n", &t->p); 168 Bprint(&bso, "%P\n", &s->p); 169 } 170 stmp = *t; 171 memmove(t, t+1, (uchar*)s - (uchar*)t); 172 *s = stmp; 173 s--; 174 175 out3: 176 if(debug['v']) { 177 if(s->p.mark & LOAD) 178 nop.load.outof++; 179 if(s->p.mark & BRANCH) 180 nop.branch.outof++; 181 if(s->p.mark & FCMP) 182 nop.fcmp.outof++; 183 } 184 } 185 186 /* Avoid HI/LO use->set */ 187 t = sch+1; 188 for(s=sch; s<se-1; s++, t++) { 189 if((s->used.cc & E_HILO) == 0) 190 continue; 191 if(t->set.cc & E_HILO) 192 s->nop = 2; 193 } 194 195 /* 196 * put it all back 197 */ 198 for(s=sch, p=p0; s<=se; s++, p=q) { 199 q = p->link; 200 if(q != s->p.link) { 201 *p = s->p; 202 p->link = q; 203 } 204 while(s->nop--) 205 addnop(p); 206 } 207 if(debug['X']) { 208 Bprint(&bso, "\n"); 209 Bflush(&bso); 210 } 211 } 212 213 void 214 markregused(Sch *s, Prog *realp) 215 { 216 int c, ar, ad, ld, sz; 217 ulong m; 218 Prog *p; 219 220 p = &s->p; 221 s->comp = compound(p); 222 s->nop = 0; 223 if(s->comp) { 224 s->set.ireg |= 1<<REGTMP; 225 s->used.ireg |= 1<<REGTMP; 226 } 227 228 ar = 0; /* dest is really reference */ 229 ad = 0; /* source/dest is really address */ 230 ld = 0; /* opcode is load instruction */ 231 sz = 20; /* size of load/store for overlap computation */ 232 233 /* 234 * flags based on opcode 235 */ 236 switch(p->as) { 237 case ATEXT: 238 curtext = realp; 239 autosize = p->to.offset + 8; 240 ad = 1; 241 break; 242 case AJAL: 243 c = p->reg; 244 if(c == NREG) 245 c = REGLINK; 246 s->set.ireg |= 1<<c; 247 ar = 1; 248 ad = 1; 249 break; 250 case ABGEZAL: 251 case ABLTZAL: 252 s->set.ireg |= 1<<REGLINK; 253 case ABEQ: 254 case ABGEZ: 255 case ABGTZ: 256 case ABLEZ: 257 case ABLTZ: 258 case ABNE: 259 ar = 1; 260 ad = 1; 261 break; 262 case ABFPT: 263 case ABFPF: 264 ad = 1; 265 s->used.cc |= E_FCR; 266 break; 267 case ACMPEQD: 268 case ACMPEQF: 269 case ACMPGED: 270 case ACMPGEF: 271 case ACMPGTD: 272 case ACMPGTF: 273 ar = 1; 274 s->set.cc |= E_FCR; 275 p->mark |= FCMP; 276 break; 277 case AJMP: 278 ar = 1; 279 ad = 1; 280 break; 281 case AMOVB: 282 case AMOVBU: 283 sz = 1; 284 ld = 1; 285 break; 286 case AMOVH: 287 case AMOVHU: 288 sz = 2; 289 ld = 1; 290 break; 291 case AMOVF: 292 case AMOVW: 293 case AMOVWL: 294 case AMOVWR: 295 sz = 4; 296 ld = 1; 297 break; 298 case AMOVD: 299 case AMOVV: 300 case AMOVVL: 301 case AMOVVR: 302 sz = 8; 303 ld = 1; 304 break; 305 case ADIV: 306 case ADIVU: 307 case AMUL: 308 case AMULU: 309 case AREM: 310 case AREMU: 311 case ADIVV: 312 case ADIVVU: 313 case AMULV: 314 case AMULVU: 315 case AREMV: 316 case AREMVU: 317 s->set.cc = E_HILO; 318 case AADD: 319 case AADDU: 320 case AADDV: 321 case AADDVU: 322 case AAND: 323 case ANOR: 324 case AOR: 325 case ASGT: 326 case ASGTU: 327 case ASLL: 328 case ASRA: 329 case ASRL: 330 case ASLLV: 331 case ASRAV: 332 case ASRLV: 333 case ASUB: 334 case ASUBU: 335 case ASUBV: 336 case ASUBVU: 337 case AXOR: 338 339 case AADDD: 340 case AADDF: 341 case AADDW: 342 case ASUBD: 343 case ASUBF: 344 case ASUBW: 345 case AMULF: 346 case AMULD: 347 case AMULW: 348 case ADIVF: 349 case ADIVD: 350 case ADIVW: 351 if(p->reg == NREG) { 352 if(p->to.type == D_REG || p->to.type == D_FREG) 353 p->reg = p->to.reg; 354 if(p->reg == NREG) 355 print("botch %P\n", p); 356 } 357 break; 358 } 359 360 /* 361 * flags based on 'to' field 362 */ 363 c = p->to.class; 364 if(c == 0) { 365 c = aclass(&p->to) + 1; 366 p->to.class = c; 367 } 368 c--; 369 switch(c) { 370 default: 371 print("unknown class %d %D\n", c, &p->to); 372 373 case C_ZCON: 374 case C_SCON: 375 case C_ADD0CON: 376 case C_AND0CON: 377 case C_ADDCON: 378 case C_ANDCON: 379 case C_UCON: 380 case C_LCON: 381 case C_NONE: 382 case C_SBRA: 383 case C_LBRA: 384 break; 385 386 case C_HI: 387 case C_LO: 388 s->set.cc |= E_HILO; 389 break; 390 case C_FCREG: 391 s->set.cc |= E_FCR; 392 break; 393 case C_MREG: 394 s->set.cc |= E_MCR; 395 break; 396 case C_ZOREG: 397 case C_SOREG: 398 case C_LOREG: 399 c = p->to.reg; 400 s->used.ireg |= 1<<c; 401 if(ad) 402 break; 403 s->size = sz; 404 s->soffset = regoff(&p->to); 405 406 m = ANYMEM; 407 if(c == REGSB) 408 m = E_MEMSB; 409 if(c == REGSP) 410 m = E_MEMSP; 411 412 if(ar) 413 s->used.cc |= m; 414 else 415 s->set.cc |= m; 416 break; 417 case C_SACON: 418 case C_LACON: 419 s->used.ireg |= 1<<REGSP; 420 break; 421 case C_SECON: 422 case C_LECON: 423 s->used.ireg |= 1<<REGSB; 424 break; 425 case C_REG: 426 if(ar) 427 s->used.ireg |= 1<<p->to.reg; 428 else 429 s->set.ireg |= 1<<p->to.reg; 430 break; 431 case C_FREG: 432 /* do better -- determine double prec */ 433 if(ar) { 434 s->used.freg |= 1<<p->to.reg; 435 s->used.freg |= 1<<(p->to.reg|1); 436 } else { 437 s->set.freg |= 1<<p->to.reg; 438 s->set.freg |= 1<<(p->to.reg|1); 439 } 440 if(ld && p->from.type == D_REG) 441 p->mark |= LOAD; 442 break; 443 case C_SAUTO: 444 case C_LAUTO: 445 s->used.ireg |= 1<<REGSP; 446 if(ad) 447 break; 448 s->size = sz; 449 s->soffset = regoff(&p->to); 450 451 if(ar) 452 s->used.cc |= E_MEMSP; 453 else 454 s->set.cc |= E_MEMSP; 455 break; 456 case C_SEXT: 457 case C_LEXT: 458 s->used.ireg |= 1<<REGSB; 459 if(ad) 460 break; 461 s->size = sz; 462 s->soffset = regoff(&p->to); 463 464 if(ar) 465 s->used.cc |= E_MEMSB; 466 else 467 s->set.cc |= E_MEMSB; 468 break; 469 } 470 471 /* 472 * flags based on 'from' field 473 */ 474 c = p->from.class; 475 if(c == 0) { 476 c = aclass(&p->from) + 1; 477 p->from.class = c; 478 } 479 c--; 480 switch(c) { 481 default: 482 print("unknown class %d %D\n", c, &p->from); 483 484 case C_ZCON: 485 case C_SCON: 486 case C_ADD0CON: 487 case C_AND0CON: 488 case C_ADDCON: 489 case C_ANDCON: 490 case C_UCON: 491 case C_LCON: 492 case C_NONE: 493 case C_SBRA: 494 case C_LBRA: 495 break; 496 case C_HI: 497 case C_LO: 498 s->used.cc |= E_HILO; 499 break; 500 case C_FCREG: 501 s->used.cc |= E_FCR; 502 break; 503 case C_MREG: 504 s->used.cc |= E_MCR; 505 break; 506 case C_ZOREG: 507 case C_SOREG: 508 case C_LOREG: 509 c = p->from.reg; 510 s->used.ireg |= 1<<c; 511 if(ld) 512 p->mark |= LOAD; 513 s->size = sz; 514 s->soffset = regoff(&p->from); 515 516 m = ANYMEM; 517 if(c == REGSB) 518 m = E_MEMSB; 519 if(c == REGSP) 520 m = E_MEMSP; 521 522 s->used.cc |= m; 523 break; 524 case C_SACON: 525 case C_LACON: 526 s->used.ireg |= 1<<REGSP; 527 break; 528 case C_SECON: 529 case C_LECON: 530 s->used.ireg |= 1<<REGSB; 531 break; 532 case C_REG: 533 s->used.ireg |= 1<<p->from.reg; 534 break; 535 case C_FREG: 536 /* do better -- determine double prec */ 537 s->used.freg |= 1<<p->from.reg; 538 s->used.freg |= 1<<(p->from.reg|1); 539 if(ld && p->to.type == D_REG) 540 p->mark |= LOAD; 541 break; 542 case C_SAUTO: 543 case C_LAUTO: 544 s->used.ireg |= 1<<REGSP; 545 if(ld) 546 p->mark |= LOAD; 547 if(ad) 548 break; 549 s->size = sz; 550 s->soffset = regoff(&p->from); 551 552 s->used.cc |= E_MEMSP; 553 break; 554 case C_SEXT: 555 case C_LEXT: 556 s->used.ireg |= 1<<REGSB; 557 if(ld) 558 p->mark |= LOAD; 559 if(ad) 560 break; 561 s->size = sz; 562 s->soffset = regoff(&p->from); 563 564 s->used.cc |= E_MEMSB; 565 break; 566 } 567 568 c = p->reg; 569 if(c != NREG) { 570 if(p->from.type == D_FREG || p->to.type == D_FREG) { 571 s->used.freg |= 1<<c; 572 s->used.freg |= 1<<(c|1); 573 } else 574 s->used.ireg |= 1<<c; 575 } 576 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */ 577 } 578 579 /* 580 * test to see if 2 instrictions can be 581 * interchanged without changing semantics 582 */ 583 int 584 depend(Sch *sa, Sch *sb) 585 { 586 ulong x; 587 588 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 589 return 1; 590 if(sb->set.ireg & sa->used.ireg) 591 return 1; 592 593 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 594 return 1; 595 if(sb->set.freg & sa->used.freg) 596 return 1; 597 598 /* 599 * special case. 600 * loads from same address cannot pass. 601 * this is for hardware fifo's and the like 602 */ 603 if(sa->used.cc & sb->used.cc & E_MEM) 604 if(sa->p.reg == sb->p.reg) 605 if(regoff(&sa->p.from) == regoff(&sb->p.from)) 606 return 1; 607 608 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 609 (sb->set.cc & sa->used.cc); 610 if(x) { 611 /* 612 * allow SB and SP to pass each other. 613 * allow SB to pass SB iff doffsets are ok 614 * anything else conflicts 615 */ 616 if(x != E_MEMSP && x != E_MEMSB) 617 return 1; 618 x = sa->set.cc | sb->set.cc | 619 sa->used.cc | sb->used.cc; 620 if(x & E_MEM) 621 return 1; 622 if(offoverlap(sa, sb)) 623 return 1; 624 } 625 626 return 0; 627 } 628 629 int 630 offoverlap(Sch *sa, Sch *sb) 631 { 632 633 if(sa->soffset < sb->soffset) { 634 if(sa->soffset+sa->size > sb->soffset) 635 return 1; 636 return 0; 637 } 638 if(sb->soffset+sb->size > sa->soffset) 639 return 1; 640 return 0; 641 } 642 643 /* 644 * test 2 adjacent instructions 645 * and find out if inserted instructions 646 * are desired to prevent stalls. 647 */ 648 int 649 conflict(Sch *sa, Sch *sb) 650 { 651 652 if(sa->set.ireg & sb->used.ireg) 653 return 1; 654 if(sa->set.freg & sb->used.freg) 655 return 1; 656 if(sa->set.cc & sb->used.cc) 657 return 1; 658 659 return 0; 660 } 661 662 int 663 compound(Prog *p) 664 { 665 Optab *o; 666 667 o = oplook(p); 668 if(o->size != 4) 669 return 1; 670 if(p->to.type == D_REG && p->to.reg == REGSB) 671 return 1; 672 return 0; 673 } 674 675 void 676 dumpbits(Sch *s, Dep *d) 677 { 678 int i; 679 680 for(i=0; i<32; i++) 681 if(d->ireg & (1<<i)) 682 Bprint(&bso, " R%d", i); 683 for(i=0; i<32; i++) 684 if(d->freg & (1<<i)) 685 Bprint(&bso, " F%d", i); 686 for(i=0; i<32; i++) 687 switch(d->cc & (1<<i)) { 688 default: 689 break; 690 case E_HILO: 691 Bprint(&bso, " HILO"); 692 break; 693 case E_FCR: 694 Bprint(&bso, " FCR"); 695 break; 696 case E_MCR: 697 Bprint(&bso, " MCR"); 698 break; 699 case E_MEM: 700 Bprint(&bso, " MEM%d", s->size); 701 break; 702 case E_MEMSB: 703 Bprint(&bso, " SB%d", s->size); 704 break; 705 case E_MEMSP: 706 Bprint(&bso, " SP%d", s->size); 707 break; 708 } 709 } 710