1 #include "l.h" 2 3 enum 4 { 5 E_ICC = 1<<0, 6 E_FCC = 1<<1, 7 E_MEM = 1<<2, 8 E_MEMSP = 1<<3, /* uses offset and size */ 9 E_MEMSB = 1<<4, /* uses offset and size */ 10 E_LR = 1<<5, 11 E_CR = 1<<6, 12 E_CTR = 1<<7, 13 E_XER = 1<<8, 14 15 E_CR0 = 0xF<<0, 16 E_CR1 = 0xF<<4, 17 18 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 19 ALL = ~0, 20 }; 21 22 typedef struct Sch Sch; 23 typedef struct Dep Dep; 24 25 struct Dep 26 { 27 ulong ireg; 28 ulong freg; 29 ulong cc; 30 ulong cr; 31 }; 32 struct Sch 33 { 34 Prog p; 35 Dep set; 36 Dep used; 37 long soffset; 38 char size; 39 char comp; 40 }; 41 42 void regused(Sch*, Prog*); 43 int depend(Sch*, Sch*); 44 int conflict(Sch*, Sch*); 45 int offoverlap(Sch*, Sch*); 46 void dumpbits(Sch*, Dep*); 47 48 void 49 sched(Prog *p0, Prog *pe) 50 { 51 Prog *p, *q; 52 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 53 54 if(!debug['Q']) 55 return; 56 /* 57 * build side structure 58 */ 59 s = sch; 60 for(p=p0;; p=p->link) { 61 memset(s, 0, sizeof(*s)); 62 s->p = *p; 63 regused(s, p); 64 if(debug['X']) { 65 Bprint(&bso, "%P\tset", &s->p); 66 dumpbits(s, &s->set); 67 Bprint(&bso, "; used"); 68 dumpbits(s, &s->used); 69 if(s->comp) 70 Bprint(&bso, "; compound"); 71 if(s->p.mark & LOAD) 72 Bprint(&bso, "; load"); 73 if(s->p.mark & BRANCH) 74 Bprint(&bso, "; branch"); 75 if(s->p.mark & FCMP) 76 Bprint(&bso, "; fcmp"); 77 Bprint(&bso, "\n"); 78 } 79 s++; 80 if(p == pe) 81 break; 82 } 83 se = s; 84 85 for(s=se-1; s>=sch; s--) { 86 87 /* 88 * load delay. interlocked. 89 */ 90 if(s->p.mark & LOAD) { 91 if(s >= se-1) 92 continue; 93 if(!conflict(s, (s+1))) 94 continue; 95 /* 96 * s is load, s+1 is immediate use of result 97 * t is the trial instruction to insert between s and s+1 98 */ 99 for(t=s-1; t>=sch; t--) { 100 if(t->p.mark & BRANCH) 101 goto no2; 102 if(t->p.mark & FCMP) 103 if((s+1)->p.mark & BRANCH) 104 goto no2; 105 if(t->p.mark & LOAD) 106 if(conflict(t, (s+1))) 107 goto no2; 108 for(u=t+1; u<=s; u++) 109 if(depend(u, t)) 110 goto no2; 111 goto out2; 112 no2:; 113 } 114 if(debug['X']) 115 Bprint(&bso, "?l%P\n", &s->p); 116 continue; 117 out2: 118 if(debug['X']) { 119 Bprint(&bso, "!l%P\n", &t->p); 120 Bprint(&bso, "%P\n", &s->p); 121 } 122 stmp = *t; 123 memmove(t, t+1, (uchar*)s - (uchar*)t); 124 *s = stmp; 125 s--; 126 continue; 127 } 128 129 /* 130 * fop2 delay. 131 */ 132 if(s->p.mark & FCMP) { 133 if(s >= se-1) 134 continue; 135 if(!((s+1)->p.mark & BRANCH)) 136 continue; 137 /* t is the trial instruction to use */ 138 for(t=s-1; t>=sch; t--) { 139 for(u=t+1; u<=s; u++) 140 if(depend(u, t)) 141 goto no3; 142 goto out3; 143 no3:; 144 } 145 if(debug['X']) 146 Bprint(&bso, "?f%P\n", &s->p); 147 continue; 148 out3: 149 if(debug['X']) { 150 Bprint(&bso, "!f%P\n", &t->p); 151 Bprint(&bso, "%P\n", &s->p); 152 } 153 stmp = *t; 154 memmove(t, t+1, (uchar*)s - (uchar*)t); 155 *s = stmp; 156 s--; 157 continue; 158 } 159 } 160 161 /* 162 * put it all back 163 */ 164 for(s=sch, p=p0; s<se; s++, p=q) { 165 q = p->link; 166 if(q != s->p.link) { 167 *p = s->p; 168 p->link = q; 169 } 170 } 171 if(debug['X']) 172 Bprint(&bso, "\n"); 173 } 174 175 void 176 regused(Sch *s, Prog *realp) 177 { 178 int c, ar, ad, ld, sz, nr, upd; 179 ulong m; 180 Prog *p; 181 182 p = &s->p; 183 s->comp = compound(p); 184 if(s->comp) { 185 s->set.ireg |= 1<<REGTMP; 186 s->used.ireg |= 1<<REGTMP; 187 } 188 ar = 0; /* dest is really reference */ 189 ad = 0; /* source/dest is really address */ 190 ld = 0; /* opcode is load instruction */ 191 sz = 32*4; /* size of load/store for overlap computation */ 192 nr = 0; /* source/dest is not really reg */ 193 upd = 0; /* move with update; changes reg */ 194 195 /* 196 * flags based on opcode 197 */ 198 switch(p->as) { 199 case ATEXT: 200 curtext = realp; 201 autosize = p->to.offset + 4; 202 ad = 1; 203 break; 204 case ABL: 205 s->set.cc |= E_LR; 206 ar = 1; 207 ad = 1; 208 break; 209 case ABR: 210 ar = 1; 211 ad = 1; 212 break; 213 case ACMP: 214 s->set.cc |= E_ICC; 215 if(p->reg == 0) 216 s->set.cr |= E_CR0; 217 else 218 s->set.cr |= (0xF<<((p->reg&7)*4)); 219 ar = 1; 220 break; 221 case AFCMPO: 222 case AFCMPU: 223 s->set.cc |= E_FCC; 224 if(p->reg == 0) 225 s->set.cr |= E_CR0; 226 else 227 s->set.cr |= (0xF<<((p->reg&7)*4)); 228 ar = 1; 229 break; 230 case ACRAND: 231 case ACRANDN: 232 case ACREQV: 233 case ACRNAND: 234 case ACRNOR: 235 case ACROR: 236 case ACRORN: 237 case ACRXOR: 238 s->used.cr |= 1<<p->from.reg; 239 s->set.cr |= 1<<p->to.reg; 240 nr = 1; 241 break; 242 case ABCL: /* tricky */ 243 s->used.cc |= E_FCC|E_ICC; 244 s->used.cr = ALL; 245 s->set.cc |= E_LR; 246 ar = 1; 247 break; 248 case ABC: /* tricky */ 249 s->used.cc |= E_FCC|E_ICC; 250 s->used.cr = ALL; 251 ar = 1; 252 break; 253 case ABEQ: 254 case ABGE: 255 case ABGT: 256 case ABLE: 257 case ABLT: 258 case ABNE: 259 case ABVC: 260 case ABVS: 261 s->used.cc |= E_ICC; 262 s->used.cr |= E_CR0; 263 ar = 1; 264 break; 265 case ALSW: 266 case AMOVMW: 267 /* could do better */ 268 sz = 32*4; 269 ld = 1; 270 break; 271 case AMOVBU: 272 case AMOVBZU: 273 upd = 1; 274 sz = 1; 275 ld = 1; 276 break; 277 case AMOVB: 278 case AMOVBZ: 279 sz = 1; 280 ld = 1; 281 break; 282 case AMOVHU: 283 case AMOVHZU: 284 upd = 1; 285 sz = 2; 286 ld = 1; 287 break; 288 case AMOVH: 289 case AMOVHBR: 290 case AMOVHZ: 291 sz = 2; 292 ld = 1; 293 break; 294 case AFMOVSU: 295 case AMOVWU: 296 upd = 1; 297 sz = 4; 298 ld = 1; 299 break; 300 case AFMOVS: 301 case AMOVW: 302 case AMOVWBR: 303 case ALWAR: 304 sz = 4; 305 ld = 1; 306 break; 307 case AFMOVDU: 308 upd = 1; 309 sz = 8; 310 ld = 1; 311 break; 312 case AFMOVD: 313 sz = 8; 314 ld = 1; 315 break; 316 case AFMOVDCC: 317 sz = 8; 318 ld = 1; 319 s->set.cc |= E_FCC; 320 s->set.cr |= E_CR1; 321 break; 322 case AMOVFL: 323 case AMOVCRFS: 324 case AMTFSB0: 325 case AMTFSB0CC: 326 case AMTFSB1: 327 case AMTFSB1CC: 328 s->set.ireg = ALL; 329 s->set.freg = ALL; 330 s->set.cc = ALL; 331 s->set.cr = ALL; 332 break; 333 case AADDCC: 334 case AADDVCC: 335 case AADDCCC: 336 case AADDCVCC: 337 case AADDMECC: 338 case AADDMEVCC: 339 case AADDECC: 340 case AADDEVCC: 341 case AADDZECC: 342 case AADDZEVCC: 343 case AANDCC: 344 case AANDNCC: 345 case ACNTLZWCC: 346 case ADIVWCC: 347 case ADIVWVCC: 348 case ADIVWUCC: 349 case ADIVWUVCC: 350 case AEQVCC: 351 case AEXTSBCC: 352 case AEXTSHCC: 353 case AMULHWCC: 354 case AMULHWUCC: 355 case AMULLWCC: 356 case AMULLWVCC: 357 case ANANDCC: 358 case ANEGCC: 359 case ANEGVCC: 360 case ANORCC: 361 case AORCC: 362 case AORNCC: 363 case AREMCC: 364 case AREMVCC: 365 case AREMUCC: 366 case AREMUVCC: 367 case ARLWMICC: 368 case ARLWNMCC: 369 case ASLWCC: 370 case ASRAWCC: 371 case ASRWCC: 372 case ASTWCCC: 373 case ASUBCC: 374 case ASUBVCC: 375 case ASUBCCC: 376 case ASUBCVCC: 377 case ASUBMECC: 378 case ASUBMEVCC: 379 case ASUBECC: 380 case ASUBEVCC: 381 case ASUBZECC: 382 case ASUBZEVCC: 383 case AXORCC: 384 s->set.cc |= E_ICC; 385 s->set.cr |= E_CR0; 386 break; 387 case AFABSCC: 388 case AFADDCC: 389 case AFADDSCC: 390 case AFCTIWCC: 391 case AFCTIWZCC: 392 case AFDIVCC: 393 case AFDIVSCC: 394 case AFMADDCC: 395 case AFMADDSCC: 396 case AFMSUBCC: 397 case AFMSUBSCC: 398 case AFMULCC: 399 case AFMULSCC: 400 case AFNABSCC: 401 case AFNEGCC: 402 case AFNMADDCC: 403 case AFNMADDSCC: 404 case AFNMSUBCC: 405 case AFNMSUBSCC: 406 case AFRSPCC: 407 case AFSUBCC: 408 case AFSUBSCC: 409 s->set.cc |= E_FCC; 410 s->set.cr |= E_CR1; 411 break; 412 } 413 414 /* 415 * flags based on 'to' field 416 */ 417 c = p->to.class; 418 if(c == 0) { 419 c = aclass(&p->to) + 1; 420 p->to.class = c; 421 } 422 c--; 423 switch(c) { 424 default: 425 print("unknown class %d %D\n", c, &p->to); 426 427 case C_NONE: 428 case C_ZCON: 429 case C_SCON: 430 case C_UCON: 431 case C_LCON: 432 case C_ADDCON: 433 case C_ANDCON: 434 case C_SBRA: 435 case C_LBRA: 436 break; 437 case C_CREG: 438 c = p->to.reg; 439 if(c == NREG) 440 s->set.cr = ALL; 441 else 442 s->set.cr |= (0xF << ((p->from.reg&7)*4)); 443 s->set.cc = ALL; 444 break; 445 case C_SPR: 446 case C_SREG: 447 case C_FPSCR: 448 case C_MSR: 449 case C_XER: 450 s->set.ireg = ALL; 451 s->set.freg = ALL; 452 s->set.cc = ALL; 453 s->set.cr = ALL; 454 break; 455 case C_LR: 456 s->set.cc |= E_LR; 457 break; 458 case C_CTR: 459 s->set.cc |= E_CTR; 460 break; 461 case C_ZOREG: 462 case C_SOREG: 463 case C_LOREG: 464 c = p->to.reg; 465 s->used.ireg |= 1<<c; 466 if(upd) 467 s->set.ireg |= 1<<c; 468 if(ad) 469 break; 470 s->size = sz; 471 s->soffset = regoff(&p->to); 472 473 m = ANYMEM; 474 if(c == REGSB) 475 m = E_MEMSB; 476 if(c == REGSP) 477 m = E_MEMSP; 478 479 if(ar) 480 s->used.cc |= m; 481 else 482 s->set.cc |= m; 483 break; 484 case C_SACON: 485 case C_LACON: 486 s->used.ireg |= 1<<REGSP; 487 if(upd) 488 s->set.ireg |= 1<<c; 489 break; 490 case C_SECON: 491 case C_LECON: 492 s->used.ireg |= 1<<REGSB; 493 if(upd) 494 s->set.ireg |= 1<<c; 495 break; 496 case C_REG: 497 if(nr) 498 break; 499 if(ar) 500 s->used.ireg |= 1<<p->to.reg; 501 else 502 s->set.ireg |= 1<<p->to.reg; 503 break; 504 case C_FREG: 505 if(ar) 506 s->used.freg |= 1<<p->to.reg; 507 else 508 s->set.freg |= 1<<p->to.reg; 509 break; 510 case C_SAUTO: 511 case C_LAUTO: 512 s->used.ireg |= 1<<REGSP; 513 if(upd) 514 s->set.ireg |= 1<<c; 515 if(ad) 516 break; 517 s->size = sz; 518 s->soffset = regoff(&p->to); 519 520 if(ar) 521 s->used.cc |= E_MEMSP; 522 else 523 s->set.cc |= E_MEMSP; 524 break; 525 case C_SEXT: 526 case C_LEXT: 527 s->used.ireg |= 1<<REGSB; 528 if(upd) 529 s->set.ireg |= 1<<c; 530 if(ad) 531 break; 532 s->size = sz; 533 s->soffset = regoff(&p->to); 534 535 if(ar) 536 s->used.cc |= E_MEMSB; 537 else 538 s->set.cc |= E_MEMSB; 539 break; 540 } 541 542 /* 543 * flags based on 'from' field 544 */ 545 c = p->from.class; 546 if(c == 0) { 547 c = aclass(&p->from) + 1; 548 p->from.class = c; 549 } 550 c--; 551 switch(c) { 552 default: 553 print("unknown class %d %D\n", c, &p->from); 554 555 case C_NONE: 556 case C_ZCON: 557 case C_SCON: 558 case C_UCON: 559 case C_LCON: 560 case C_ADDCON: 561 case C_ANDCON: 562 case C_SBRA: 563 case C_LBRA: 564 c = p->from.reg; 565 if(c != NREG) 566 s->used.ireg |= 1<<c; 567 break; 568 case C_CREG: 569 c = p->from.reg; 570 if(c == NREG) 571 s->used.cr = ALL; 572 else 573 s->used.cr |= (0xF << ((p->from.reg&7)*4)); 574 s->used.cc = ALL; 575 break; 576 case C_SPR: 577 case C_SREG: 578 case C_FPSCR: 579 case C_MSR: 580 case C_XER: 581 s->set.ireg = ALL; 582 s->set.freg = ALL; 583 s->set.cc = ALL; 584 s->set.cr = ALL; 585 break; 586 case C_LR: 587 s->used.cc |= E_LR; 588 break; 589 case C_CTR: 590 s->used.cc |= E_CTR; 591 break; 592 case C_ZOREG: 593 case C_SOREG: 594 case C_LOREG: 595 c = p->from.reg; 596 s->used.ireg |= 1<<c; 597 if(ld) 598 p->mark |= LOAD; 599 if(ad) 600 break; 601 s->size = sz; 602 s->soffset = regoff(&p->from); 603 604 m = ANYMEM; 605 if(c == REGSB) 606 m = E_MEMSB; 607 if(c == REGSP) 608 m = E_MEMSP; 609 610 s->used.cc |= m; 611 break; 612 case C_SACON: 613 case C_LACON: 614 s->used.ireg |= 1<<REGSP; 615 break; 616 case C_SECON: 617 case C_LECON: 618 s->used.ireg |= 1<<REGSB; 619 break; 620 case C_REG: 621 if(nr) 622 break; 623 s->used.ireg |= 1<<p->from.reg; 624 break; 625 case C_FREG: 626 s->used.freg |= 1<<p->from.reg; 627 break; 628 case C_SAUTO: 629 case C_LAUTO: 630 s->used.ireg |= 1<<REGSP; 631 if(ld) 632 p->mark |= LOAD; 633 if(ad) 634 break; 635 s->size = sz; 636 s->soffset = regoff(&p->from); 637 638 s->used.cc |= E_MEMSP; 639 break; 640 case C_SEXT: 641 case C_LEXT: 642 s->used.ireg |= 1<<REGSB; 643 if(ld) 644 p->mark |= LOAD; 645 if(ad) 646 break; 647 s->size = sz; 648 s->soffset = regoff(&p->from); 649 650 s->used.cc |= E_MEMSB; 651 break; 652 } 653 654 c = p->reg; 655 if(c != NREG) { 656 if(p->from.type == D_FREG || p->to.type == D_FREG) 657 s->used.freg |= 1<<c; 658 else 659 s->used.ireg |= 1<<c; 660 } 661 } 662 663 /* 664 * test to see if 2 instrictions can be 665 * interchanged without changing semantics 666 */ 667 int 668 depend(Sch *sa, Sch *sb) 669 { 670 ulong x; 671 672 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 673 return 1; 674 if(sb->set.ireg & sa->used.ireg) 675 return 1; 676 677 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 678 return 1; 679 if(sb->set.freg & sa->used.freg) 680 return 1; 681 682 if(sa->set.cr & (sb->set.cr|sb->used.cr)) 683 return 1; 684 if(sb->set.cr & sa->used.cr) 685 return 1; 686 687 688 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 689 (sb->set.cc & sa->used.cc); 690 if(x) { 691 /* 692 * allow SB and SP to pass each other. 693 * allow SB to pass SB iff doffsets are ok 694 * anything else conflicts 695 */ 696 if(x != E_MEMSP && x != E_MEMSB) 697 return 1; 698 x = sa->set.cc | sb->set.cc | 699 sa->used.cc | sb->used.cc; 700 if(x & E_MEM) 701 return 1; 702 if(offoverlap(sa, sb)) 703 return 1; 704 } 705 706 return 0; 707 } 708 709 int 710 offoverlap(Sch *sa, Sch *sb) 711 { 712 713 if(sa->soffset < sb->soffset) { 714 if(sa->soffset+sa->size > sb->soffset) 715 return 1; 716 return 0; 717 } 718 if(sb->soffset+sb->size > sa->soffset) 719 return 1; 720 return 0; 721 } 722 723 /* 724 * test 2 adjacent instructions 725 * and find out if inserted instructions 726 * are desired to prevent stalls. 727 * first instruction is a load instruction. 728 */ 729 int 730 conflict(Sch *sa, Sch *sb) 731 { 732 733 if(sa->set.ireg & sb->used.ireg) 734 return 1; 735 if(sa->set.freg & sb->used.freg) 736 return 1; 737 if(sa->set.cr & sb->used.cr) 738 return 1; 739 return 0; 740 } 741 742 int 743 compound(Prog *p) 744 { 745 Optab *o; 746 747 o = oplook(p); 748 if(o->size != 4) 749 return 1; 750 if(p->to.type == D_REG && p->to.reg == REGSB) 751 return 1; 752 return 0; 753 } 754 755 void 756 dumpbits(Sch *s, Dep *d) 757 { 758 int i; 759 760 for(i=0; i<32; i++) 761 if(d->ireg & (1<<i)) 762 Bprint(&bso, " R%d", i); 763 for(i=0; i<32; i++) 764 if(d->freg & (1<<i)) 765 Bprint(&bso, " F%d", i); 766 for(i=0; i<32; i++) 767 if(d->cr & (1<<i)) 768 Bprint(&bso, " C%d", i); 769 for(i=0; i<32; i++) 770 switch(d->cc & (1<<i)) { 771 default: 772 break; 773 case E_ICC: 774 Bprint(&bso, " ICC"); 775 break; 776 case E_FCC: 777 Bprint(&bso, " FCC"); 778 break; 779 case E_LR: 780 Bprint(&bso, " LR"); 781 break; 782 case E_CR: 783 Bprint(&bso, " CR"); 784 break; 785 case E_CTR: 786 Bprint(&bso, " CTR"); 787 break; 788 case E_XER: 789 Bprint(&bso, " XER"); 790 break; 791 case E_MEM: 792 Bprint(&bso, " MEM%d", s->size); 793 break; 794 case E_MEMSB: 795 Bprint(&bso, " SB%d", s->size); 796 break; 797 case E_MEMSP: 798 Bprint(&bso, " SP%d", s->size); 799 break; 800 } 801 } 802