1 #include "l.h" 2 3 enum 4 { 5 E_HILO = 1<<0, 6 E_FCR = 1<<1, 7 E_MCR = 1<<2, 8 E_MEM = 1<<3, 9 E_MEMSP = 1<<4, /* uses offset and size */ 10 E_MEMSB = 1<<5, /* uses offset and size */ 11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 12 DELAY = BRANCH|LOAD|FCMP, 13 }; 14 15 typedef struct Sch Sch; 16 typedef struct Dep Dep; 17 18 struct Dep 19 { 20 ulong ireg; 21 ulong freg; 22 ulong cc; 23 }; 24 struct Sch 25 { 26 Prog p; 27 Dep set; 28 Dep used; 29 long soffset; 30 char size; 31 char nop; 32 char comp; 33 }; 34 35 void regsused(Sch*, Prog*); 36 int depend(Sch*, Sch*); 37 int conflict(Sch*, Sch*); 38 int offoverlap(Sch*, Sch*); 39 void dumpbits(Sch*, Dep*); 40 41 void 42 sched(Prog *p0, Prog *pe) 43 { 44 Prog *p, *q; 45 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 46 47 /* 48 * build side structure 49 */ 50 s = sch; 51 for(p=p0;; p=p->link) { 52 memset(s, 0, sizeof(*s)); 53 s->p = *p; 54 regsused(s, p); 55 if(debug['X']) { 56 Bprint(&bso, "%P\t\tset", &s->p); 57 dumpbits(s, &s->set); 58 Bprint(&bso, "; used"); 59 dumpbits(s, &s->used); 60 if(s->comp) 61 Bprint(&bso, "; compound"); 62 if(s->p.mark & LOAD) 63 Bprint(&bso, "; load"); 64 if(s->p.mark & BRANCH) 65 Bprint(&bso, "; branch"); 66 if(s->p.mark & FCMP) 67 Bprint(&bso, "; fcmp"); 68 Bprint(&bso, "\n"); 69 } 70 if(p == pe) 71 break; 72 s++; 73 } 74 se = s; 75 76 /* 77 * prepass to move things around 78 * does nothing, but tries to make 79 * the actual scheduler work better 80 */ 81 for(s=sch; s<=se; s++) { 82 if(!(s->p.mark & LOAD)) 83 continue; 84 /* always good to put nonconflict loads together */ 85 for(t=s+1; t<=se; t++) { 86 if(!(t->p.mark & LOAD)) 87 continue; 88 if(t->p.mark & BRANCH) 89 break; 90 if(conflict(s, t)) 91 break; 92 for(u=t-1; u>s; u--) 93 if(depend(u, t)) 94 goto no11; 95 u = s+1; 96 stmp = *t; 97 memmove(s+2, u, (uchar*)t - (uchar*)u); 98 *u = stmp; 99 break; 100 } 101 no11: 102 103 /* put schedule fodder above load */ 104 for(t=s+1; t<=se; t++) { 105 if(t->p.mark & BRANCH) 106 break; 107 if(s > sch && conflict(s-1, t)) 108 continue; 109 for(u=t-1; u>=s; u--) 110 if(depend(t, u)) 111 goto no1; 112 stmp = *t; 113 memmove(s+1, s, (uchar*)t - (uchar*)s); 114 *s = stmp; 115 if(!(s->p.mark & LOAD)) 116 break; 117 no1:; 118 } 119 } 120 121 for(s=se; s>=sch; s--) { 122 if(!(s->p.mark & DELAY)) 123 continue; 124 if(s < se) 125 if(!conflict(s, s+1)) 126 goto out3; 127 /* 128 * s is load, s+1 is immediate use of result or end of block 129 * t is the trial instruction to insert between s and s+1 130 */ 131 if(!debug['Y']) 132 for(t=s-1; t>=sch; t--) { 133 if(t->comp) 134 if(s->p.mark & BRANCH) 135 goto no2; 136 if(t->p.mark & DELAY) 137 if(s >= se || conflict(t, s+1)) 138 goto no2; 139 for(u=t+1; u<=s; u++) 140 if(depend(u, t)) 141 goto no2; 142 goto out2; 143 no2:; 144 } 145 if(debug['X']) 146 Bprint(&bso, "?l%P\n", &s->p); 147 s->nop = 1; 148 if(debug['v']) { 149 if(s->p.mark & LOAD) { 150 nop.load.count++; 151 nop.load.outof++; 152 } 153 if(s->p.mark & BRANCH) { 154 nop.branch.count++; 155 nop.branch.outof++; 156 } 157 if(s->p.mark & FCMP) { 158 nop.fcmp.count++; 159 nop.fcmp.outof++; 160 } 161 } 162 continue; 163 164 out2: 165 if(debug['X']) { 166 Bprint(&bso, "!l%P\n", &t->p); 167 Bprint(&bso, "%P\n", &s->p); 168 } 169 stmp = *t; 170 memmove(t, t+1, (uchar*)s - (uchar*)t); 171 *s = stmp; 172 s--; 173 174 out3: 175 if(debug['v']) { 176 if(s->p.mark & LOAD) 177 nop.load.outof++; 178 if(s->p.mark & BRANCH) 179 nop.branch.outof++; 180 if(s->p.mark & FCMP) 181 nop.fcmp.outof++; 182 } 183 } 184 185 /* Avoid HI/LO use->set */ 186 t = sch+1; 187 for(s=sch; s<se-1; s++, t++) { 188 if((s->used.cc & E_HILO) == 0) 189 continue; 190 if(t->set.cc & E_HILO) 191 s->nop = 2; 192 } 193 194 /* 195 * put it all back 196 */ 197 for(s=sch, p=p0; s<=se; s++, p=q) { 198 q = p->link; 199 if(q != s->p.link) { 200 *p = s->p; 201 p->link = q; 202 } 203 while(s->nop--) 204 addnop(p); 205 } 206 if(debug['X']) { 207 Bprint(&bso, "\n"); 208 Bflush(&bso); 209 } 210 } 211 212 void 213 regsused(Sch *s, Prog *realp) 214 { 215 int c, ar, ad, ld, sz; 216 ulong m; 217 Prog *p; 218 219 p = &s->p; 220 s->comp = compound(p); 221 s->nop = 0; 222 if(s->comp) { 223 s->set.ireg |= 1<<REGTMP; 224 s->used.ireg |= 1<<REGTMP; 225 } 226 227 ar = 0; /* dest is really reference */ 228 ad = 0; /* source/dest is really address */ 229 ld = 0; /* opcode is load instruction */ 230 sz = 20; /* size of load/store for overlap computation */ 231 232 /* 233 * flags based on opcode 234 */ 235 switch(p->as) { 236 case ATEXT: 237 curtext = realp; 238 autosize = p->to.offset + 4; 239 ad = 1; 240 break; 241 case AJAL: 242 c = p->reg; 243 if(c == NREG) 244 c = REGLINK; 245 s->set.ireg |= 1<<c; 246 ar = 1; 247 ad = 1; 248 break; 249 case ABGEZAL: 250 case ABLTZAL: 251 s->set.ireg |= 1<<REGLINK; 252 case ABEQ: 253 case ABGEZ: 254 case ABGTZ: 255 case ABLEZ: 256 case ABLTZ: 257 case ABNE: 258 ar = 1; 259 ad = 1; 260 break; 261 case ABFPT: 262 case ABFPF: 263 ad = 1; 264 s->used.cc |= E_FCR; 265 break; 266 case ACMPEQD: 267 case ACMPEQF: 268 case ACMPGED: 269 case ACMPGEF: 270 case ACMPGTD: 271 case ACMPGTF: 272 ar = 1; 273 s->set.cc |= E_FCR; 274 p->mark |= FCMP; 275 break; 276 case AJMP: 277 ar = 1; 278 ad = 1; 279 break; 280 case AMOVB: 281 case AMOVBU: 282 sz = 1; 283 ld = 1; 284 break; 285 case AMOVH: 286 case AMOVHU: 287 sz = 2; 288 ld = 1; 289 break; 290 case AMOVF: 291 case AMOVW: 292 case AMOVWL: 293 case AMOVWR: 294 sz = 4; 295 ld = 1; 296 break; 297 case AMOVD: 298 case AMOVV: 299 case AMOVVL: 300 case AMOVVR: 301 sz = 8; 302 ld = 1; 303 break; 304 case ADIV: 305 case ADIVU: 306 case AMUL: 307 case AMULU: 308 case AREM: 309 case AREMU: 310 s->set.cc = E_HILO; 311 case AADD: 312 case AADDU: 313 case AAND: 314 case ANOR: 315 case AOR: 316 case ASGT: 317 case ASGTU: 318 case ASLL: 319 case ASRA: 320 case ASRL: 321 case ASUB: 322 case ASUBU: 323 case AXOR: 324 325 case AADDD: 326 case AADDF: 327 case AADDW: 328 case ASUBD: 329 case ASUBF: 330 case ASUBW: 331 case AMULF: 332 case AMULD: 333 case AMULW: 334 case ADIVF: 335 case ADIVD: 336 case ADIVW: 337 if(p->reg == NREG) { 338 if(p->to.type == D_REG || p->to.type == D_FREG) 339 p->reg = p->to.reg; 340 if(p->reg == NREG) 341 print("botch %P\n", p); 342 } 343 break; 344 } 345 346 /* 347 * flags based on 'to' field 348 */ 349 c = p->to.class; 350 if(c == 0) { 351 c = aclass(&p->to) + 1; 352 p->to.class = c; 353 } 354 c--; 355 switch(c) { 356 default: 357 print("unknown class %d %D\n", c, &p->to); 358 359 case C_ZCON: 360 case C_SCON: 361 case C_ADD0CON: 362 case C_AND0CON: 363 case C_ADDCON: 364 case C_ANDCON: 365 case C_UCON: 366 case C_LCON: 367 case C_NONE: 368 case C_SBRA: 369 case C_LBRA: 370 break; 371 372 case C_HI: 373 case C_LO: 374 s->set.cc |= E_HILO; 375 break; 376 case C_FCREG: 377 s->set.cc |= E_FCR; 378 break; 379 case C_MREG: 380 s->set.cc |= E_MCR; 381 break; 382 case C_ZOREG: 383 case C_SOREG: 384 case C_LOREG: 385 c = p->to.reg; 386 s->used.ireg |= 1<<c; 387 if(ad) 388 break; 389 s->size = sz; 390 s->soffset = regoff(&p->to); 391 392 m = ANYMEM; 393 if(c == REGSB) 394 m = E_MEMSB; 395 if(c == REGSP) 396 m = E_MEMSP; 397 398 if(ar) 399 s->used.cc |= m; 400 else 401 s->set.cc |= m; 402 break; 403 case C_SACON: 404 case C_LACON: 405 s->used.ireg |= 1<<REGSP; 406 break; 407 case C_SECON: 408 case C_LECON: 409 s->used.ireg |= 1<<REGSB; 410 break; 411 case C_REG: 412 if(ar) 413 s->used.ireg |= 1<<p->to.reg; 414 else 415 s->set.ireg |= 1<<p->to.reg; 416 break; 417 case C_FREG: 418 /* do better -- determine double prec */ 419 if(ar) { 420 s->used.freg |= 1<<p->to.reg; 421 s->used.freg |= 1<<(p->to.reg|1); 422 } else { 423 s->set.freg |= 1<<p->to.reg; 424 s->set.freg |= 1<<(p->to.reg|1); 425 } 426 if(ld && p->from.type == D_REG) 427 p->mark |= LOAD; 428 break; 429 case C_SAUTO: 430 case C_LAUTO: 431 s->used.ireg |= 1<<REGSP; 432 if(ad) 433 break; 434 s->size = sz; 435 s->soffset = regoff(&p->to); 436 437 if(ar) 438 s->used.cc |= E_MEMSP; 439 else 440 s->set.cc |= E_MEMSP; 441 break; 442 case C_SEXT: 443 case C_LEXT: 444 s->used.ireg |= 1<<REGSB; 445 if(ad) 446 break; 447 s->size = sz; 448 s->soffset = regoff(&p->to); 449 450 if(ar) 451 s->used.cc |= E_MEMSB; 452 else 453 s->set.cc |= E_MEMSB; 454 break; 455 } 456 457 /* 458 * flags based on 'from' field 459 */ 460 c = p->from.class; 461 if(c == 0) { 462 c = aclass(&p->from) + 1; 463 p->from.class = c; 464 } 465 c--; 466 switch(c) { 467 default: 468 print("unknown class %d %D\n", c, &p->from); 469 470 case C_ZCON: 471 case C_SCON: 472 case C_ADD0CON: 473 case C_AND0CON: 474 case C_ADDCON: 475 case C_ANDCON: 476 case C_UCON: 477 case C_LCON: 478 case C_NONE: 479 case C_SBRA: 480 case C_LBRA: 481 break; 482 case C_HI: 483 case C_LO: 484 s->used.cc |= E_HILO; 485 break; 486 case C_FCREG: 487 s->used.cc |= E_FCR; 488 break; 489 case C_MREG: 490 s->used.cc |= E_MCR; 491 break; 492 case C_ZOREG: 493 case C_SOREG: 494 case C_LOREG: 495 c = p->from.reg; 496 s->used.ireg |= 1<<c; 497 if(ld) 498 p->mark |= LOAD; 499 s->size = sz; 500 s->soffset = regoff(&p->from); 501 502 m = ANYMEM; 503 if(c == REGSB) 504 m = E_MEMSB; 505 if(c == REGSP) 506 m = E_MEMSP; 507 508 s->used.cc |= m; 509 break; 510 case C_SACON: 511 case C_LACON: 512 s->used.ireg |= 1<<REGSP; 513 break; 514 case C_SECON: 515 case C_LECON: 516 s->used.ireg |= 1<<REGSB; 517 break; 518 case C_REG: 519 s->used.ireg |= 1<<p->from.reg; 520 break; 521 case C_FREG: 522 /* do better -- determine double prec */ 523 s->used.freg |= 1<<p->from.reg; 524 s->used.freg |= 1<<(p->from.reg|1); 525 if(ld && p->to.type == D_REG) 526 p->mark |= LOAD; 527 break; 528 case C_SAUTO: 529 case C_LAUTO: 530 s->used.ireg |= 1<<REGSP; 531 if(ld) 532 p->mark |= LOAD; 533 if(ad) 534 break; 535 s->size = sz; 536 s->soffset = regoff(&p->from); 537 538 s->used.cc |= E_MEMSP; 539 break; 540 case C_SEXT: 541 case C_LEXT: 542 s->used.ireg |= 1<<REGSB; 543 if(ld) 544 p->mark |= LOAD; 545 if(ad) 546 break; 547 s->size = sz; 548 s->soffset = regoff(&p->from); 549 550 s->used.cc |= E_MEMSB; 551 break; 552 } 553 554 c = p->reg; 555 if(c != NREG) { 556 if(p->from.type == D_FREG || p->to.type == D_FREG) { 557 s->used.freg |= 1<<c; 558 s->used.freg |= 1<<(c|1); 559 } else 560 s->used.ireg |= 1<<c; 561 } 562 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */ 563 } 564 565 /* 566 * test to see if 2 instrictions can be 567 * interchanged without changing semantics 568 */ 569 int 570 depend(Sch *sa, Sch *sb) 571 { 572 ulong x; 573 574 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 575 return 1; 576 if(sb->set.ireg & sa->used.ireg) 577 return 1; 578 579 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 580 return 1; 581 if(sb->set.freg & sa->used.freg) 582 return 1; 583 584 /* 585 * special case. 586 * loads from same address cannot pass. 587 * this is for hardware fifo's and the like 588 */ 589 if(sa->used.cc & sb->used.cc & E_MEM) 590 if(sa->p.reg == sb->p.reg) 591 if(regoff(&sa->p.from) == regoff(&sb->p.from)) 592 return 1; 593 594 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 595 (sb->set.cc & sa->used.cc); 596 if(x) { 597 /* 598 * allow SB and SP to pass each other. 599 * allow SB to pass SB iff doffsets are ok 600 * anything else conflicts 601 */ 602 if(x != E_MEMSP && x != E_MEMSB) 603 return 1; 604 x = sa->set.cc | sb->set.cc | 605 sa->used.cc | sb->used.cc; 606 if(x & E_MEM) 607 return 1; 608 if(offoverlap(sa, sb)) 609 return 1; 610 } 611 612 return 0; 613 } 614 615 int 616 offoverlap(Sch *sa, Sch *sb) 617 { 618 619 if(sa->soffset < sb->soffset) { 620 if(sa->soffset+sa->size > sb->soffset) 621 return 1; 622 return 0; 623 } 624 if(sb->soffset+sb->size > sa->soffset) 625 return 1; 626 return 0; 627 } 628 629 /* 630 * test 2 adjacent instructions 631 * and find out if inserted instructions 632 * are desired to prevent stalls. 633 */ 634 int 635 conflict(Sch *sa, Sch *sb) 636 { 637 638 if(sa->set.ireg & sb->used.ireg) 639 return 1; 640 if(sa->set.freg & sb->used.freg) 641 return 1; 642 if(sa->set.cc & sb->used.cc) 643 return 1; 644 645 return 0; 646 } 647 648 int 649 compound(Prog *p) 650 { 651 Optab *o; 652 653 o = oplook(p); 654 if(o->size != 4) 655 return 1; 656 if(p->to.type == D_REG && p->to.reg == REGSB) 657 return 1; 658 return 0; 659 } 660 661 void 662 dumpbits(Sch *s, Dep *d) 663 { 664 int i; 665 666 for(i=0; i<32; i++) 667 if(d->ireg & (1<<i)) 668 Bprint(&bso, " R%d", i); 669 for(i=0; i<32; i++) 670 if(d->freg & (1<<i)) 671 Bprint(&bso, " F%d", i); 672 for(i=0; i<32; i++) 673 switch(d->cc & (1<<i)) { 674 default: 675 break; 676 case E_HILO: 677 Bprint(&bso, " HILO"); 678 break; 679 case E_FCR: 680 Bprint(&bso, " FCR"); 681 break; 682 case E_MCR: 683 Bprint(&bso, " MCR"); 684 break; 685 case E_MEM: 686 Bprint(&bso, " MEM%d", s->size); 687 break; 688 case E_MEMSB: 689 Bprint(&bso, " SB%d", s->size); 690 break; 691 case E_MEMSP: 692 Bprint(&bso, " SP%d", s->size); 693 break; 694 } 695 } 696