1 #include "l.h" 2 3 enum 4 { 5 E_HILO = 1<<0, 6 E_FCR = 1<<1, 7 E_MCR = 1<<2, 8 E_MEM = 1<<3, 9 E_MEMSP = 1<<4, /* uses offset and size */ 10 E_MEMSB = 1<<5, /* uses offset and size */ 11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 12 DELAY = BRANCH|LOAD|FCMP, 13 }; 14 15 typedef struct Sch Sch; 16 typedef struct Dep Dep; 17 18 struct Dep 19 { 20 ulong ireg; 21 ulong freg; 22 ulong cc; 23 }; 24 struct Sch 25 { 26 Prog p; 27 Dep set; 28 Dep used; 29 long offset; 30 char size; 31 char nop; 32 char comp; 33 }; 34 35 void regused(Sch*, Prog*); 36 int depend(Sch*, Sch*); 37 int conflict(Sch*, Sch*); 38 int offoverlap(Sch*, Sch*); 39 void dumpbits(Sch*, Dep*); 40 41 void 42 sched(Prog *p0, Prog *pe) 43 { 44 Prog *p, *q; 45 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 46 47 /* 48 * build side structure 49 */ 50 s = sch; 51 for(p=p0;; p=p->link) { 52 memset(s, 0, sizeof(*s)); 53 s->p = *p; 54 regused(s, p); 55 if(debug['X']) { 56 Bprint(&bso, "%P%|set", &s->p, 40); 57 dumpbits(s, &s->set); 58 Bprint(&bso, "; used"); 59 dumpbits(s, &s->used); 60 if(s->comp) 61 Bprint(&bso, "; compound"); 62 if(s->p.mark & LOAD) 63 Bprint(&bso, "; load"); 64 if(s->p.mark & BRANCH) 65 Bprint(&bso, "; branch"); 66 if(s->p.mark & FCMP) 67 Bprint(&bso, "; fcmp"); 68 Bprint(&bso, "\n"); 69 } 70 if(p == pe) 71 break; 72 s++; 73 } 74 se = s; 75 76 /* 77 * prepass to move things around 78 * does nothing, but tries to make 79 * the actual scheduler work better 80 */ 81 for(s=sch; s<=se; s++) { 82 if(!(s->p.mark & LOAD)) 83 continue; 84 /* always good to put nonconflict loads together */ 85 for(t=s+1; t<=se; t++) { 86 if(!(t->p.mark & LOAD)) 87 continue; 88 if(t->p.mark & BRANCH) 89 break; 90 if(conflict(s, t)) 91 break; 92 for(u=t-1; u>s; u--) 93 if(depend(u, t)) 94 goto no11; 95 u = s+1; 96 stmp = *t; 97 memmove(s+2, u, (uchar*)t - (uchar*)u); 98 *u = stmp; 99 break; 100 } 101 no11: 102 103 /* put schedule fodder above load */ 104 for(t=s+1; t<=se; t++) { 105 if(t->p.mark & BRANCH) 106 break; 107 if(s > sch && conflict(s-1, t)) 108 continue; 109 for(u=t-1; u>=s; u--) 110 if(depend(t, u)) 111 goto no1; 112 stmp = *t; 113 memmove(s+1, s, (uchar*)t - (uchar*)s); 114 *s = stmp; 115 if(!(s->p.mark & LOAD)) 116 break; 117 no1:; 118 } 119 no12:; 120 } 121 122 for(s=se; s>=sch; s--) { 123 if(!(s->p.mark & DELAY)) 124 continue; 125 if(s < se) 126 if(!conflict(s, s+1)) 127 goto out3; 128 /* 129 * s is load, s+1 is immediate use of result or end of block 130 * t is the trial instruction to insert between s and s+1 131 */ 132 if(!debug['Y']) 133 for(t=s-1; t>=sch; t--) { 134 if(t->comp) 135 if(s->p.mark & BRANCH) 136 goto no2; 137 if(t->p.mark & DELAY) 138 if(s >= se || conflict(t, s+1)) 139 goto no2; 140 for(u=t+1; u<=s; u++) 141 if(depend(u, t)) 142 goto no2; 143 goto out2; 144 no2:; 145 } 146 if(debug['X']) 147 Bprint(&bso, "?l%P\n", &s->p); 148 s->nop = 1; 149 if(debug['v']) { 150 if(s->p.mark & LOAD) { 151 nop.load.count++; 152 nop.load.outof++; 153 } 154 if(s->p.mark & BRANCH) { 155 nop.branch.count++; 156 nop.branch.outof++; 157 } 158 if(s->p.mark & FCMP) { 159 nop.fcmp.count++; 160 nop.fcmp.outof++; 161 } 162 } 163 continue; 164 165 out2: 166 if(debug['X']) { 167 Bprint(&bso, "!l%P\n", &t->p); 168 Bprint(&bso, "%P\n", &s->p); 169 } 170 stmp = *t; 171 memmove(t, t+1, (uchar*)s - (uchar*)t); 172 *s = stmp; 173 s--; 174 175 out3: 176 if(debug['v']) { 177 if(s->p.mark & LOAD) 178 nop.load.outof++; 179 if(s->p.mark & BRANCH) 180 nop.branch.outof++; 181 if(s->p.mark & FCMP) 182 nop.fcmp.outof++; 183 } 184 } 185 186 /* Avoid HI/LO use->set */ 187 t = sch+1; 188 for(s=sch; s<se-1; s++, t++) { 189 if((s->used.cc & E_HILO) == 0) 190 continue; 191 if(t->set.cc & E_HILO) 192 s->nop = 2; 193 } 194 195 /* 196 * put it all back 197 */ 198 for(s=sch, p=p0; s<=se; s++, p=q) { 199 q = p->link; 200 if(q != s->p.link) { 201 *p = s->p; 202 p->link = q; 203 } 204 while(s->nop--) 205 addnop(p); 206 } 207 if(debug['X']) { 208 Bprint(&bso, "\n"); 209 Bflush(&bso); 210 } 211 } 212 213 void 214 regused(Sch *s, Prog *realp) 215 { 216 int c, ar, ad, ld, sz; 217 ulong m; 218 Prog *p; 219 220 p = &s->p; 221 s->comp = compound(p); 222 s->nop = 0; 223 if(s->comp) { 224 s->set.ireg |= 1<<REGTMP; 225 s->used.ireg |= 1<<REGTMP; 226 } 227 228 ar = 0; /* dest is really reference */ 229 ad = 0; /* source/dest is really address */ 230 ld = 0; /* opcode is load instruction */ 231 sz = 20; /* size of load/store for overlap computation */ 232 233 /* 234 * flags based on opcode 235 */ 236 switch(p->as) { 237 case ATEXT: 238 curtext = realp; 239 autosize = p->to.offset + 4; 240 ad = 1; 241 break; 242 case AJAL: 243 c = p->reg; 244 if(c == NREG) 245 c = REGLINK; 246 s->set.ireg |= 1<<c; 247 ar = 1; 248 ad = 1; 249 break; 250 case ABGEZAL: 251 case ABLTZAL: 252 s->set.ireg |= 1<<REGLINK; 253 case ABEQ: 254 case ABGEZ: 255 case ABGTZ: 256 case ABLEZ: 257 case ABLTZ: 258 case ABNE: 259 ar = 1; 260 ad = 1; 261 break; 262 case ABFPT: 263 case ABFPF: 264 ad = 1; 265 s->used.cc |= E_FCR; 266 break; 267 case ACMPEQD: 268 case ACMPEQF: 269 case ACMPGED: 270 case ACMPGEF: 271 case ACMPGTD: 272 case ACMPGTF: 273 ar = 1; 274 s->set.cc |= E_FCR; 275 p->mark |= FCMP; 276 break; 277 case AJMP: 278 ar = 1; 279 ad = 1; 280 break; 281 case AMOVB: 282 case AMOVBU: 283 sz = 1; 284 ld = 1; 285 break; 286 case AMOVH: 287 case AMOVHU: 288 sz = 2; 289 ld = 1; 290 break; 291 case AMOVF: 292 case AMOVW: 293 case AMOVWL: 294 case AMOVWR: 295 sz = 4; 296 ld = 1; 297 break; 298 case AMOVD: 299 case AMOVV: 300 case AMOVVL: 301 case AMOVVR: 302 sz = 8; 303 ld = 1; 304 break; 305 case ADIV: 306 case ADIVU: 307 case AMUL: 308 case AMULU: 309 case AREM: 310 case AREMU: 311 s->set.cc = E_HILO; 312 case AADD: 313 case AADDU: 314 case AAND: 315 case ANOR: 316 case AOR: 317 case ASGT: 318 case ASGTU: 319 case ASLL: 320 case ASRA: 321 case ASRL: 322 case ASUB: 323 case ASUBU: 324 case AXOR: 325 326 case AADDD: 327 case AADDF: 328 case AADDW: 329 case ASUBD: 330 case ASUBF: 331 case ASUBW: 332 case AMULF: 333 case AMULD: 334 case AMULW: 335 case ADIVF: 336 case ADIVD: 337 case ADIVW: 338 if(p->reg == NREG) { 339 if(p->to.type == D_REG || p->to.type == D_FREG) 340 p->reg = p->to.reg; 341 if(p->reg == NREG) 342 print("botch %P\n", p); 343 } 344 break; 345 } 346 347 /* 348 * flags based on 'to' field 349 */ 350 c = p->to.class; 351 if(c == 0) { 352 c = aclass(&p->to) + 1; 353 p->to.class = c; 354 } 355 c--; 356 switch(c) { 357 default: 358 print("unknown class %d %D\n", c, &p->to); 359 360 case C_ZCON: 361 case C_SCON: 362 case C_ADD0CON: 363 case C_AND0CON: 364 case C_ADDCON: 365 case C_ANDCON: 366 case C_UCON: 367 case C_LCON: 368 case C_NONE: 369 case C_SBRA: 370 case C_LBRA: 371 break; 372 373 case C_HI: 374 case C_LO: 375 s->set.cc |= E_HILO; 376 break; 377 case C_FCREG: 378 s->set.cc |= E_FCR; 379 break; 380 case C_MREG: 381 s->set.cc |= E_MCR; 382 break; 383 case C_ZOREG: 384 case C_SOREG: 385 case C_LOREG: 386 c = p->to.reg; 387 s->used.ireg |= 1<<c; 388 if(ad) 389 break; 390 s->size = sz; 391 s->offset = regoff(&p->to); 392 393 m = ANYMEM; 394 if(c == REGSB) 395 m = E_MEMSB; 396 if(c == REGSP) 397 m = E_MEMSP; 398 399 if(ar) 400 s->used.cc |= m; 401 else 402 s->set.cc |= m; 403 break; 404 case C_SACON: 405 case C_LACON: 406 s->used.ireg |= 1<<REGSP; 407 break; 408 case C_SECON: 409 case C_LECON: 410 s->used.ireg |= 1<<REGSB; 411 break; 412 case C_REG: 413 if(ar) 414 s->used.ireg |= 1<<p->to.reg; 415 else 416 s->set.ireg |= 1<<p->to.reg; 417 break; 418 case C_FREG: 419 /* do better -- determine double prec */ 420 if(ar) { 421 s->used.freg |= 1<<p->to.reg; 422 s->used.freg |= 1<<(p->to.reg|1); 423 } else { 424 s->set.freg |= 1<<p->to.reg; 425 s->set.freg |= 1<<(p->to.reg|1); 426 } 427 if(ld && p->from.type == D_REG) 428 p->mark |= LOAD; 429 break; 430 case C_SAUTO: 431 case C_LAUTO: 432 s->used.ireg |= 1<<REGSP; 433 if(ad) 434 break; 435 s->size = sz; 436 s->offset = regoff(&p->to); 437 438 if(ar) 439 s->used.cc |= E_MEMSP; 440 else 441 s->set.cc |= E_MEMSP; 442 break; 443 case C_SEXT: 444 case C_LEXT: 445 s->used.ireg |= 1<<REGSB; 446 if(ad) 447 break; 448 s->size = sz; 449 s->offset = regoff(&p->to); 450 451 if(ar) 452 s->used.cc |= E_MEMSB; 453 else 454 s->set.cc |= E_MEMSB; 455 break; 456 } 457 458 /* 459 * flags based on 'from' field 460 */ 461 c = p->from.class; 462 if(c == 0) { 463 c = aclass(&p->from) + 1; 464 p->from.class = c; 465 } 466 c--; 467 switch(c) { 468 default: 469 print("unknown class %d %D\n", c, &p->from); 470 471 case C_ZCON: 472 case C_SCON: 473 case C_ADD0CON: 474 case C_AND0CON: 475 case C_ADDCON: 476 case C_ANDCON: 477 case C_UCON: 478 case C_LCON: 479 case C_NONE: 480 case C_SBRA: 481 case C_LBRA: 482 break; 483 case C_HI: 484 case C_LO: 485 s->used.cc |= E_HILO; 486 break; 487 case C_FCREG: 488 s->used.cc |= E_FCR; 489 break; 490 case C_MREG: 491 s->used.cc |= E_MCR; 492 break; 493 case C_ZOREG: 494 case C_SOREG: 495 case C_LOREG: 496 c = p->from.reg; 497 s->used.ireg |= 1<<c; 498 if(ld) 499 p->mark |= LOAD; 500 s->size = sz; 501 s->offset = regoff(&p->from); 502 503 m = ANYMEM; 504 if(c == REGSB) 505 m = E_MEMSB; 506 if(c == REGSP) 507 m = E_MEMSP; 508 509 s->used.cc |= m; 510 break; 511 case C_SACON: 512 case C_LACON: 513 s->used.ireg |= 1<<REGSP; 514 break; 515 case C_SECON: 516 case C_LECON: 517 s->used.ireg |= 1<<REGSB; 518 break; 519 case C_REG: 520 s->used.ireg |= 1<<p->from.reg; 521 break; 522 case C_FREG: 523 /* do better -- determine double prec */ 524 s->used.freg |= 1<<p->from.reg; 525 s->used.freg |= 1<<(p->from.reg|1); 526 if(ld && p->to.type == D_REG) 527 p->mark |= LOAD; 528 break; 529 case C_SAUTO: 530 case C_LAUTO: 531 s->used.ireg |= 1<<REGSP; 532 if(ld) 533 p->mark |= LOAD; 534 if(ad) 535 break; 536 s->size = sz; 537 s->offset = regoff(&p->from); 538 539 s->used.cc |= E_MEMSP; 540 break; 541 case C_SEXT: 542 case C_LEXT: 543 s->used.ireg |= 1<<REGSB; 544 if(ld) 545 p->mark |= LOAD; 546 if(ad) 547 break; 548 s->size = sz; 549 s->offset = regoff(&p->from); 550 551 s->used.cc |= E_MEMSB; 552 break; 553 } 554 555 c = p->reg; 556 if(c != NREG) { 557 if(p->from.type == D_FREG || p->to.type == D_FREG) { 558 s->used.freg |= 1<<c; 559 s->used.freg |= 1<<(c|1); 560 } else 561 s->used.ireg |= 1<<c; 562 } 563 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */ 564 } 565 566 /* 567 * test to see if 2 instrictions can be 568 * interchanged without changing semantics 569 */ 570 int 571 depend(Sch *sa, Sch *sb) 572 { 573 ulong x; 574 575 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 576 return 1; 577 if(sb->set.ireg & sa->used.ireg) 578 return 1; 579 580 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 581 return 1; 582 if(sb->set.freg & sa->used.freg) 583 return 1; 584 585 /* 586 * special case. 587 * loads from same address cannot pass. 588 * this is for hardware fifo's and the like 589 */ 590 if(sa->used.cc & sb->used.cc & E_MEM) 591 if(sa->p.reg == sb->p.reg) 592 if(regoff(&sa->p.from) == regoff(&sb->p.from)) 593 return 1; 594 595 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 596 (sb->set.cc & sa->used.cc); 597 if(x) { 598 /* 599 * allow SB and SP to pass each other. 600 * allow SB to pass SB iff doffsets are ok 601 * anything else conflicts 602 */ 603 if(x != E_MEMSP && x != E_MEMSB) 604 return 1; 605 x = sa->set.cc | sb->set.cc | 606 sa->used.cc | sb->used.cc; 607 if(x & E_MEM) 608 return 1; 609 if(offoverlap(sa, sb)) 610 return 1; 611 } 612 613 return 0; 614 } 615 616 int 617 offoverlap(Sch *sa, Sch *sb) 618 { 619 620 if(sa->offset < sb->offset) { 621 if(sa->offset+sa->size > sb->offset) 622 return 1; 623 return 0; 624 } 625 if(sb->offset+sb->size > sa->offset) 626 return 1; 627 return 0; 628 } 629 630 /* 631 * test 2 adjacent instructions 632 * and find out if inserted instructions 633 * are desired to prevent stalls. 634 */ 635 int 636 conflict(Sch *sa, Sch *sb) 637 { 638 639 if(sa->set.ireg & sb->used.ireg) 640 return 1; 641 if(sa->set.freg & sb->used.freg) 642 return 1; 643 if(sa->set.cc & sb->used.cc) 644 return 1; 645 646 return 0; 647 } 648 649 int 650 compound(Prog *p) 651 { 652 Optab *o; 653 654 o = oplook(p); 655 if(o->size != 4) 656 return 1; 657 if(p->to.type == D_REG && p->to.reg == REGSB) 658 return 1; 659 return 0; 660 } 661 662 void 663 dumpbits(Sch *s, Dep *d) 664 { 665 int i; 666 667 for(i=0; i<32; i++) 668 if(d->ireg & (1<<i)) 669 Bprint(&bso, " R%d", i); 670 for(i=0; i<32; i++) 671 if(d->freg & (1<<i)) 672 Bprint(&bso, " F%d", i); 673 for(i=0; i<32; i++) 674 switch(d->cc & (1<<i)) { 675 default: 676 break; 677 case E_HILO: 678 Bprint(&bso, " HILO"); 679 break; 680 case E_FCR: 681 Bprint(&bso, " FCR"); 682 break; 683 case E_MCR: 684 Bprint(&bso, " MCR"); 685 break; 686 case E_MEM: 687 Bprint(&bso, " MEM%d", s->size); 688 break; 689 case E_MEMSB: 690 Bprint(&bso, " SB%d", s->size); 691 break; 692 case E_MEMSP: 693 Bprint(&bso, " SP%d", s->size); 694 break; 695 } 696 } 697