1 #include "l.h" 2 3 enum 4 { 5 E_ICC = 1<<0, 6 E_FCC = 1<<1, 7 E_MEM = 1<<2, 8 E_MEMSP = 1<<3, /* uses offset and size */ 9 E_MEMSB = 1<<4, /* uses offset and size */ 10 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 11 ALL = ~0 12 }; 13 14 typedef struct Sch Sch; 15 typedef struct Dep Dep; 16 17 struct Dep 18 { 19 ulong ireg; 20 ulong freg; 21 ulong cc; 22 }; 23 struct Sch 24 { 25 Prog p; 26 Dep set; 27 Dep used; 28 long soffset; 29 char size; 30 char nop; 31 char comp; 32 }; 33 34 void regsused(Sch*, Prog*); 35 int depend(Sch*, Sch*); 36 int conflict(Sch*, Sch*); 37 int offoverlap(Sch*, Sch*); 38 void dumpbits(Sch*, Dep*); 39 40 void 41 sched(Prog *p0, Prog *pe) 42 { 43 Prog *p, *q; 44 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 45 46 /* 47 * build side structure 48 */ 49 s = sch; 50 for(p=p0;; p=p->link) { 51 memset(s, 0, sizeof(*s)); 52 s->p = *p; 53 regsused(s, p); 54 if(debug['X']) { 55 Bprint(&bso, "%P\tset", &s->p); 56 dumpbits(s, &s->set); 57 Bprint(&bso, "; used"); 58 dumpbits(s, &s->used); 59 if(s->comp) 60 Bprint(&bso, "; compound"); 61 if(s->p.mark & LOAD) 62 Bprint(&bso, "; load"); 63 if(s->p.mark & BRANCH) 64 Bprint(&bso, "; branch"); 65 if(s->p.mark & FCMP) 66 Bprint(&bso, "; fcmp"); 67 Bprint(&bso, "\n"); 68 } 69 s++; 70 if(p == pe) 71 break; 72 } 73 se = s; 74 75 for(s=se-1; s>=sch; s--) { 76 /* 77 * branch delay slot 78 */ 79 if(s->p.mark & BRANCH) { 80 /* t is the trial instruction to use */ 81 for(t=s-1; t>=sch; t--) { 82 if(t->comp || (t->p.mark & FCMP)) 83 goto no1; 84 for(u=t+1; u<=s; u++) 85 if(depend(u, t)) 86 goto no1; 87 goto out1; 88 no1:; 89 } 90 if(debug['X']) 91 Bprint(&bso, "?b%P\n", &s->p); 92 s->nop = 1; 93 continue; 94 95 out1: 96 if(debug['X']) { 97 Bprint(&bso, "!b%P\n", &t->p); 98 Bprint(&bso, "%P\n", &s->p); 99 } 100 stmp = *t; 101 memmove(t, t+1, (uchar*)s - (uchar*)t); 102 *s = stmp; 103 s--; 104 continue; 105 } 106 107 /* 108 * load delay. interlocked. 109 */ 110 if(s->p.mark & LOAD) { 111 if(s >= se-1) 112 continue; 113 if(!conflict(s, (s+1))) 114 continue; 115 /* 116 * s is load, s+1 is immediate use of result 117 * t is the trial instruction to insert between s and s+1 118 */ 119 for(t=s-1; t>=sch; t--) { 120 if(t->p.mark & BRANCH) 121 goto no2; 122 if(t->p.mark & FCMP) 123 if((s+1)->p.mark & BRANCH) 124 goto no2; 125 if(t->p.mark & LOAD) 126 if(conflict(t, (s+1))) 127 goto no2; 128 for(u=t+1; u<=s; u++) 129 if(depend(u, t)) 130 goto no2; 131 goto out2; 132 no2:; 133 } 134 if(debug['X']) 135 Bprint(&bso, "?l%P\n", &s->p); 136 continue; 137 out2: 138 if(debug['X']) { 139 Bprint(&bso, "!l%P\n", &t->p); 140 Bprint(&bso, "%P\n", &s->p); 141 } 142 stmp = *t; 143 memmove(t, t+1, (uchar*)s - (uchar*)t); 144 *s = stmp; 145 s--; 146 continue; 147 } 148 149 /* 150 * fop2 delay. 151 */ 152 if(s->p.mark & FCMP) { 153 if(s >= se-1) { 154 s->nop = 1; 155 continue; 156 } 157 if(!((s+1)->p.mark & BRANCH)) 158 continue; 159 /* t is the trial instruction to use */ 160 for(t=s-1; t>=sch; t--) { 161 for(u=t+1; u<=s; u++) 162 if(depend(u, t)) 163 goto no3; 164 goto out3; 165 no3:; 166 } 167 if(debug['X']) 168 Bprint(&bso, "?f%P\n", &s->p); 169 s->nop = 1; 170 continue; 171 out3: 172 if(debug['X']) { 173 Bprint(&bso, "!f%P\n", &t->p); 174 Bprint(&bso, "%P\n", &s->p); 175 } 176 stmp = *t; 177 memmove(t, t+1, (uchar*)s - (uchar*)t); 178 *s = stmp; 179 s--; 180 continue; 181 } 182 } 183 184 /* 185 * put it all back 186 */ 187 for(s=sch, p=p0; s<se; s++, p=q) { 188 q = p->link; 189 if(q != s->p.link) { 190 *p = s->p; 191 p->link = q; 192 } 193 if(s->nop) 194 addnop(p); 195 } 196 if(debug['X']) 197 Bprint(&bso, "\n"); 198 } 199 200 void 201 regsused(Sch *s, Prog *realp) 202 { 203 int c, ar, ad, ld, sz; 204 ulong m; 205 Prog *p; 206 207 p = &s->p; 208 s->comp = compound(p); 209 s->nop = 0; 210 if(s->comp) { 211 s->set.ireg |= 1<<REGTMP; 212 s->used.ireg |= 1<<REGTMP; 213 } 214 ar = 0; /* dest is really reference */ 215 ad = 0; /* source/dest is really address */ 216 ld = 0; /* opcode is load instruction */ 217 sz = 20; /* size of load/store for overlap computation */ 218 219 /* 220 * flags based on opcode 221 */ 222 switch(p->as) { 223 case ATEXT: 224 curtext = realp; 225 autosize = p->to.offset + 4; 226 ad = 1; 227 break; 228 case AJMPL: 229 c = p->reg; 230 if(c == NREG) 231 c = REGLINK; 232 s->set.ireg |= 1<<c; 233 ar = 1; 234 ad = 1; 235 break; 236 case AJMP: 237 ar = 1; 238 ad = 1; 239 break; 240 case ACMP: 241 s->set.cc |= E_ICC; 242 ar = 1; 243 break; 244 case AFCMPD: 245 case AFCMPED: 246 case AFCMPEF: 247 case AFCMPEX: 248 case AFCMPF: 249 case AFCMPX: 250 s->set.cc |= E_FCC; 251 ar = 1; 252 break; 253 case ABE: 254 case ABNE: 255 case ABLE: 256 case ABG: 257 case ABL: 258 case ABGE: 259 case ABLEU: 260 case ABGU: 261 case ABCS: 262 case ABCC: 263 case ABNEG: 264 case ABPOS: 265 case ABVC: 266 case ABVS: 267 s->used.cc |= E_ICC; 268 ar = 1; 269 break; 270 case AFBE: 271 case AFBLG: 272 case AFBG: 273 case AFBLE: 274 case AFBGE: 275 case AFBL: 276 s->used.cc |= E_FCC; 277 ar = 1; 278 break; 279 case AMOVB: 280 case AMOVBU: 281 sz = 1; 282 ld = 1; 283 break; 284 case AMOVH: 285 case AMOVHU: 286 sz = 2; 287 ld = 1; 288 break; 289 case AFMOVF: 290 case AMOVW: 291 sz = 4; 292 ld = 1; 293 break; 294 case AMOVD: 295 case AFMOVD: 296 sz = 8; 297 ld = 1; 298 break; 299 case AFMOVX: /* gok */ 300 sz = 16; 301 ld = 1; 302 break; 303 case AADDCC: 304 case AADDXCC: 305 case AANDCC: 306 case AANDNCC: 307 case AORCC: 308 case AORNCC: 309 case ASUBCC: 310 case ASUBXCC: 311 case ATADDCC: 312 case ATADDCCTV: 313 case ATSUBCC: 314 case ATSUBCCTV: 315 case AXNORCC: 316 case AXORCC: 317 s->set.cc |= E_ICC; 318 break; 319 case ADIV: 320 case ADIVL: 321 case AMOD: 322 case AMODL: 323 case AMUL: 324 case AMULSCC: 325 case ATAS: 326 s->set.ireg = ALL; 327 s->set.freg = ALL; 328 s->set.cc = ALL; 329 break; 330 } 331 332 /* 333 * flags based on 'to' field 334 */ 335 c = p->to.class; 336 if(c == 0) { 337 c = aclass(&p->to) + 1; 338 p->to.class = c; 339 } 340 c--; 341 switch(c) { 342 default: 343 print("unknown class %d %D\n", c, &p->to); 344 345 case C_ZCON: 346 case C_SCON: 347 case C_UCON: 348 case C_LCON: 349 case C_NONE: 350 case C_SBRA: 351 case C_LBRA: 352 break; 353 case C_CREG: 354 case C_FSR: 355 case C_FQ: 356 case C_PREG: 357 s->set.ireg = ALL; 358 s->set.freg = ALL; 359 s->set.cc = ALL; 360 break; 361 case C_ZOREG: 362 case C_SOREG: 363 case C_LOREG: 364 case C_ASI: 365 c = p->to.reg; 366 s->used.ireg |= 1<<c; 367 if(ad) 368 break; 369 s->size = sz; 370 s->soffset = regoff(&p->to); 371 372 m = ANYMEM; 373 if(c == REGSB) 374 m = E_MEMSB; 375 if(c == REGSP) 376 m = E_MEMSP; 377 378 if(ar) 379 s->used.cc |= m; 380 else 381 s->set.cc |= m; 382 break; 383 case C_SACON: 384 case C_LACON: 385 s->used.ireg |= 1<<REGSP; 386 break; 387 case C_SECON: 388 case C_LECON: 389 s->used.ireg |= 1<<REGSB; 390 break; 391 case C_REG: 392 if(ar) 393 s->used.ireg |= 1<<p->to.reg; 394 else 395 s->set.ireg |= 1<<p->to.reg; 396 break; 397 case C_FREG: 398 /* do better -- determine double prec */ 399 if(ar) { 400 s->used.freg |= 1<<p->to.reg; 401 s->used.freg |= 1<<(p->to.reg|1); 402 } else { 403 s->set.freg |= 1<<p->to.reg; 404 s->set.freg |= 1<<(p->to.reg|1); 405 } 406 break; 407 case C_SAUTO: 408 case C_LAUTO: 409 case C_ESAUTO: 410 case C_OSAUTO: 411 case C_ELAUTO: 412 case C_OLAUTO: 413 s->used.ireg |= 1<<REGSP; 414 if(ad) 415 break; 416 s->size = sz; 417 s->soffset = regoff(&p->to); 418 419 if(ar) 420 s->used.cc |= E_MEMSP; 421 else 422 s->set.cc |= E_MEMSP; 423 break; 424 case C_SEXT: 425 case C_LEXT: 426 case C_ESEXT: 427 case C_OSEXT: 428 case C_ELEXT: 429 case C_OLEXT: 430 s->used.ireg |= 1<<REGSB; 431 if(ad) 432 break; 433 s->size = sz; 434 s->soffset = regoff(&p->to); 435 436 if(ar) 437 s->used.cc |= E_MEMSB; 438 else 439 s->set.cc |= E_MEMSB; 440 break; 441 } 442 443 /* 444 * flags based on 'from' field 445 */ 446 c = p->from.class; 447 if(c == 0) { 448 c = aclass(&p->from) + 1; 449 p->from.class = c; 450 } 451 c--; 452 switch(c) { 453 default: 454 print("unknown class %d %D\n", c, &p->from); 455 456 case C_ZCON: 457 case C_SCON: 458 case C_UCON: 459 case C_LCON: 460 case C_NONE: 461 case C_SBRA: 462 case C_LBRA: 463 c = p->from.reg; 464 if(c != NREG) 465 s->used.ireg |= 1<<c; 466 break; 467 case C_CREG: 468 case C_FSR: 469 case C_FQ: 470 case C_PREG: 471 s->set.ireg = ALL; 472 s->set.freg = ALL; 473 s->set.cc = ALL; 474 break; 475 case C_ZOREG: 476 case C_SOREG: 477 case C_LOREG: 478 case C_ASI: 479 c = p->from.reg; 480 s->used.ireg |= 1<<c; 481 if(ld) 482 p->mark |= LOAD; 483 if(ad) 484 break; 485 s->size = sz; 486 s->soffset = regoff(&p->from); 487 488 m = ANYMEM; 489 if(c == REGSB) 490 m = E_MEMSB; 491 if(c == REGSP) 492 m = E_MEMSP; 493 494 s->used.cc |= m; 495 break; 496 case C_SACON: 497 case C_LACON: 498 s->used.ireg |= 1<<REGSP; 499 break; 500 case C_SECON: 501 case C_LECON: 502 s->used.ireg |= 1<<REGSB; 503 break; 504 case C_REG: 505 s->used.ireg |= 1<<p->from.reg; 506 break; 507 case C_FREG: 508 /* do better -- determine double prec */ 509 s->used.freg |= 1<<p->from.reg; 510 s->used.freg |= 1<<(p->from.reg|1); 511 break; 512 case C_SAUTO: 513 case C_LAUTO: 514 case C_ESAUTO: 515 case C_ELAUTO: 516 case C_OSAUTO: 517 case C_OLAUTO: 518 s->used.ireg |= 1<<REGSP; 519 if(ld) 520 p->mark |= LOAD; 521 if(ad) 522 break; 523 s->size = sz; 524 s->soffset = regoff(&p->from); 525 526 s->used.cc |= E_MEMSP; 527 break; 528 case C_SEXT: 529 case C_LEXT: 530 case C_ESEXT: 531 case C_ELEXT: 532 case C_OSEXT: 533 case C_OLEXT: 534 s->used.ireg |= 1<<REGSB; 535 if(ld) 536 p->mark |= LOAD; 537 if(ad) 538 break; 539 s->size = sz; 540 s->soffset = regoff(&p->from); 541 542 s->used.cc |= E_MEMSB; 543 break; 544 } 545 546 c = p->reg; 547 if(c != NREG) { 548 if(p->from.type == D_FREG || p->to.type == D_FREG) { 549 s->used.freg |= 1<<c; 550 s->used.freg |= 1<<(c|1); 551 } else 552 s->used.ireg |= 1<<c; 553 } 554 s->set.ireg &= ~(1<<0); /* R0 cant be set */ 555 } 556 557 /* 558 * test to see if 2 instrictions can be 559 * interchanged without changing semantics 560 */ 561 int 562 depend(Sch *sa, Sch *sb) 563 { 564 ulong x; 565 566 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) 567 return 1; 568 if(sb->set.ireg & sa->used.ireg) 569 return 1; 570 571 if(sa->set.freg & (sb->set.freg|sb->used.freg)) 572 return 1; 573 if(sb->set.freg & sa->used.freg) 574 return 1; 575 576 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 577 (sb->set.cc & sa->used.cc); 578 if(x) { 579 /* 580 * allow SB and SP to pass each other. 581 * allow SB to pass SB iff doffsets are ok 582 * anything else conflicts 583 */ 584 if(x != E_MEMSP && x != E_MEMSB) 585 return 1; 586 x = sa->set.cc | sb->set.cc | 587 sa->used.cc | sb->used.cc; 588 if(x & E_MEM) 589 return 1; 590 if(offoverlap(sa, sb)) 591 return 1; 592 } 593 594 return 0; 595 } 596 597 int 598 offoverlap(Sch *sa, Sch *sb) 599 { 600 601 if(sa->soffset < sb->soffset) { 602 if(sa->soffset+sa->size > sb->soffset) 603 return 1; 604 return 0; 605 } 606 if(sb->soffset+sb->size > sa->soffset) 607 return 1; 608 return 0; 609 } 610 611 /* 612 * test 2 adjacent instructions 613 * and find out if inserted instructions 614 * are desired to prevent stalls. 615 * first instruction is a load instruction. 616 */ 617 int 618 conflict(Sch *sa, Sch *sb) 619 { 620 621 if(sa->set.ireg & sb->used.ireg) 622 return 1; 623 if(sa->set.freg & sb->used.freg) 624 return 1; 625 return 0; 626 } 627 628 int 629 compound(Prog *p) 630 { 631 Optab *o; 632 633 o = oplook(p); 634 if(o->size != 4) 635 return 1; 636 if(p->to.type == D_REG && p->to.reg == REGSB) 637 return 1; 638 return 0; 639 } 640 641 void 642 dumpbits(Sch *s, Dep *d) 643 { 644 int i; 645 646 for(i=0; i<32; i++) 647 if(d->ireg & (1<<i)) 648 Bprint(&bso, " R%d", i); 649 for(i=0; i<32; i++) 650 if(d->freg & (1<<i)) 651 Bprint(&bso, " F%d", i); 652 for(i=0; i<32; i++) 653 switch(d->cc & (1<<i)) { 654 default: 655 break; 656 case E_ICC: 657 Bprint(&bso, " ICC"); 658 break; 659 case E_FCC: 660 Bprint(&bso, " FCC"); 661 break; 662 case E_MEM: 663 Bprint(&bso, " MEM%d", s->size); 664 break; 665 case E_MEMSB: 666 Bprint(&bso, " SB%d", s->size); 667 break; 668 case E_MEMSP: 669 Bprint(&bso, " SP%d", s->size); 670 break; 671 } 672 } 673