1 #include "l.h" 2 3 enum 4 { 5 E_MEM = 1<<3, 6 E_MEMSP = 1<<4, /* uses offset and size */ 7 E_MEMSB = 1<<5, /* uses offset and size */ 8 ANYMEM = E_MEM|E_MEMSP|E_MEMSB, 9 DELAY = BRANCH|LOAD|FCMP, 10 11 NRWORD = 8, 12 isdouble = 0, 13 }; 14 15 typedef struct Sch Sch; 16 typedef struct Dep Dep; 17 18 struct Dep 19 { 20 ulong ireg[NRWORD]; 21 ulong cc; 22 }; 23 struct Sch 24 { 25 Prog p; 26 Dep set; 27 Dep used; 28 long offset; 29 char size; 30 char nop; 31 char comp; 32 }; 33 34 void bitset(ulong*, int); 35 int bittest(ulong*, int); 36 37 void regused(Sch*, Prog*); 38 int depend(Sch*, Sch*); 39 int conflict(Sch*, Sch*); 40 int offoverlap(Sch*, Sch*); 41 void dumpbits(Sch*, Dep*); 42 43 void 44 sched(Prog *p0, Prog *pe) 45 { 46 Prog *p, *q; 47 Sch sch[NSCHED], *s, *t, *u, *se, stmp; 48 49 /* 50 * build side structure 51 */ 52 s = sch; 53 for(p=p0;; p=p->link) { 54 memset(s, 0, sizeof(*s)); 55 s->p = *p; 56 regused(s, p); 57 if(debug['X']) { 58 Bprint(&bso, "%P\t\tmark %x set", &s->p, s->p.mark); 59 dumpbits(s, &s->set); 60 Bprint(&bso, "; used"); 61 dumpbits(s, &s->used); 62 if(s->comp) 63 Bprint(&bso, "; compound"); 64 if(s->p.mark & LOAD) 65 Bprint(&bso, "; load"); 66 if(s->p.mark & BRANCH) 67 Bprint(&bso, "; branch"); 68 if(s->p.mark & FCMP) 69 Bprint(&bso, "; fcmp"); 70 Bprint(&bso, "\n"); 71 } 72 if(p == pe) 73 break; 74 s++; 75 } 76 se = s; 77 78 /* 79 * prepass to move things around 80 * does nothing, but tries to make 81 * the actual scheduler work better 82 */ 83 if(!debug['Y']) 84 for(s=sch; s<=se; s++) { 85 if(!(s->p.mark & LOAD)) 86 continue; 87 /* always good to put nonconflict loads together */ 88 for(t=s+1; t<=se; t++) { 89 if(!(t->p.mark & LOAD)) 90 continue; 91 if(t->p.mark & BRANCH) 92 break; 93 if(conflict(s, t)) 94 break; 95 for(u=t-1; u>s; u--) 96 if(depend(u, t)) 97 goto no11; 98 u = s+1; 99 stmp = *t; 100 memmove(s+2, u, (uchar*)t - (uchar*)u); 101 *u = stmp; 102 break; 103 } 104 no11: 105 106 /* put schedule fodder above load */ 107 for(t=s+1; t<=se; t++) { 108 if(t->p.mark & BRANCH) 109 break; 110 if(s > sch && conflict(s-1, t)) 111 continue; 112 for(u=t-1; u>=s; u--) 113 if(depend(t, u)) 114 goto no1; 115 stmp = *t; 116 memmove(s+1, s, (uchar*)t - (uchar*)s); 117 *s = stmp; 118 if(!(s->p.mark & LOAD)) 119 break; 120 no1:; 121 } 122 } 123 124 for(s=se; s>=sch; s--) { 125 if(!(s->p.mark & DELAY)) 126 continue; 127 if(s < se) 128 if(!conflict(s, s+1)) 129 goto out3; 130 /* 131 * s is load, s+1 is immediate use of result or end of block 132 * t is the trial instruction to insert between s and s+1 133 */ 134 if(!debug['Y']) 135 for(t=s-1; t>=sch; t--) { 136 if(t->comp) 137 if(s->p.mark & BRANCH) 138 goto no2; 139 if(t->p.mark & DELAY) 140 if(s >= se || conflict(t, s+1)) 141 goto no2; 142 for(u=t+1; u<=s; u++) 143 if(depend(u, t)) 144 goto no2; 145 goto out2; 146 no2:; 147 } 148 if(debug['X']) 149 Bprint(&bso, "?l%P\n", &s->p); 150 if(s->p.mark & BRANCH) 151 s->nop = 1; 152 if(debug['v']) { 153 if(s->p.mark & LOAD) { 154 nop.load.count++; 155 nop.load.outof++; 156 } 157 if(s->p.mark & BRANCH) { 158 nop.branch.count++; 159 nop.branch.outof++; 160 } 161 if(s->p.mark & FCMP) { 162 nop.fcmp.count++; 163 nop.fcmp.outof++; 164 } 165 } 166 continue; 167 168 out2: 169 if(debug['X']) { 170 Bprint(&bso, "!l%P\n", &t->p); 171 Bprint(&bso, "%P\n", &s->p); 172 } 173 stmp = *t; 174 memmove(t, t+1, (uchar*)s - (uchar*)t); 175 *s = stmp; 176 s--; 177 178 out3: 179 if(debug['v']) { 180 if(s->p.mark & LOAD) 181 nop.load.outof++; 182 if(s->p.mark & BRANCH) 183 nop.branch.outof++; 184 if(s->p.mark & FCMP) 185 nop.fcmp.outof++; 186 } 187 } 188 189 /* 190 * put it all back 191 */ 192 for(s=sch, p=p0; s<=se; s++, p=q) { 193 q = p->link; 194 if(q != s->p.link) { 195 *p = s->p; 196 p->link = q; 197 } 198 while(s->nop--) 199 addnop(p); 200 } 201 if(debug['X']) { 202 Bprint(&bso, "\n"); 203 Bflush(&bso); 204 } 205 } 206 207 void 208 regused(Sch *s, Prog *realp) 209 { 210 int c, ar, ad, ld, sz; 211 ulong m; 212 Prog *p; 213 214 p = &s->p; 215 s->comp = compound(p); 216 s->nop = 0; 217 if(s->comp) { 218 bitset(s->set.ireg, REGTMP); 219 bitset(s->used.ireg, REGTMP); 220 } 221 222 ar = 0; /* dest is really reference */ 223 ad = 0; /* source/dest is really address */ 224 ld = 0; /* opcode is load instruction */ 225 sz = 20; /* size of load/store for overlap computation */ 226 227 /* 228 * flags based on opcode 229 */ 230 switch(p->as) { 231 case ATEXT: 232 curtext = realp; 233 autosize = p->to.offset + 4; 234 ad = 1; 235 break; 236 case ACALL: 237 c = p->reg; 238 if(c == NREG) 239 c = REGLINK; 240 bitset(s->set.ireg, c); 241 ar = 1; 242 ad = 1; 243 break; 244 case AJMP: 245 ar = 1; 246 ad = 1; 247 break; 248 case AMOVB: 249 case AMOVBU: 250 sz = 1; 251 ld = 1; 252 break; 253 case AMOVH: 254 case AMOVHU: 255 sz = 2; 256 ld = 1; 257 break; 258 case AMOVF: 259 case AMOVL: 260 sz = 4; 261 ld = 1; 262 break; 263 case ADIVL: 264 case ADIVUL: 265 case AMULL: 266 case AMULUL: 267 case AMULML: 268 case AMULMUL: 269 case AMSTEPL: 270 case AMSTEPUL: 271 case AMSTEPLL: 272 case ADSTEPL: 273 case ADSTEP0L: 274 case ADSTEPLL: 275 case ADSTEPRL: 276 bitset(s->set.ireg, REGQ); 277 bitset(s->used.ireg, REGQ); 278 break; 279 case AMOVD: 280 sz = 8; 281 ld = 1; 282 if(p->from.type == D_REG && p->to.type == D_REG) 283 break; 284 case ALOADM: 285 case ASTOREM: 286 bitset(s->set.ireg, REGC); 287 bitset(s->used.ireg, REGC); 288 break; 289 } 290 291 /* 292 * flags based on 'to' field 293 */ 294 c = p->to.class; 295 if(c == 0) { 296 c = aclass(&p->to) + 1; 297 p->to.class = c; 298 } 299 c--; 300 switch(c) { 301 default: 302 print("unknown class %d %D\n", c, &p->to); 303 304 case C_ZCON: 305 case C_SCON: 306 case C_MCON: 307 case C_PCON: 308 case C_NCON: 309 case C_LCON: 310 case C_NONE: 311 case C_SBRA: 312 case C_LBRA: 313 break; 314 315 case C_ZOREG: 316 case C_SOREG: 317 case C_MOREG: 318 case C_POREG: 319 case C_NOREG: 320 case C_LOREG: 321 c = p->to.reg; 322 bitset(s->used.ireg, c); 323 if(ad) 324 break; 325 s->size = sz; 326 s->offset = regoff(&p->to); 327 328 m = ANYMEM; 329 if(c == REGSP) 330 m = E_MEMSP; 331 332 if(ar) 333 s->used.cc |= m; 334 else 335 s->set.cc |= m; 336 break; 337 338 case C_ZACON: 339 case C_SACON: 340 case C_MACON: 341 case C_PACON: 342 case C_NACON: 343 case C_LACON: 344 bitset(s->used.ireg, REGSP); 345 break; 346 347 case C_REG: 348 if(ar) 349 bitset(s->used.ireg, p->to.reg); 350 else 351 bitset(s->set.ireg, p->to.reg); 352 break; 353 354 case C_ZAUTO: 355 case C_SAUTO: 356 case C_MAUTO: 357 case C_PAUTO: 358 case C_NAUTO: 359 case C_LAUTO: 360 bitset(s->used.ireg, REGSP); 361 if(ad) 362 break; 363 s->size = sz; 364 s->offset = regoff(&p->to); 365 366 if(ar) 367 s->used.cc |= E_MEMSP; 368 else 369 s->set.cc |= E_MEMSP; 370 break; 371 case C_SEXT: 372 case C_LEXT: 373 if(ad) 374 break; 375 s->size = sz; 376 s->offset = regoff(&p->to); 377 378 if(ar) 379 s->used.cc |= E_MEMSB; 380 else 381 s->set.cc |= E_MEMSB; 382 break; 383 } 384 385 /* 386 * flags based on 'from' field 387 */ 388 c = p->from.class; 389 if(c == 0) { 390 c = aclass(&p->from) + 1; 391 p->from.class = c; 392 } 393 c--; 394 switch(c) { 395 default: 396 print("unknown class %d %D\n", c, &p->from); 397 398 case C_ZCON: 399 case C_SCON: 400 case C_MCON: 401 case C_PCON: 402 case C_NCON: 403 case C_LCON: 404 case C_NONE: 405 case C_SBRA: 406 case C_LBRA: 407 break; 408 409 case C_ZOREG: 410 case C_SOREG: 411 case C_MOREG: 412 case C_POREG: 413 case C_NOREG: 414 case C_LOREG: 415 c = p->from.reg; 416 bitset(s->used.ireg, c); 417 if(ld) 418 p->mark |= LOAD; 419 s->size = sz; 420 s->offset = regoff(&p->from); 421 422 m = ANYMEM; 423 if(c == REGSP) 424 m = E_MEMSP; 425 426 s->used.cc |= m; 427 break; 428 case C_ZACON: 429 case C_SACON: 430 case C_MACON: 431 case C_PACON: 432 case C_NACON: 433 case C_LACON: 434 bitset(s->used.ireg, REGSP); 435 break; 436 case C_REG: 437 bitset(s->used.ireg, p->from.reg); 438 if(isdouble) 439 bitset(s->used.ireg, p->from.reg+1); 440 break; 441 case C_ZAUTO: 442 case C_SAUTO: 443 case C_MAUTO: 444 case C_PAUTO: 445 case C_NAUTO: 446 case C_LAUTO: 447 bitset(s->used.ireg, REGSP); 448 if(ld) 449 p->mark |= LOAD; 450 if(ad) 451 break; 452 s->size = sz; 453 s->offset = regoff(&p->from); 454 455 s->used.cc |= E_MEMSP; 456 break; 457 case C_SEXT: 458 case C_LEXT: 459 if(ld) 460 p->mark |= LOAD; 461 if(ad) 462 break; 463 s->size = sz; 464 s->offset = regoff(&p->from); 465 466 s->used.cc |= E_MEMSB; 467 break; 468 } 469 470 c = p->reg; 471 if(c != NREG) { 472 if(isdouble) { 473 bitset(s->used.ireg, c); 474 bitset(s->used.ireg, c+1); 475 } else 476 bitset(s->used.ireg, c); 477 } 478 } 479 480 /* 481 * test to see if 2 instrictions can be 482 * interchanged without changing semantics 483 */ 484 int 485 depend(Sch *sa, Sch *sb) 486 { 487 ulong x; 488 int i; 489 490 for(i=0; i<NRWORD; i++) { 491 if(sa->set.ireg[i] & (sb->set.ireg[i]|sb->used.ireg[i])) 492 return 1; 493 if(sb->set.ireg[i] & sa->used.ireg[i]) 494 return 1; 495 } 496 497 /* 498 * special case. 499 * loads from same address cannot pass. 500 * this is for hardware fifo's and the like 501 */ 502 if(sa->used.cc & sb->used.cc & E_MEM) 503 if(sa->p.reg == sb->p.reg) 504 if(regoff(&sa->p.from) == regoff(&sb->p.from)) 505 return 1; 506 507 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | 508 (sb->set.cc & sa->used.cc); 509 if(x) { 510 /* 511 * allow SB and SP to pass each other. 512 * allow SB to pass SB iff doffsets are ok 513 * anything else conflicts 514 */ 515 if(x != E_MEMSP && x != E_MEMSB) 516 return 1; 517 x = sa->set.cc | sb->set.cc | 518 sa->used.cc | sb->used.cc; 519 if(x & E_MEM) 520 return 1; 521 if(offoverlap(sa, sb)) 522 return 1; 523 } 524 525 return 0; 526 } 527 528 int 529 offoverlap(Sch *sa, Sch *sb) 530 { 531 532 if(sa->offset < sb->offset) { 533 if(sa->offset+sa->size > sb->offset) 534 return 1; 535 return 0; 536 } 537 if(sb->offset+sb->size > sa->offset) 538 return 1; 539 return 0; 540 } 541 542 /* 543 * test 2 adjacent instructions 544 * and find out if inserted instructions 545 * are desired to prevent stalls. 546 */ 547 int 548 conflict(Sch *sa, Sch *sb) 549 { 550 int i; 551 552 for(i=0; i<NRWORD; i++) 553 if(sa->set.ireg[i] & sb->used.ireg[i]) 554 return 1; 555 if(sa->set.cc & sb->used.cc) 556 return 1; 557 558 return 0; 559 } 560 561 int 562 compound(Prog *p) 563 { 564 Optab *o; 565 566 o = oplook(p); 567 if(o->size != 4) 568 return 1; 569 return 0; 570 } 571 572 void 573 dumpbits(Sch *s, Dep *d) 574 { 575 int i; 576 577 for(i=0; i<NRWORD*32; i++) 578 if(bittest(d->ireg, i)) 579 Bprint(&bso, " R%d", i); 580 for(i=0; i<32; i++) 581 switch(d->cc & (1<<i)) { 582 default: 583 Bprint(&bso, " CC%d", i); 584 case 0: 585 break; 586 case E_MEM: 587 Bprint(&bso, " MEM.%d", s->size); 588 break; 589 case E_MEMSB: 590 Bprint(&bso, " SB.%ld.%d", s->offset, s->size); 591 break; 592 case E_MEMSP: 593 Bprint(&bso, " SP.%ld.%d", s->offset, s->size); 594 break; 595 } 596 } 597 598 void 599 bitset(ulong *b, int r) 600 { 601 602 b[r>>5] |= 1<<(r&31); 603 } 604 605 int 606 bittest(ulong *b, int r) 607 { 608 609 if(b[r>>5] & 1<<(r&31)) 610 return 1; 611 return 0; 612 } 613