1 #include "gc.h" 2 3 void addsplits(void); 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 r = alloc(sizeof(*r)); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(const void *a1, const void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = (Rgn*)a1; 27 p2 = (Rgn*)a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val, npc; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = 0; 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 continue; 84 } 85 r = rega(); 86 if(firstr == R) { 87 firstr = r; 88 lastr = r; 89 } else { 90 lastr->link = r; 91 r->p1 = lastr; 92 lastr->s1 = r; 93 lastr = r; 94 } 95 r->prog = p; 96 r->pc = val; 97 val++; 98 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->c--; 102 if(lp->c <= 0) { 103 lp->c = lp->m; 104 if(lp->p != R) 105 lp->p->log5 = r; 106 lp->p = r; 107 (lp+1)->c = 0; 108 break; 109 } 110 lp++; 111 } 112 113 r1 = r->p1; 114 if(r1 != R) 115 switch(r1->prog->as) { 116 case ARET: 117 case AJMP: 118 case ARFE: 119 r->p1 = R; 120 r1->s1 = R; 121 } 122 123 /* 124 * left side always read 125 */ 126 bit = mkvar(&p->from, p->as==AMOVW); 127 for(z=0; z<BITS; z++) 128 r->use1.b[z] |= bit.b[z]; 129 130 /* 131 * right side depends on opcode 132 */ 133 bit = mkvar(&p->to, 0); 134 if(bany(&bit)) 135 switch(p->as) { 136 default: 137 diag(Z, "reg: unknown asop: %A", p->as); 138 break; 139 140 /* 141 * right side write 142 */ 143 case ANOP: 144 case AMOVB: 145 case AMOVBU: 146 case AMOVH: 147 case AMOVHU: 148 case AMOVW: 149 case AMOVF: 150 case AMOVD: 151 for(z=0; z<BITS; z++) 152 r->set.b[z] |= bit.b[z]; 153 break; 154 155 /* 156 * funny 157 */ 158 case AJAL: 159 for(z=0; z<BITS; z++) 160 addrs.b[z] |= bit.b[z]; 161 break; 162 } 163 } 164 if(firstr == R) 165 return; 166 initpc = pc - val; 167 npc = val; 168 169 /* 170 * pass 2 171 * turn branch references to pointers 172 * build back pointers 173 */ 174 for(r = firstr; r != R; r = r->link) { 175 p = r->prog; 176 if(p->to.type == D_BRANCH) { 177 val = p->to.offset - initpc; 178 r1 = firstr; 179 while(r1 != R) { 180 r2 = r1->log5; 181 if(r2 != R && val >= r2->pc) { 182 r1 = r2; 183 continue; 184 } 185 if(r1->pc == val) 186 break; 187 r1 = r1->link; 188 } 189 if(r1 == R) { 190 nearln = p->lineno; 191 diag(Z, "ref not found\n%P", p); 192 continue; 193 } 194 if(r1 == r) { 195 nearln = p->lineno; 196 diag(Z, "ref to self\n%P", p); 197 continue; 198 } 199 r->s2 = r1; 200 r->p2link = r1->p2; 201 r1->p2 = r; 202 } 203 } 204 if(debug['R']) { 205 p = firstr->prog; 206 print("\n%L %D\n", p->lineno, &p->from); 207 } 208 209 /* 210 * pass 2.5 211 * find looping structure 212 */ 213 for(r = firstr; r != R; r = r->link) 214 r->active = 0; 215 change = 0; 216 loopit(firstr, npc); 217 218 /* 219 * pass 3 220 * iterate propagating usage 221 * back until flow graph is complete 222 */ 223 loop1: 224 change = 0; 225 for(r = firstr; r != R; r = r->link) 226 r->active = 0; 227 for(r = firstr; r != R; r = r->link) 228 if(r->prog->as == ARET) 229 prop(r, zbits, zbits); 230 loop11: 231 /* pick up unreachable code */ 232 i = 0; 233 for(r = firstr; r != R; r = r1) { 234 r1 = r->link; 235 if(r1 && r1->active && !r->active) { 236 prop(r, zbits, zbits); 237 i = 1; 238 } 239 } 240 if(i) 241 goto loop11; 242 if(change) 243 goto loop1; 244 245 246 /* 247 * pass 4 248 * iterate propagating register/variable synchrony 249 * forward until graph is complete 250 */ 251 loop2: 252 change = 0; 253 for(r = firstr; r != R; r = r->link) 254 r->active = 0; 255 synch(firstr, zbits); 256 if(change) 257 goto loop2; 258 259 addsplits(); 260 261 if(debug['R'] && debug['v']) { 262 print("\nprop structure:\n"); 263 for(r = firstr; r != R; r = r->link) { 264 print("%ld:%P", r->loop, r->prog); 265 for(z=0; z<BITS; z++) 266 bit.b[z] = r->set.b[z] | 267 r->refahead.b[z] | r->calahead.b[z] | 268 r->refbehind.b[z] | r->calbehind.b[z] | 269 r->use1.b[z] | r->use2.b[z]; 270 if(bany(&bit)) { 271 print("\t"); 272 if(bany(&r->use1)) 273 print(" u1=%B", r->use1); 274 if(bany(&r->use2)) 275 print(" u2=%B", r->use2); 276 if(bany(&r->set)) 277 print(" st=%B", r->set); 278 if(bany(&r->refahead)) 279 print(" ra=%B", r->refahead); 280 if(bany(&r->calahead)) 281 print(" ca=%B", r->calahead); 282 if(bany(&r->refbehind)) 283 print(" rb=%B", r->refbehind); 284 if(bany(&r->calbehind)) 285 print(" cb=%B", r->calbehind); 286 if(bany(&r->regdiff)) 287 print(" rd=%B", r->regdiff); 288 } 289 print("\n"); 290 } 291 } 292 293 /* 294 * pass 5 295 * isolate regions 296 * calculate costs (paint1) 297 */ 298 r = firstr; 299 if(r) { 300 for(z=0; z<BITS; z++) 301 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 302 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 303 if(bany(&bit)) { 304 nearln = r->prog->lineno; 305 warn(Z, "used and not set: %B", bit); 306 if(debug['R'] && !debug['w']) 307 print("used and not set: %B\n", bit); 308 } 309 } 310 311 for(r = firstr; r != R; r = r->link) 312 r->act = zbits; 313 rgp = region; 314 nregion = 0; 315 for(r = firstr; r != R; r = r->link) { 316 for(z=0; z<BITS; z++) 317 bit.b[z] = r->set.b[z] & 318 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 319 if(bany(&bit)) { 320 nearln = r->prog->lineno; 321 warn(Z, "set and not used: %B", bit); 322 if(debug['R']) 323 print("set and not used: %B\n", bit); 324 excise(r); 325 } 326 for(z=0; z<BITS; z++) 327 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 328 while(bany(&bit)) { 329 i = bnum(bit); 330 rgp->enter = r; 331 rgp->varno = i; 332 change = 0; 333 if(debug['R'] && debug['v']) 334 print("\n"); 335 paint1(r, i); 336 bit.b[i/32] &= ~(1L<<(i%32)); 337 if(change <= 0) { 338 if(debug['R']) 339 print("%L $%d: %B\n", 340 r->prog->lineno, change, blsh(i)); 341 continue; 342 } 343 rgp->cost = change; 344 nregion++; 345 if(nregion >= NRGN) { 346 warn(Z, "too many regions"); 347 goto brk; 348 } 349 rgp++; 350 } 351 } 352 brk: 353 qsort(region, nregion, sizeof(region[0]), rcmp); 354 355 /* 356 * pass 6 357 * determine used registers (paint2) 358 * replace code (paint3) 359 */ 360 rgp = region; 361 for(i=0; i<nregion; i++) { 362 bit = blsh(rgp->varno); 363 vreg = paint2(rgp->enter, rgp->varno); 364 vreg = allreg(vreg, rgp); 365 if(debug['R']) { 366 if(rgp->regno >= NREG) 367 print("%L $%d F%d: %B\n", 368 rgp->enter->prog->lineno, 369 rgp->cost, 370 rgp->regno-NREG, 371 bit); 372 else 373 print("%L $%d R%d: %B\n", 374 rgp->enter->prog->lineno, 375 rgp->cost, 376 rgp->regno, 377 bit); 378 } 379 if(rgp->regno != 0) 380 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 381 rgp++; 382 } 383 /* 384 * pass 7 385 * peep-hole on basic block 386 */ 387 if(!debug['R'] || debug['P']) 388 peep(); 389 390 /* 391 * pass 8 392 * recalculate pc 393 */ 394 val = initpc; 395 for(r = firstr; r != R; r = r1) { 396 r->pc = val; 397 p = r->prog; 398 p1 = P; 399 r1 = r->link; 400 if(r1 != R) 401 p1 = r1->prog; 402 for(; p != p1; p = p->link) { 403 switch(p->as) { 404 default: 405 val++; 406 break; 407 408 case ANOP: 409 case ADATA: 410 case AGLOBL: 411 case ANAME: 412 break; 413 } 414 } 415 } 416 pc = val; 417 418 /* 419 * fix up branches 420 */ 421 if(debug['R']) 422 if(bany(&addrs)) 423 print("addrs: %B\n", addrs); 424 425 r1 = 0; /* set */ 426 for(r = firstr; r != R; r = r->link) { 427 p = r->prog; 428 if(p->to.type == D_BRANCH) 429 p->to.offset = r->s2->pc; 430 r1 = r; 431 } 432 433 /* 434 * last pass 435 * eliminate nops 436 * free aux structures 437 */ 438 for(p = firstr->prog; p != P; p = p->link){ 439 while(p->link && p->link->as == ANOP) 440 p->link = p->link->link; 441 } 442 if(r1 != R) { 443 r1->link = freer; 444 freer = firstr; 445 } 446 } 447 448 void 449 addsplits(void) 450 { 451 Reg *r, *r1; 452 int z, i; 453 Bits bit; 454 455 for(r = firstr; r != R; r = r->link) { 456 if(r->loop > 1) 457 continue; 458 if(r->prog->as == AJAL) 459 continue; 460 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 461 if(r1->loop <= 1) 462 continue; 463 for(z=0; z<BITS; z++) 464 bit.b[z] = r1->calbehind.b[z] & 465 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 466 ~(r->calahead.b[z] & addrs.b[z]); 467 while(bany(&bit)) { 468 i = bnum(bit); 469 bit.b[i/32] &= ~(1L << (i%32)); 470 } 471 } 472 } 473 } 474 475 /* 476 * add mov b,rn 477 * just after r 478 */ 479 void 480 addmove(Reg *r, int bn, int rn, int f) 481 { 482 Prog *p, *p1; 483 Adr *a; 484 Var *v; 485 486 p1 = alloc(sizeof(*p1)); 487 *p1 = zprog; 488 p = r->prog; 489 490 p1->link = p->link; 491 p->link = p1; 492 p1->lineno = p->lineno; 493 494 v = var + bn; 495 496 a = &p1->to; 497 a->sym = v->sym; 498 a->name = v->name; 499 a->offset = v->offset; 500 a->etype = v->etype; 501 a->type = D_OREG; 502 if(a->etype == TARRAY || a->sym == S) 503 a->type = D_CONST; 504 505 p1->as = AMOVW; 506 if(v->etype == TCHAR || v->etype == TUCHAR) 507 p1->as = AMOVB; 508 if(v->etype == TSHORT || v->etype == TUSHORT) 509 p1->as = AMOVH; 510 if(v->etype == TFLOAT) 511 p1->as = AMOVF; 512 if(v->etype == TDOUBLE) 513 p1->as = AMOVD; 514 515 p1->from.type = D_REG; 516 p1->from.reg = rn; 517 if(rn >= NREG) { 518 p1->from.type = D_FREG; 519 p1->from.reg = rn-NREG; 520 } 521 if(!f) { 522 p1->from = *a; 523 *a = zprog.from; 524 a->type = D_REG; 525 a->reg = rn; 526 if(rn >= NREG) { 527 a->type = D_FREG; 528 a->reg = rn-NREG; 529 } 530 if(v->etype == TUCHAR) 531 p1->as = AMOVBU; 532 if(v->etype == TUSHORT) 533 p1->as = AMOVHU; 534 } 535 if(debug['R']) 536 print("%P\t.a%P\n", p, p1); 537 } 538 539 Bits 540 mkvar(Adr *a, int docon) 541 { 542 Var *v; 543 int i, t, n, et, z; 544 long o; 545 Bits bit; 546 Sym *s; 547 548 t = a->type; 549 if(t == D_REG && a->reg != NREG) 550 regbits |= RtoB(a->reg); 551 if(t == D_FREG && a->reg != NREG) 552 regbits |= FtoB(a->reg); 553 s = a->sym; 554 o = a->offset; 555 et = a->etype; 556 if(s == S) { 557 if(t != D_CONST || !docon || a->reg != NREG) 558 goto none; 559 et = TLONG; 560 } 561 if(t == D_CONST) { 562 if(s == S && sval(o)) 563 goto none; 564 } 565 566 n = a->name; 567 v = var; 568 for(i=0; i<nvar; i++) { 569 if(s == v->sym) 570 if(n == v->name) 571 if(o == v->offset) 572 goto out; 573 v++; 574 } 575 if(s) 576 if(s->name[0] == '.') 577 goto none; 578 if(nvar >= NVAR) { 579 if(debug['w'] > 1 && s) 580 warn(Z, "variable not optimized: %s", s->name); 581 goto none; 582 } 583 i = nvar; 584 nvar++; 585 v = &var[i]; 586 v->sym = s; 587 v->offset = o; 588 v->etype = et; 589 v->name = n; 590 if(debug['R']) 591 print("bit=%2d et=%2d %D\n", i, et, a); 592 out: 593 bit = blsh(i); 594 if(n == D_EXTERN || n == D_STATIC) 595 for(z=0; z<BITS; z++) 596 externs.b[z] |= bit.b[z]; 597 if(n == D_PARAM) 598 for(z=0; z<BITS; z++) 599 params.b[z] |= bit.b[z]; 600 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 601 for(z=0; z<BITS; z++) 602 addrs.b[z] |= bit.b[z]; 603 if(t == D_CONST) { 604 if(s == S) { 605 for(z=0; z<BITS; z++) 606 consts.b[z] |= bit.b[z]; 607 return bit; 608 } 609 if(et != TARRAY) 610 for(z=0; z<BITS; z++) 611 addrs.b[z] |= bit.b[z]; 612 for(z=0; z<BITS; z++) 613 params.b[z] |= bit.b[z]; 614 return bit; 615 } 616 if(t == D_OREG) 617 return bit; 618 619 none: 620 return zbits; 621 } 622 623 void 624 prop(Reg *r, Bits ref, Bits cal) 625 { 626 Reg *r1, *r2; 627 int z; 628 629 for(r1 = r; r1 != R; r1 = r1->p1) { 630 for(z=0; z<BITS; z++) { 631 ref.b[z] |= r1->refahead.b[z]; 632 if(ref.b[z] != r1->refahead.b[z]) { 633 r1->refahead.b[z] = ref.b[z]; 634 change++; 635 } 636 cal.b[z] |= r1->calahead.b[z]; 637 if(cal.b[z] != r1->calahead.b[z]) { 638 r1->calahead.b[z] = cal.b[z]; 639 change++; 640 } 641 } 642 switch(r1->prog->as) { 643 case AJAL: 644 for(z=0; z<BITS; z++) { 645 cal.b[z] |= ref.b[z] | externs.b[z]; 646 ref.b[z] = 0; 647 } 648 break; 649 650 case ATEXT: 651 for(z=0; z<BITS; z++) { 652 cal.b[z] = 0; 653 ref.b[z] = 0; 654 } 655 break; 656 657 case ARET: 658 for(z=0; z<BITS; z++) { 659 cal.b[z] = externs.b[z]; 660 ref.b[z] = 0; 661 } 662 } 663 for(z=0; z<BITS; z++) { 664 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 665 r1->use1.b[z] | r1->use2.b[z]; 666 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 667 r1->refbehind.b[z] = ref.b[z]; 668 r1->calbehind.b[z] = cal.b[z]; 669 } 670 if(r1->active) 671 break; 672 r1->active = 1; 673 } 674 for(; r != r1; r = r->p1) 675 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 676 prop(r2, r->refbehind, r->calbehind); 677 } 678 679 /* 680 * find looping structure 681 * 682 * 1) find reverse postordering 683 * 2) find approximate dominators, 684 * the actual dominators if the flow graph is reducible 685 * otherwise, dominators plus some other non-dominators. 686 * See Matthew S. Hecht and Jeffrey D. Ullman, 687 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 688 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 689 * Oct. 1-3, 1973, pp. 207-217. 690 * 3) find all nodes with a predecessor dominated by the current node. 691 * such a node is a loop head. 692 * recursively, all preds with a greater rpo number are in the loop 693 */ 694 long 695 postorder(Reg *r, Reg **rpo2r, long n) 696 { 697 Reg *r1; 698 699 r->rpo = 1; 700 r1 = r->s1; 701 if(r1 && !r1->rpo) 702 n = postorder(r1, rpo2r, n); 703 r1 = r->s2; 704 if(r1 && !r1->rpo) 705 n = postorder(r1, rpo2r, n); 706 rpo2r[n] = r; 707 n++; 708 return n; 709 } 710 711 long 712 rpolca(long *idom, long rpo1, long rpo2) 713 { 714 long t; 715 716 if(rpo1 == -1) 717 return rpo2; 718 while(rpo1 != rpo2){ 719 if(rpo1 > rpo2){ 720 t = rpo2; 721 rpo2 = rpo1; 722 rpo1 = t; 723 } 724 while(rpo1 < rpo2){ 725 t = idom[rpo2]; 726 if(t >= rpo2) 727 sysfatal("bad idom"); 728 rpo2 = t; 729 } 730 } 731 return rpo1; 732 } 733 734 int 735 doms(long *idom, long r, long s) 736 { 737 while(s > r) 738 s = idom[s]; 739 return s == r; 740 } 741 742 int 743 loophead(long *idom, Reg *r) 744 { 745 long src; 746 747 src = r->rpo; 748 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 749 return 1; 750 for(r = r->p2; r != R; r = r->p2link) 751 if(doms(idom, src, r->rpo)) 752 return 1; 753 return 0; 754 } 755 756 void 757 loopmark(Reg **rpo2r, long head, Reg *r) 758 { 759 if(r->rpo < head || r->active == head) 760 return; 761 r->active = head; 762 r->loop += LOOP; 763 if(r->p1 != R) 764 loopmark(rpo2r, head, r->p1); 765 for(r = r->p2; r != R; r = r->p2link) 766 loopmark(rpo2r, head, r); 767 } 768 769 void 770 loopit(Reg *r, long nr) 771 { 772 Reg *r1; 773 long i, d, me; 774 775 if(nr > maxnr) { 776 rpo2r = alloc(nr * sizeof(Reg*)); 777 idom = alloc(nr * sizeof(long)); 778 maxnr = nr; 779 } 780 781 d = postorder(r, rpo2r, 0); 782 if(d > nr) 783 sysfatal("too many reg nodes"); 784 nr = d; 785 for(i = 0; i < nr / 2; i++){ 786 r1 = rpo2r[i]; 787 rpo2r[i] = rpo2r[nr - 1 - i]; 788 rpo2r[nr - 1 - i] = r1; 789 } 790 for(i = 0; i < nr; i++) 791 rpo2r[i]->rpo = i; 792 793 idom[0] = 0; 794 for(i = 0; i < nr; i++){ 795 r1 = rpo2r[i]; 796 me = r1->rpo; 797 d = -1; 798 if(r1->p1 != R && r1->p1->rpo < me) 799 d = r1->p1->rpo; 800 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 801 if(r1->rpo < me) 802 d = rpolca(idom, d, r1->rpo); 803 idom[i] = d; 804 } 805 806 for(i = 0; i < nr; i++){ 807 r1 = rpo2r[i]; 808 r1->loop++; 809 if(r1->p2 != R && loophead(idom, r1)) 810 loopmark(rpo2r, i, r1); 811 } 812 } 813 814 void 815 synch(Reg *r, Bits dif) 816 { 817 Reg *r1; 818 int z; 819 820 for(r1 = r; r1 != R; r1 = r1->s1) { 821 for(z=0; z<BITS; z++) { 822 dif.b[z] = (dif.b[z] & 823 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 824 r1->set.b[z] | r1->regdiff.b[z]; 825 if(dif.b[z] != r1->regdiff.b[z]) { 826 r1->regdiff.b[z] = dif.b[z]; 827 change++; 828 } 829 } 830 if(r1->active) 831 break; 832 r1->active = 1; 833 for(z=0; z<BITS; z++) 834 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 835 if(r1->s2 != R) 836 synch(r1->s2, dif); 837 } 838 } 839 840 ulong 841 allreg(ulong b, Rgn *r) 842 { 843 Var *v; 844 int i; 845 846 v = var + r->varno; 847 r->regno = 0; 848 switch(v->etype) { 849 850 default: 851 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 852 break; 853 854 case TCHAR: 855 case TUCHAR: 856 case TSHORT: 857 case TUSHORT: 858 case TINT: 859 case TUINT: 860 case TLONG: 861 case TULONG: 862 case TIND: 863 case TARRAY: 864 i = BtoR(~b); 865 if(i && r->cost >= 0) { 866 r->regno = i; 867 return RtoB(i); 868 } 869 break; 870 871 case TDOUBLE: 872 case TFLOAT: 873 i = BtoF(~b); 874 if(i && r->cost >= 0) { 875 r->regno = i+NREG; 876 return FtoB(i); 877 } 878 break; 879 } 880 return 0; 881 } 882 883 void 884 paint1(Reg *r, int bn) 885 { 886 Reg *r1; 887 Prog *p; 888 int z; 889 ulong bb; 890 891 z = bn/32; 892 bb = 1L<<(bn%32); 893 if(r->act.b[z] & bb) 894 return; 895 for(;;) { 896 if(!(r->refbehind.b[z] & bb)) 897 break; 898 r1 = r->p1; 899 if(r1 == R) 900 break; 901 if(!(r1->refahead.b[z] & bb)) 902 break; 903 if(r1->act.b[z] & bb) 904 break; 905 r = r1; 906 } 907 908 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 909 change -= CLOAD * r->loop; 910 if(debug['R'] && debug['v']) 911 print("%ld%P\tld %B $%d\n", r->loop, 912 r->prog, blsh(bn), change); 913 } 914 for(;;) { 915 r->act.b[z] |= bb; 916 p = r->prog; 917 918 if(r->use1.b[z] & bb) { 919 change += CREF * r->loop; 920 if(debug['R'] && debug['v']) 921 print("%ld%P\tu1 %B $%d\n", r->loop, 922 p, blsh(bn), change); 923 } 924 925 if((r->use2.b[z]|r->set.b[z]) & bb) { 926 change += CREF * r->loop; 927 if(debug['R'] && debug['v']) 928 print("%ld%P\tu2 %B $%d\n", r->loop, 929 p, blsh(bn), change); 930 } 931 932 if(STORE(r) & r->regdiff.b[z] & bb) { 933 change -= CLOAD * r->loop; 934 if(debug['R'] && debug['v']) 935 print("%ld%P\tst %B $%d\n", r->loop, 936 p, blsh(bn), change); 937 } 938 939 if(r->refbehind.b[z] & bb) 940 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 941 if(r1->refahead.b[z] & bb) 942 paint1(r1, bn); 943 944 if(!(r->refahead.b[z] & bb)) 945 break; 946 r1 = r->s2; 947 if(r1 != R) 948 if(r1->refbehind.b[z] & bb) 949 paint1(r1, bn); 950 r = r->s1; 951 if(r == R) 952 break; 953 if(r->act.b[z] & bb) 954 break; 955 if(!(r->refbehind.b[z] & bb)) 956 break; 957 } 958 } 959 960 ulong 961 paint2(Reg *r, int bn) 962 { 963 Reg *r1; 964 int z; 965 ulong bb, vreg; 966 967 z = bn/32; 968 bb = 1L << (bn%32); 969 vreg = regbits; 970 if(!(r->act.b[z] & bb)) 971 return vreg; 972 for(;;) { 973 if(!(r->refbehind.b[z] & bb)) 974 break; 975 r1 = r->p1; 976 if(r1 == R) 977 break; 978 if(!(r1->refahead.b[z] & bb)) 979 break; 980 if(!(r1->act.b[z] & bb)) 981 break; 982 r = r1; 983 } 984 for(;;) { 985 r->act.b[z] &= ~bb; 986 987 vreg |= r->regu; 988 989 if(r->refbehind.b[z] & bb) 990 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 991 if(r1->refahead.b[z] & bb) 992 vreg |= paint2(r1, bn); 993 994 if(!(r->refahead.b[z] & bb)) 995 break; 996 r1 = r->s2; 997 if(r1 != R) 998 if(r1->refbehind.b[z] & bb) 999 vreg |= paint2(r1, bn); 1000 r = r->s1; 1001 if(r == R) 1002 break; 1003 if(!(r->act.b[z] & bb)) 1004 break; 1005 if(!(r->refbehind.b[z] & bb)) 1006 break; 1007 } 1008 return vreg; 1009 } 1010 1011 void 1012 paint3(Reg *r, int bn, long rb, int rn) 1013 { 1014 Reg *r1; 1015 Prog *p; 1016 int z; 1017 ulong bb; 1018 1019 z = bn/32; 1020 bb = 1L << (bn%32); 1021 if(r->act.b[z] & bb) 1022 return; 1023 for(;;) { 1024 if(!(r->refbehind.b[z] & bb)) 1025 break; 1026 r1 = r->p1; 1027 if(r1 == R) 1028 break; 1029 if(!(r1->refahead.b[z] & bb)) 1030 break; 1031 if(r1->act.b[z] & bb) 1032 break; 1033 r = r1; 1034 } 1035 1036 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1037 addmove(r, bn, rn, 0); 1038 for(;;) { 1039 r->act.b[z] |= bb; 1040 p = r->prog; 1041 1042 if(r->use1.b[z] & bb) { 1043 if(debug['R']) 1044 print("%P", p); 1045 addreg(&p->from, rn); 1046 if(debug['R']) 1047 print("\t.c%P\n", p); 1048 } 1049 if((r->use2.b[z]|r->set.b[z]) & bb) { 1050 if(debug['R']) 1051 print("%P", p); 1052 addreg(&p->to, rn); 1053 if(debug['R']) 1054 print("\t.c%P\n", p); 1055 } 1056 1057 if(STORE(r) & r->regdiff.b[z] & bb) 1058 addmove(r, bn, rn, 1); 1059 r->regu |= rb; 1060 1061 if(r->refbehind.b[z] & bb) 1062 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1063 if(r1->refahead.b[z] & bb) 1064 paint3(r1, bn, rb, rn); 1065 1066 if(!(r->refahead.b[z] & bb)) 1067 break; 1068 r1 = r->s2; 1069 if(r1 != R) 1070 if(r1->refbehind.b[z] & bb) 1071 paint3(r1, bn, rb, rn); 1072 r = r->s1; 1073 if(r == R) 1074 break; 1075 if(r->act.b[z] & bb) 1076 break; 1077 if(!(r->refbehind.b[z] & bb)) 1078 break; 1079 } 1080 } 1081 1082 void 1083 addreg(Adr *a, int rn) 1084 { 1085 1086 a->sym = 0; 1087 a->name = D_NONE; 1088 a->type = D_REG; 1089 a->reg = rn; 1090 if(rn >= NREG) { 1091 a->type = D_FREG; 1092 a->reg = rn-NREG; 1093 } 1094 } 1095 1096 /* 1097 * bit reg 1098 * 0 R3 1099 * 1 R4 1100 * ... ... 1101 * 19 R22 1102 * 20 R23 1103 */ 1104 long 1105 RtoB(int r) 1106 { 1107 1108 if(r < 3 || r > 23) 1109 return 0; 1110 return 1L << (r-3); 1111 } 1112 1113 BtoR(long b) 1114 { 1115 1116 b &= 0x001fffffL; 1117 if(b == 0) 1118 return 0; 1119 return bitno(b) + 3; 1120 } 1121 1122 /* 1123 * bit reg 1124 * 22 F4 1125 * 23 F6 1126 * ... ... 1127 * 31 F22 1128 */ 1129 long 1130 FtoB(int f) 1131 { 1132 1133 if(f < 4 || f > 22 || (f&1)) 1134 return 0; 1135 return 1L << (f/2 + 20); 1136 } 1137 1138 int 1139 BtoF(long b) 1140 { 1141 1142 b &= 0xffc00000L; 1143 if(b == 0) 1144 return 0; 1145 return bitno(b)*2 - 40; 1146 } 1147