1 #include "gc.h" 2 3 void addsplits(void); 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 r = alloc(sizeof(*r)); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(const void *a1, const void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = (Rgn*)a1; 27 p2 = (Rgn*)a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val, npc; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = 0; 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 case ASIGNAME: 84 continue; 85 } 86 r = rega(); 87 if(firstr == R) { 88 firstr = r; 89 lastr = r; 90 } else { 91 lastr->link = r; 92 r->p1 = lastr; 93 lastr->s1 = r; 94 lastr = r; 95 } 96 r->prog = p; 97 r->pc = val; 98 val++; 99 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->c--; 103 if(lp->c <= 0) { 104 lp->c = lp->m; 105 if(lp->p != R) 106 lp->p->log5 = r; 107 lp->p = r; 108 (lp+1)->c = 0; 109 break; 110 } 111 lp++; 112 } 113 114 r1 = r->p1; 115 if(r1 != R) 116 switch(r1->prog->as) { 117 case ARET: 118 case AJMP: 119 case ARFE: 120 r->p1 = R; 121 r1->s1 = R; 122 } 123 124 /* 125 * left side always read 126 */ 127 bit = mkvar(&p->from, p->as==AMOVW); 128 for(z=0; z<BITS; z++) 129 r->use1.b[z] |= bit.b[z]; 130 131 /* 132 * right side depends on opcode 133 */ 134 bit = mkvar(&p->to, 0); 135 if(bany(&bit)) 136 switch(p->as) { 137 default: 138 diag(Z, "reg: unknown asop: %A", p->as); 139 break; 140 141 /* 142 * right side write 143 */ 144 case ANOP: 145 case AMOVB: 146 case AMOVBU: 147 case AMOVH: 148 case AMOVHU: 149 case AMOVW: 150 case AMOVF: 151 case AMOVD: 152 for(z=0; z<BITS; z++) 153 r->set.b[z] |= bit.b[z]; 154 break; 155 156 /* 157 * funny 158 */ 159 case AJAL: 160 for(z=0; z<BITS; z++) 161 addrs.b[z] |= bit.b[z]; 162 break; 163 } 164 } 165 if(firstr == R) 166 return; 167 initpc = pc - val; 168 npc = val; 169 170 /* 171 * pass 2 172 * turn branch references to pointers 173 * build back pointers 174 */ 175 for(r = firstr; r != R; r = r->link) { 176 p = r->prog; 177 if(p->to.type == D_BRANCH) { 178 val = p->to.offset - initpc; 179 r1 = firstr; 180 while(r1 != R) { 181 r2 = r1->log5; 182 if(r2 != R && val >= r2->pc) { 183 r1 = r2; 184 continue; 185 } 186 if(r1->pc == val) 187 break; 188 r1 = r1->link; 189 } 190 if(r1 == R) { 191 nearln = p->lineno; 192 diag(Z, "ref not found\n%P", p); 193 continue; 194 } 195 if(r1 == r) { 196 nearln = p->lineno; 197 diag(Z, "ref to self\n%P", p); 198 continue; 199 } 200 r->s2 = r1; 201 r->p2link = r1->p2; 202 r1->p2 = r; 203 } 204 } 205 if(debug['R']) { 206 p = firstr->prog; 207 print("\n%L %D\n", p->lineno, &p->from); 208 } 209 210 /* 211 * pass 2.5 212 * find looping structure 213 */ 214 for(r = firstr; r != R; r = r->link) 215 r->active = 0; 216 change = 0; 217 loopit(firstr, npc); 218 219 /* 220 * pass 3 221 * iterate propagating usage 222 * back until flow graph is complete 223 */ 224 loop1: 225 change = 0; 226 for(r = firstr; r != R; r = r->link) 227 r->active = 0; 228 for(r = firstr; r != R; r = r->link) 229 if(r->prog->as == ARET) 230 prop(r, zbits, zbits); 231 loop11: 232 /* pick up unreachable code */ 233 i = 0; 234 for(r = firstr; r != R; r = r1) { 235 r1 = r->link; 236 if(r1 && r1->active && !r->active) { 237 prop(r, zbits, zbits); 238 i = 1; 239 } 240 } 241 if(i) 242 goto loop11; 243 if(change) 244 goto loop1; 245 246 247 /* 248 * pass 4 249 * iterate propagating register/variable synchrony 250 * forward until graph is complete 251 */ 252 loop2: 253 change = 0; 254 for(r = firstr; r != R; r = r->link) 255 r->active = 0; 256 synch(firstr, zbits); 257 if(change) 258 goto loop2; 259 260 addsplits(); 261 262 if(debug['R'] && debug['v']) { 263 print("\nprop structure:\n"); 264 for(r = firstr; r != R; r = r->link) { 265 print("%ld:%P", r->loop, r->prog); 266 for(z=0; z<BITS; z++) 267 bit.b[z] = r->set.b[z] | 268 r->refahead.b[z] | r->calahead.b[z] | 269 r->refbehind.b[z] | r->calbehind.b[z] | 270 r->use1.b[z] | r->use2.b[z]; 271 if(bany(&bit)) { 272 print("\t"); 273 if(bany(&r->use1)) 274 print(" u1=%B", r->use1); 275 if(bany(&r->use2)) 276 print(" u2=%B", r->use2); 277 if(bany(&r->set)) 278 print(" st=%B", r->set); 279 if(bany(&r->refahead)) 280 print(" ra=%B", r->refahead); 281 if(bany(&r->calahead)) 282 print(" ca=%B", r->calahead); 283 if(bany(&r->refbehind)) 284 print(" rb=%B", r->refbehind); 285 if(bany(&r->calbehind)) 286 print(" cb=%B", r->calbehind); 287 if(bany(&r->regdiff)) 288 print(" rd=%B", r->regdiff); 289 } 290 print("\n"); 291 } 292 } 293 294 /* 295 * pass 5 296 * isolate regions 297 * calculate costs (paint1) 298 */ 299 r = firstr; 300 if(r) { 301 for(z=0; z<BITS; z++) 302 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 303 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 304 if(bany(&bit)) { 305 nearln = r->prog->lineno; 306 warn(Z, "used and not set: %B", bit); 307 if(debug['R'] && !debug['w']) 308 print("used and not set: %B\n", bit); 309 } 310 } 311 312 for(r = firstr; r != R; r = r->link) 313 r->act = zbits; 314 rgp = region; 315 nregion = 0; 316 for(r = firstr; r != R; r = r->link) { 317 for(z=0; z<BITS; z++) 318 bit.b[z] = r->set.b[z] & 319 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 320 if(bany(&bit)) { 321 nearln = r->prog->lineno; 322 warn(Z, "set and not used: %B", bit); 323 if(debug['R']) 324 print("set and not used: %B\n", bit); 325 excise(r); 326 } 327 for(z=0; z<BITS; z++) 328 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 329 while(bany(&bit)) { 330 i = bnum(bit); 331 rgp->enter = r; 332 rgp->varno = i; 333 change = 0; 334 if(debug['R'] && debug['v']) 335 print("\n"); 336 paint1(r, i); 337 bit.b[i/32] &= ~(1L<<(i%32)); 338 if(change <= 0) { 339 if(debug['R']) 340 print("%L $%d: %B\n", 341 r->prog->lineno, change, blsh(i)); 342 continue; 343 } 344 rgp->cost = change; 345 nregion++; 346 if(nregion >= NRGN) { 347 warn(Z, "too many regions"); 348 goto brk; 349 } 350 rgp++; 351 } 352 } 353 brk: 354 qsort(region, nregion, sizeof(region[0]), rcmp); 355 356 /* 357 * pass 6 358 * determine used registers (paint2) 359 * replace code (paint3) 360 */ 361 rgp = region; 362 for(i=0; i<nregion; i++) { 363 bit = blsh(rgp->varno); 364 vreg = paint2(rgp->enter, rgp->varno); 365 vreg = allreg(vreg, rgp); 366 if(debug['R']) { 367 if(rgp->regno >= NREG) 368 print("%L $%d F%d: %B\n", 369 rgp->enter->prog->lineno, 370 rgp->cost, 371 rgp->regno-NREG, 372 bit); 373 else 374 print("%L $%d R%d: %B\n", 375 rgp->enter->prog->lineno, 376 rgp->cost, 377 rgp->regno, 378 bit); 379 } 380 if(rgp->regno != 0) 381 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 382 rgp++; 383 } 384 /* 385 * pass 7 386 * peep-hole on basic block 387 */ 388 if(!debug['R'] || debug['P']) 389 peep(); 390 391 /* 392 * pass 8 393 * recalculate pc 394 */ 395 val = initpc; 396 for(r = firstr; r != R; r = r1) { 397 r->pc = val; 398 p = r->prog; 399 p1 = P; 400 r1 = r->link; 401 if(r1 != R) 402 p1 = r1->prog; 403 for(; p != p1; p = p->link) { 404 switch(p->as) { 405 default: 406 val++; 407 break; 408 409 case ANOP: 410 case ADATA: 411 case AGLOBL: 412 case ANAME: 413 case ASIGNAME: 414 break; 415 } 416 } 417 } 418 pc = val; 419 420 /* 421 * fix up branches 422 */ 423 if(debug['R']) 424 if(bany(&addrs)) 425 print("addrs: %B\n", addrs); 426 427 r1 = 0; /* set */ 428 for(r = firstr; r != R; r = r->link) { 429 p = r->prog; 430 if(p->to.type == D_BRANCH) 431 p->to.offset = r->s2->pc; 432 r1 = r; 433 } 434 435 /* 436 * last pass 437 * eliminate nops 438 * free aux structures 439 */ 440 for(p = firstr->prog; p != P; p = p->link){ 441 while(p->link && p->link->as == ANOP) 442 p->link = p->link->link; 443 } 444 if(r1 != R) { 445 r1->link = freer; 446 freer = firstr; 447 } 448 } 449 450 void 451 addsplits(void) 452 { 453 Reg *r, *r1; 454 int z, i; 455 Bits bit; 456 457 for(r = firstr; r != R; r = r->link) { 458 if(r->loop > 1) 459 continue; 460 if(r->prog->as == AJAL) 461 continue; 462 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 463 if(r1->loop <= 1) 464 continue; 465 for(z=0; z<BITS; z++) 466 bit.b[z] = r1->calbehind.b[z] & 467 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 468 ~(r->calahead.b[z] & addrs.b[z]); 469 while(bany(&bit)) { 470 i = bnum(bit); 471 bit.b[i/32] &= ~(1L << (i%32)); 472 } 473 } 474 } 475 } 476 477 /* 478 * add mov b,rn 479 * just after r 480 */ 481 void 482 addmove(Reg *r, int bn, int rn, int f) 483 { 484 Prog *p, *p1; 485 Adr *a; 486 Var *v; 487 488 p1 = alloc(sizeof(*p1)); 489 *p1 = zprog; 490 p = r->prog; 491 492 p1->link = p->link; 493 p->link = p1; 494 p1->lineno = p->lineno; 495 496 v = var + bn; 497 498 a = &p1->to; 499 a->sym = v->sym; 500 a->name = v->name; 501 a->offset = v->offset; 502 a->etype = v->etype; 503 a->type = D_OREG; 504 if(a->etype == TARRAY || a->sym == S) 505 a->type = D_CONST; 506 507 p1->as = AMOVW; 508 if(v->etype == TCHAR || v->etype == TUCHAR) 509 p1->as = AMOVB; 510 if(v->etype == TSHORT || v->etype == TUSHORT) 511 p1->as = AMOVH; 512 if(v->etype == TFLOAT) 513 p1->as = AMOVF; 514 if(v->etype == TDOUBLE) 515 p1->as = AMOVD; 516 517 p1->from.type = D_REG; 518 p1->from.reg = rn; 519 if(rn >= NREG) { 520 p1->from.type = D_FREG; 521 p1->from.reg = rn-NREG; 522 } 523 if(!f) { 524 p1->from = *a; 525 *a = zprog.from; 526 a->type = D_REG; 527 a->reg = rn; 528 if(rn >= NREG) { 529 a->type = D_FREG; 530 a->reg = rn-NREG; 531 } 532 if(v->etype == TUCHAR) 533 p1->as = AMOVBU; 534 if(v->etype == TUSHORT) 535 p1->as = AMOVHU; 536 } 537 if(debug['R']) 538 print("%P\t.a%P\n", p, p1); 539 } 540 541 Bits 542 mkvar(Adr *a, int docon) 543 { 544 Var *v; 545 int i, t, n, et, z; 546 long o; 547 Bits bit; 548 Sym *s; 549 550 t = a->type; 551 if(t == D_REG && a->reg != NREG) 552 regbits |= RtoB(a->reg); 553 if(t == D_FREG && a->reg != NREG) 554 regbits |= FtoB(a->reg); 555 s = a->sym; 556 o = a->offset; 557 et = a->etype; 558 if(s == S) { 559 if(t != D_CONST || !docon || a->reg != NREG) 560 goto none; 561 et = TLONG; 562 } 563 if(t == D_CONST) { 564 if(s == S && sval(o)) 565 goto none; 566 } 567 568 n = a->name; 569 v = var; 570 for(i=0; i<nvar; i++) { 571 if(s == v->sym) 572 if(n == v->name) 573 if(o == v->offset) 574 goto out; 575 v++; 576 } 577 if(s) 578 if(s->name[0] == '.') 579 goto none; 580 if(nvar >= NVAR) { 581 if(debug['w'] > 1 && s) 582 warn(Z, "variable not optimized: %s", s->name); 583 goto none; 584 } 585 i = nvar; 586 nvar++; 587 v = &var[i]; 588 v->sym = s; 589 v->offset = o; 590 v->etype = et; 591 v->name = n; 592 if(debug['R']) 593 print("bit=%2d et=%2d %D\n", i, et, a); 594 out: 595 bit = blsh(i); 596 if(n == D_EXTERN || n == D_STATIC) 597 for(z=0; z<BITS; z++) 598 externs.b[z] |= bit.b[z]; 599 if(n == D_PARAM) 600 for(z=0; z<BITS; z++) 601 params.b[z] |= bit.b[z]; 602 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 603 for(z=0; z<BITS; z++) 604 addrs.b[z] |= bit.b[z]; 605 if(t == D_CONST) { 606 if(s == S) { 607 for(z=0; z<BITS; z++) 608 consts.b[z] |= bit.b[z]; 609 return bit; 610 } 611 if(et != TARRAY) 612 for(z=0; z<BITS; z++) 613 addrs.b[z] |= bit.b[z]; 614 for(z=0; z<BITS; z++) 615 params.b[z] |= bit.b[z]; 616 return bit; 617 } 618 if(t == D_OREG) 619 return bit; 620 621 none: 622 return zbits; 623 } 624 625 void 626 prop(Reg *r, Bits ref, Bits cal) 627 { 628 Reg *r1, *r2; 629 int z; 630 631 for(r1 = r; r1 != R; r1 = r1->p1) { 632 for(z=0; z<BITS; z++) { 633 ref.b[z] |= r1->refahead.b[z]; 634 if(ref.b[z] != r1->refahead.b[z]) { 635 r1->refahead.b[z] = ref.b[z]; 636 change++; 637 } 638 cal.b[z] |= r1->calahead.b[z]; 639 if(cal.b[z] != r1->calahead.b[z]) { 640 r1->calahead.b[z] = cal.b[z]; 641 change++; 642 } 643 } 644 switch(r1->prog->as) { 645 case AJAL: 646 for(z=0; z<BITS; z++) { 647 cal.b[z] |= ref.b[z] | externs.b[z]; 648 ref.b[z] = 0; 649 } 650 break; 651 652 case ATEXT: 653 for(z=0; z<BITS; z++) { 654 cal.b[z] = 0; 655 ref.b[z] = 0; 656 } 657 break; 658 659 case ARET: 660 for(z=0; z<BITS; z++) { 661 cal.b[z] = externs.b[z]; 662 ref.b[z] = 0; 663 } 664 } 665 for(z=0; z<BITS; z++) { 666 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 667 r1->use1.b[z] | r1->use2.b[z]; 668 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 669 r1->refbehind.b[z] = ref.b[z]; 670 r1->calbehind.b[z] = cal.b[z]; 671 } 672 if(r1->active) 673 break; 674 r1->active = 1; 675 } 676 for(; r != r1; r = r->p1) 677 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 678 prop(r2, r->refbehind, r->calbehind); 679 } 680 681 /* 682 * find looping structure 683 * 684 * 1) find reverse postordering 685 * 2) find approximate dominators, 686 * the actual dominators if the flow graph is reducible 687 * otherwise, dominators plus some other non-dominators. 688 * See Matthew S. Hecht and Jeffrey D. Ullman, 689 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 690 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 691 * Oct. 1-3, 1973, pp. 207-217. 692 * 3) find all nodes with a predecessor dominated by the current node. 693 * such a node is a loop head. 694 * recursively, all preds with a greater rpo number are in the loop 695 */ 696 long 697 postorder(Reg *r, Reg **rpo2r, long n) 698 { 699 Reg *r1; 700 701 r->rpo = 1; 702 r1 = r->s1; 703 if(r1 && !r1->rpo) 704 n = postorder(r1, rpo2r, n); 705 r1 = r->s2; 706 if(r1 && !r1->rpo) 707 n = postorder(r1, rpo2r, n); 708 rpo2r[n] = r; 709 n++; 710 return n; 711 } 712 713 long 714 rpolca(long *idom, long rpo1, long rpo2) 715 { 716 long t; 717 718 if(rpo1 == -1) 719 return rpo2; 720 while(rpo1 != rpo2){ 721 if(rpo1 > rpo2){ 722 t = rpo2; 723 rpo2 = rpo1; 724 rpo1 = t; 725 } 726 while(rpo1 < rpo2){ 727 t = idom[rpo2]; 728 if(t >= rpo2) 729 fatal(Z, "bad idom"); 730 rpo2 = t; 731 } 732 } 733 return rpo1; 734 } 735 736 int 737 doms(long *idom, long r, long s) 738 { 739 while(s > r) 740 s = idom[s]; 741 return s == r; 742 } 743 744 int 745 loophead(long *idom, Reg *r) 746 { 747 long src; 748 749 src = r->rpo; 750 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 751 return 1; 752 for(r = r->p2; r != R; r = r->p2link) 753 if(doms(idom, src, r->rpo)) 754 return 1; 755 return 0; 756 } 757 758 void 759 loopmark(Reg **rpo2r, long head, Reg *r) 760 { 761 if(r->rpo < head || r->active == head) 762 return; 763 r->active = head; 764 r->loop += LOOP; 765 if(r->p1 != R) 766 loopmark(rpo2r, head, r->p1); 767 for(r = r->p2; r != R; r = r->p2link) 768 loopmark(rpo2r, head, r); 769 } 770 771 void 772 loopit(Reg *r, long nr) 773 { 774 Reg *r1; 775 long i, d, me; 776 777 if(nr > maxnr) { 778 rpo2r = alloc(nr * sizeof(Reg*)); 779 idom = alloc(nr * sizeof(long)); 780 maxnr = nr; 781 } 782 783 d = postorder(r, rpo2r, 0); 784 if(d > nr) 785 fatal(Z, "too many reg nodes"); 786 nr = d; 787 for(i = 0; i < nr / 2; i++){ 788 r1 = rpo2r[i]; 789 rpo2r[i] = rpo2r[nr - 1 - i]; 790 rpo2r[nr - 1 - i] = r1; 791 } 792 for(i = 0; i < nr; i++) 793 rpo2r[i]->rpo = i; 794 795 idom[0] = 0; 796 for(i = 0; i < nr; i++){ 797 r1 = rpo2r[i]; 798 me = r1->rpo; 799 d = -1; 800 if(r1->p1 != R && r1->p1->rpo < me) 801 d = r1->p1->rpo; 802 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 803 if(r1->rpo < me) 804 d = rpolca(idom, d, r1->rpo); 805 idom[i] = d; 806 } 807 808 for(i = 0; i < nr; i++){ 809 r1 = rpo2r[i]; 810 r1->loop++; 811 if(r1->p2 != R && loophead(idom, r1)) 812 loopmark(rpo2r, i, r1); 813 } 814 } 815 816 void 817 synch(Reg *r, Bits dif) 818 { 819 Reg *r1; 820 int z; 821 822 for(r1 = r; r1 != R; r1 = r1->s1) { 823 for(z=0; z<BITS; z++) { 824 dif.b[z] = (dif.b[z] & 825 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 826 r1->set.b[z] | r1->regdiff.b[z]; 827 if(dif.b[z] != r1->regdiff.b[z]) { 828 r1->regdiff.b[z] = dif.b[z]; 829 change++; 830 } 831 } 832 if(r1->active) 833 break; 834 r1->active = 1; 835 for(z=0; z<BITS; z++) 836 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 837 if(r1->s2 != R) 838 synch(r1->s2, dif); 839 } 840 } 841 842 ulong 843 allreg(ulong b, Rgn *r) 844 { 845 Var *v; 846 int i; 847 848 v = var + r->varno; 849 r->regno = 0; 850 switch(v->etype) { 851 852 default: 853 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 854 break; 855 856 case TCHAR: 857 case TUCHAR: 858 case TSHORT: 859 case TUSHORT: 860 case TINT: 861 case TUINT: 862 case TLONG: 863 case TULONG: 864 case TIND: 865 case TARRAY: 866 i = BtoR(~b); 867 if(i && r->cost >= 0) { 868 r->regno = i; 869 return RtoB(i); 870 } 871 break; 872 873 case TDOUBLE: 874 case TFLOAT: 875 i = BtoF(~b); 876 if(i && r->cost >= 0) { 877 r->regno = i+NREG; 878 return FtoB(i); 879 } 880 break; 881 } 882 return 0; 883 } 884 885 void 886 paint1(Reg *r, int bn) 887 { 888 Reg *r1; 889 Prog *p; 890 int z; 891 ulong bb; 892 893 z = bn/32; 894 bb = 1L<<(bn%32); 895 if(r->act.b[z] & bb) 896 return; 897 for(;;) { 898 if(!(r->refbehind.b[z] & bb)) 899 break; 900 r1 = r->p1; 901 if(r1 == R) 902 break; 903 if(!(r1->refahead.b[z] & bb)) 904 break; 905 if(r1->act.b[z] & bb) 906 break; 907 r = r1; 908 } 909 910 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 911 change -= CLOAD * r->loop; 912 if(debug['R'] && debug['v']) 913 print("%ld%P\tld %B $%d\n", r->loop, 914 r->prog, blsh(bn), change); 915 } 916 for(;;) { 917 r->act.b[z] |= bb; 918 p = r->prog; 919 920 if(r->use1.b[z] & bb) { 921 change += CREF * r->loop; 922 if(debug['R'] && debug['v']) 923 print("%ld%P\tu1 %B $%d\n", r->loop, 924 p, blsh(bn), change); 925 } 926 927 if((r->use2.b[z]|r->set.b[z]) & bb) { 928 change += CREF * r->loop; 929 if(debug['R'] && debug['v']) 930 print("%ld%P\tu2 %B $%d\n", r->loop, 931 p, blsh(bn), change); 932 } 933 934 if(STORE(r) & r->regdiff.b[z] & bb) { 935 change -= CLOAD * r->loop; 936 if(debug['R'] && debug['v']) 937 print("%ld%P\tst %B $%d\n", r->loop, 938 p, blsh(bn), change); 939 } 940 941 if(r->refbehind.b[z] & bb) 942 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 943 if(r1->refahead.b[z] & bb) 944 paint1(r1, bn); 945 946 if(!(r->refahead.b[z] & bb)) 947 break; 948 r1 = r->s2; 949 if(r1 != R) 950 if(r1->refbehind.b[z] & bb) 951 paint1(r1, bn); 952 r = r->s1; 953 if(r == R) 954 break; 955 if(r->act.b[z] & bb) 956 break; 957 if(!(r->refbehind.b[z] & bb)) 958 break; 959 } 960 } 961 962 ulong 963 paint2(Reg *r, int bn) 964 { 965 Reg *r1; 966 int z; 967 ulong bb, vreg; 968 969 z = bn/32; 970 bb = 1L << (bn%32); 971 vreg = regbits; 972 if(!(r->act.b[z] & bb)) 973 return vreg; 974 for(;;) { 975 if(!(r->refbehind.b[z] & bb)) 976 break; 977 r1 = r->p1; 978 if(r1 == R) 979 break; 980 if(!(r1->refahead.b[z] & bb)) 981 break; 982 if(!(r1->act.b[z] & bb)) 983 break; 984 r = r1; 985 } 986 for(;;) { 987 r->act.b[z] &= ~bb; 988 989 vreg |= r->regu; 990 991 if(r->refbehind.b[z] & bb) 992 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 993 if(r1->refahead.b[z] & bb) 994 vreg |= paint2(r1, bn); 995 996 if(!(r->refahead.b[z] & bb)) 997 break; 998 r1 = r->s2; 999 if(r1 != R) 1000 if(r1->refbehind.b[z] & bb) 1001 vreg |= paint2(r1, bn); 1002 r = r->s1; 1003 if(r == R) 1004 break; 1005 if(!(r->act.b[z] & bb)) 1006 break; 1007 if(!(r->refbehind.b[z] & bb)) 1008 break; 1009 } 1010 return vreg; 1011 } 1012 1013 void 1014 paint3(Reg *r, int bn, long rb, int rn) 1015 { 1016 Reg *r1; 1017 Prog *p; 1018 int z; 1019 ulong bb; 1020 1021 z = bn/32; 1022 bb = 1L << (bn%32); 1023 if(r->act.b[z] & bb) 1024 return; 1025 for(;;) { 1026 if(!(r->refbehind.b[z] & bb)) 1027 break; 1028 r1 = r->p1; 1029 if(r1 == R) 1030 break; 1031 if(!(r1->refahead.b[z] & bb)) 1032 break; 1033 if(r1->act.b[z] & bb) 1034 break; 1035 r = r1; 1036 } 1037 1038 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1039 addmove(r, bn, rn, 0); 1040 for(;;) { 1041 r->act.b[z] |= bb; 1042 p = r->prog; 1043 1044 if(r->use1.b[z] & bb) { 1045 if(debug['R']) 1046 print("%P", p); 1047 addreg(&p->from, rn); 1048 if(debug['R']) 1049 print("\t.c%P\n", p); 1050 } 1051 if((r->use2.b[z]|r->set.b[z]) & bb) { 1052 if(debug['R']) 1053 print("%P", p); 1054 addreg(&p->to, rn); 1055 if(debug['R']) 1056 print("\t.c%P\n", p); 1057 } 1058 1059 if(STORE(r) & r->regdiff.b[z] & bb) 1060 addmove(r, bn, rn, 1); 1061 r->regu |= rb; 1062 1063 if(r->refbehind.b[z] & bb) 1064 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1065 if(r1->refahead.b[z] & bb) 1066 paint3(r1, bn, rb, rn); 1067 1068 if(!(r->refahead.b[z] & bb)) 1069 break; 1070 r1 = r->s2; 1071 if(r1 != R) 1072 if(r1->refbehind.b[z] & bb) 1073 paint3(r1, bn, rb, rn); 1074 r = r->s1; 1075 if(r == R) 1076 break; 1077 if(r->act.b[z] & bb) 1078 break; 1079 if(!(r->refbehind.b[z] & bb)) 1080 break; 1081 } 1082 } 1083 1084 void 1085 addreg(Adr *a, int rn) 1086 { 1087 1088 a->sym = 0; 1089 a->name = D_NONE; 1090 a->type = D_REG; 1091 a->reg = rn; 1092 if(rn >= NREG) { 1093 a->type = D_FREG; 1094 a->reg = rn-NREG; 1095 } 1096 } 1097 1098 /* 1099 * bit reg 1100 * 0 R3 1101 * 1 R4 1102 * ... ... 1103 * 19 R22 1104 * 20 R23 1105 */ 1106 long 1107 RtoB(int r) 1108 { 1109 1110 if(r < 3 || r > 23) 1111 return 0; 1112 return 1L << (r-3); 1113 } 1114 1115 int 1116 BtoR(long b) 1117 { 1118 1119 b &= 0x001fffffL; 1120 if(b == 0) 1121 return 0; 1122 return bitno(b) + 3; 1123 } 1124 1125 /* 1126 * bit reg 1127 * 22 F4 1128 * 23 F6 1129 * ... ... 1130 * 31 F22 1131 */ 1132 long 1133 FtoB(int f) 1134 { 1135 1136 if(f < 4 || f > 22 || (f&1)) 1137 return 0; 1138 return 1L << (f/2 + 20); 1139 } 1140 1141 int 1142 BtoF(long b) 1143 { 1144 1145 b &= 0xffc00000L; 1146 if(b == 0) 1147 return 0; 1148 return bitno(b)*2 - 40; 1149 } 1150