1 #include "gc.h" 2 3 Reg* 4 rega(void) 5 { 6 Reg *r; 7 8 r = freer; 9 if(r == R) { 10 r = alloc(sizeof(*r)); 11 } else 12 freer = r->link; 13 14 *r = zreg; 15 return r; 16 } 17 18 int 19 rcmp(void *a1, void *a2) 20 { 21 Rgn *p1, *p2; 22 int c1, c2; 23 24 p1 = a1; 25 p2 = a2; 26 c1 = p2->cost; 27 c2 = p1->cost; 28 if(c1 -= c2) 29 return c1; 30 return p2->varno - p1->varno; 31 } 32 33 void 34 regopt(Prog *p) 35 { 36 Reg *r, *r1, *r2; 37 Prog *p1; 38 int i, z; 39 long initpc, val, npc; 40 ulong vreg; 41 Bits bit; 42 struct 43 { 44 long m; 45 long c; 46 Reg* p; 47 } log5[6], *lp; 48 49 firstr = R; 50 lastr = R; 51 nvar = 0; 52 regbits = 0; 53 for(z=0; z<BITS; z++) { 54 externs.b[z] = 0; 55 params.b[z] = 0; 56 consts.b[z] = 0; 57 addrs.b[z] = 0; 58 } 59 60 /* 61 * pass 1 62 * build aux data structure 63 * allocate pcs 64 * find use and set of variables 65 */ 66 val = 5L * 5L * 5L * 5L * 5L; 67 lp = log5; 68 for(i=0; i<5; i++) { 69 lp->m = val; 70 lp->c = 0; 71 lp->p = R; 72 val /= 5L; 73 lp++; 74 } 75 val = 0; 76 for(; p != P; p = p->link) { 77 switch(p->as) { 78 case ADATA: 79 case AGLOBL: 80 case ANAME: 81 case ASIGNAME: 82 continue; 83 } 84 r = rega(); 85 if(firstr == R) { 86 firstr = r; 87 lastr = r; 88 } else { 89 lastr->link = r; 90 r->p1 = lastr; 91 lastr->s1 = r; 92 lastr = r; 93 } 94 r->prog = p; 95 r->pc = val; 96 val++; 97 98 lp = log5; 99 for(i=0; i<5; i++) { 100 lp->c--; 101 if(lp->c <= 0) { 102 lp->c = lp->m; 103 if(lp->p != R) 104 lp->p->log5 = r; 105 lp->p = r; 106 (lp+1)->c = 0; 107 break; 108 } 109 lp++; 110 } 111 112 r1 = r->p1; 113 if(r1 != R) 114 switch(r1->prog->as) { 115 case ARETURN: 116 case ABR: 117 case ARFI: 118 r->p1 = R; 119 r1->s1 = R; 120 } 121 122 /* 123 * left side always read 124 */ 125 bit = mkvar(&p->from, p->as==AMOVW); 126 for(z=0; z<BITS; z++) 127 r->use1.b[z] |= bit.b[z]; 128 129 /* 130 * right side depends on opcode 131 */ 132 bit = mkvar(&p->to, 0); 133 if(bany(&bit)) 134 switch(p->as) { 135 default: 136 diag(Z, "reg: unknown asop: %A", p->as); 137 break; 138 139 /* 140 * right side write 141 */ 142 case ANOP: 143 case AMOVB: 144 case AMOVBU: 145 case AMOVBZ: 146 case AMOVBZU: 147 case AMOVH: 148 case AMOVHBR: 149 case AMOVHU: 150 case AMOVHZ: 151 case AMOVHZU: 152 case AMOVW: 153 case AMOVWU: 154 case AFMOVD: 155 case AFMOVDCC: 156 case AFMOVDU: 157 case AFMOVS: 158 case AFMOVSU: 159 case AFRSP: 160 for(z=0; z<BITS; z++) 161 r->set.b[z] |= bit.b[z]; 162 break; 163 164 /* 165 * funny 166 */ 167 case ABL: 168 for(z=0; z<BITS; z++) 169 addrs.b[z] |= bit.b[z]; 170 break; 171 } 172 } 173 if(firstr == R) 174 return; 175 initpc = pc - val; 176 npc = val; 177 178 /* 179 * pass 2 180 * turn branch references to pointers 181 * build back pointers 182 */ 183 for(r = firstr; r != R; r = r->link) { 184 p = r->prog; 185 if(p->to.type == D_BRANCH) { 186 val = p->to.offset - initpc; 187 r1 = firstr; 188 while(r1 != R) { 189 r2 = r1->log5; 190 if(r2 != R && val >= r2->pc) { 191 r1 = r2; 192 continue; 193 } 194 if(r1->pc == val) 195 break; 196 r1 = r1->link; 197 } 198 if(r1 == R) { 199 nearln = p->lineno; 200 diag(Z, "ref not found\n%P", p); 201 continue; 202 } 203 if(r1 == r) { 204 nearln = p->lineno; 205 diag(Z, "ref to self\n%P", p); 206 continue; 207 } 208 r->s2 = r1; 209 r->p2link = r1->p2; 210 r1->p2 = r; 211 } 212 } 213 if(debug['R']) { 214 p = firstr->prog; 215 print("\n%L %D\n", p->lineno, &p->from); 216 } 217 218 /* 219 * pass 2.5 220 * find looping structure 221 */ 222 for(r = firstr; r != R; r = r->link) 223 r->active = 0; 224 change = 0; 225 loopit(firstr, npc); 226 if(debug['R'] && debug['v']) { 227 print("\nlooping structure:\n"); 228 for(r = firstr; r != R; r = r->link) { 229 print("%ld:%P", r->loop, r->prog); 230 for(z=0; z<BITS; z++) 231 bit.b[z] = r->use1.b[z] | 232 r->use2.b[z] | r->set.b[z]; 233 if(bany(&bit)) { 234 print("\t"); 235 if(bany(&r->use1)) 236 print(" u1=%B", r->use1); 237 if(bany(&r->use2)) 238 print(" u2=%B", r->use2); 239 if(bany(&r->set)) 240 print(" st=%B", r->set); 241 } 242 print("\n"); 243 } 244 } 245 246 /* 247 * pass 3 248 * iterate propagating usage 249 * back until flow graph is complete 250 */ 251 loop1: 252 change = 0; 253 for(r = firstr; r != R; r = r->link) 254 r->active = 0; 255 for(r = firstr; r != R; r = r->link) 256 if(r->prog->as == ARETURN) 257 prop(r, zbits, zbits); 258 loop11: 259 /* pick up unreachable code */ 260 i = 0; 261 for(r = firstr; r != R; r = r1) { 262 r1 = r->link; 263 if(r1 && r1->active && !r->active) { 264 prop(r, zbits, zbits); 265 i = 1; 266 } 267 } 268 if(i) 269 goto loop11; 270 if(change) 271 goto loop1; 272 273 274 /* 275 * pass 4 276 * iterate propagating register/variable synchrony 277 * forward until graph is complete 278 */ 279 loop2: 280 change = 0; 281 for(r = firstr; r != R; r = r->link) 282 r->active = 0; 283 synch(firstr, zbits); 284 if(change) 285 goto loop2; 286 287 288 /* 289 * pass 5 290 * isolate regions 291 * calculate costs (paint1) 292 */ 293 r = firstr; 294 if(r) { 295 for(z=0; z<BITS; z++) 296 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 297 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 298 if(bany(&bit)) { 299 nearln = r->prog->lineno; 300 warn(Z, "used and not set: %B", bit); 301 if(debug['R'] && !debug['w']) 302 print("used and not set: %B\n", bit); 303 } 304 } 305 if(debug['R'] && debug['v']) 306 print("\nprop structure:\n"); 307 for(r = firstr; r != R; r = r->link) 308 r->act = zbits; 309 rgp = region; 310 nregion = 0; 311 for(r = firstr; r != R; r = r->link) { 312 if(debug['R'] && debug['v']) 313 print("%P\n set = %B; rah = %B; cal = %B\n", 314 r->prog, r->set, r->refahead, r->calahead); 315 for(z=0; z<BITS; z++) 316 bit.b[z] = r->set.b[z] & 317 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 318 if(bany(&bit)) { 319 nearln = r->prog->lineno; 320 warn(Z, "set and not used: %B", bit); 321 if(debug['R']) 322 print("set an not used: %B\n", bit); 323 excise(r); 324 } 325 for(z=0; z<BITS; z++) 326 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 327 while(bany(&bit)) { 328 i = bnum(bit); 329 rgp->enter = r; 330 rgp->varno = i; 331 change = 0; 332 if(debug['R'] && debug['v']) 333 print("\n"); 334 paint1(r, i); 335 bit.b[i/32] &= ~(1L<<(i%32)); 336 if(change <= 0) { 337 if(debug['R']) 338 print("%L$%d: %B\n", 339 r->prog->lineno, change, blsh(i)); 340 continue; 341 } 342 rgp->cost = change; 343 nregion++; 344 if(nregion >= NRGN) { 345 warn(Z, "too many regions"); 346 goto brk; 347 } 348 rgp++; 349 } 350 } 351 brk: 352 qsort(region, nregion, sizeof(region[0]), rcmp); 353 354 /* 355 * pass 6 356 * determine used registers (paint2) 357 * replace code (paint3) 358 */ 359 rgp = region; 360 for(i=0; i<nregion; i++) { 361 bit = blsh(rgp->varno); 362 vreg = paint2(rgp->enter, rgp->varno); 363 vreg = allreg(vreg, rgp); 364 if(debug['R']) { 365 if(rgp->regno >= NREG) 366 print("%L$%d F%d: %B\n", 367 rgp->enter->prog->lineno, 368 rgp->cost, 369 rgp->regno-NREG, 370 bit); 371 else 372 print("%L$%d R%d: %B\n", 373 rgp->enter->prog->lineno, 374 rgp->cost, 375 rgp->regno, 376 bit); 377 } 378 if(rgp->regno != 0) 379 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 380 rgp++; 381 } 382 /* 383 * pass 7 384 * peep-hole on basic block 385 */ 386 if(!debug['R'] || debug['P']) 387 peep(); 388 389 /* 390 * pass 8 391 * recalculate pc 392 */ 393 val = initpc; 394 for(r = firstr; r != R; r = r1) { 395 r->pc = val; 396 p = r->prog; 397 p1 = P; 398 r1 = r->link; 399 if(r1 != R) 400 p1 = r1->prog; 401 for(; p != p1; p = p->link) { 402 switch(p->as) { 403 default: 404 val++; 405 break; 406 407 case ANOP: 408 case ADATA: 409 case AGLOBL: 410 case ANAME: 411 case ASIGNAME: 412 break; 413 } 414 } 415 } 416 pc = val; 417 418 /* 419 * fix up branches 420 */ 421 if(debug['R']) 422 if(bany(&addrs)) 423 print("addrs: %B\n", addrs); 424 425 r1 = 0; /* set */ 426 for(r = firstr; r != R; r = r->link) { 427 p = r->prog; 428 if(p->to.type == D_BRANCH) 429 p->to.offset = r->s2->pc; 430 r1 = r; 431 } 432 433 /* 434 * last pass 435 * eliminate nops 436 * free aux structures 437 */ 438 for(p = firstr->prog; p != P; p = p->link){ 439 while(p->link && p->link->as == ANOP) 440 p->link = p->link->link; 441 } 442 if(r1 != R) { 443 r1->link = freer; 444 freer = firstr; 445 } 446 } 447 448 /* 449 * add mov b,rn 450 * just after r 451 */ 452 void 453 addmove(Reg *r, int bn, int rn, int f) 454 { 455 Prog *p, *p1; 456 Adr *a; 457 Var *v; 458 459 p1 = alloc(sizeof(*p1)); 460 *p1 = zprog; 461 p = r->prog; 462 463 p1->link = p->link; 464 p->link = p1; 465 p1->lineno = p->lineno; 466 467 v = var + bn; 468 469 a = &p1->to; 470 a->sym = v->sym; 471 a->name = v->name; 472 a->offset = v->offset; 473 a->etype = v->etype; 474 a->type = D_OREG; 475 if(a->etype == TARRAY || a->sym == S) 476 a->type = D_CONST; 477 478 p1->as = AMOVW; 479 if(v->etype == TCHAR || v->etype == TUCHAR) 480 p1->as = AMOVB; 481 if(v->etype == TSHORT || v->etype == TUSHORT) 482 p1->as = AMOVH; 483 if(v->etype == TFLOAT) 484 p1->as = AFMOVS; 485 if(v->etype == TDOUBLE) 486 p1->as = AFMOVD; 487 488 p1->from.type = D_REG; 489 p1->from.reg = rn; 490 if(rn >= NREG) { 491 p1->from.type = D_FREG; 492 p1->from.reg = rn-NREG; 493 } 494 if(!f) { 495 p1->from = *a; 496 *a = zprog.from; 497 a->type = D_REG; 498 a->reg = rn; 499 if(rn >= NREG) { 500 a->type = D_FREG; 501 a->reg = rn-NREG; 502 } 503 if(v->etype == TUCHAR) 504 p1->as = AMOVBZ; 505 if(v->etype == TUSHORT) 506 p1->as = AMOVHZ; 507 } 508 if(debug['R']) 509 print("%P\t.a%P\n", p, p1); 510 } 511 512 Bits 513 mkvar(Adr *a, int docon) 514 { 515 Var *v; 516 int i, t, n, et, z; 517 long o; 518 Bits bit; 519 Sym *s; 520 521 t = a->type; 522 if(t == D_REG && a->reg != NREG) 523 regbits |= RtoB(a->reg); 524 if(t == D_FREG && a->reg != NREG) 525 regbits |= FtoB(a->reg); 526 s = a->sym; 527 o = a->offset; 528 et = a->etype; 529 if(s == S) { 530 if(t != D_CONST || !docon || a->reg != NREG) 531 goto none; 532 et = TLONG; 533 } 534 if(t == D_CONST) { 535 if(s == S && sval(o)) 536 goto none; 537 } 538 n = a->name; 539 v = var; 540 for(i=0; i<nvar; i++) { 541 if(s == v->sym) 542 if(n == v->name) 543 if(o == v->offset) 544 goto out; 545 v++; 546 } 547 if(s) 548 if(s->name[0] == '.') 549 goto none; 550 if(nvar >= NVAR) { 551 if(debug['w'] > 1 && s) 552 warn(Z, "variable not optimized: %s", s->name); 553 goto none; 554 } 555 i = nvar; 556 nvar++; 557 v = &var[i]; 558 v->sym = s; 559 v->offset = o; 560 v->etype = et; 561 v->name = n; 562 if(debug['R']) 563 print("bit=%2d et=%2d %D\n", i, et, a); 564 out: 565 bit = blsh(i); 566 if(n == D_EXTERN || n == D_STATIC) 567 for(z=0; z<BITS; z++) 568 externs.b[z] |= bit.b[z]; 569 if(n == D_PARAM) 570 for(z=0; z<BITS; z++) 571 params.b[z] |= bit.b[z]; 572 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 573 for(z=0; z<BITS; z++) 574 addrs.b[z] |= bit.b[z]; 575 if(t == D_CONST) { 576 if(s == S) { 577 for(z=0; z<BITS; z++) 578 consts.b[z] |= bit.b[z]; 579 return bit; 580 } 581 if(et != TARRAY) 582 for(z=0; z<BITS; z++) 583 addrs.b[z] |= bit.b[z]; 584 for(z=0; z<BITS; z++) 585 params.b[z] |= bit.b[z]; 586 return bit; 587 } 588 if(t == D_OREG) 589 return bit; 590 591 none: 592 return zbits; 593 } 594 595 void 596 prop(Reg *r, Bits ref, Bits cal) 597 { 598 Reg *r1, *r2; 599 int z; 600 601 for(r1 = r; r1 != R; r1 = r1->p1) { 602 for(z=0; z<BITS; z++) { 603 ref.b[z] |= r1->refahead.b[z]; 604 if(ref.b[z] != r1->refahead.b[z]) { 605 r1->refahead.b[z] = ref.b[z]; 606 change++; 607 } 608 cal.b[z] |= r1->calahead.b[z]; 609 if(cal.b[z] != r1->calahead.b[z]) { 610 r1->calahead.b[z] = cal.b[z]; 611 change++; 612 } 613 } 614 switch(r1->prog->as) { 615 case ABL: 616 for(z=0; z<BITS; z++) { 617 cal.b[z] |= ref.b[z] | externs.b[z]; 618 ref.b[z] = 0; 619 } 620 break; 621 622 case ATEXT: 623 for(z=0; z<BITS; z++) { 624 cal.b[z] = 0; 625 ref.b[z] = 0; 626 } 627 break; 628 629 case ARETURN: 630 for(z=0; z<BITS; z++) { 631 cal.b[z] = externs.b[z]; 632 ref.b[z] = 0; 633 } 634 } 635 for(z=0; z<BITS; z++) { 636 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 637 r1->use1.b[z] | r1->use2.b[z]; 638 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 639 r1->refbehind.b[z] = ref.b[z]; 640 r1->calbehind.b[z] = cal.b[z]; 641 } 642 if(r1->active) 643 break; 644 r1->active = 1; 645 } 646 for(; r != r1; r = r->p1) 647 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 648 prop(r2, r->refbehind, r->calbehind); 649 } 650 651 /* 652 * find looping structure 653 * 654 * 1) find reverse postordering 655 * 2) find approximate dominators, 656 * the actual dominators if the flow graph is reducible 657 * otherwise, dominators plus some other non-dominators. 658 * See Matthew S. Hecht and Jeffrey D. Ullman, 659 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 660 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 661 * Oct. 1-3, 1973, pp. 207-217. 662 * 3) find all nodes with a predecessor dominated by the current node. 663 * such a node is a loop head. 664 * recursively, all preds with a greater rpo number are in the loop 665 */ 666 long 667 postorder(Reg *r, Reg **rpo2r, long n) 668 { 669 Reg *r1; 670 671 r->rpo = 1; 672 r1 = r->s1; 673 if(r1 && !r1->rpo) 674 n = postorder(r1, rpo2r, n); 675 r1 = r->s2; 676 if(r1 && !r1->rpo) 677 n = postorder(r1, rpo2r, n); 678 rpo2r[n] = r; 679 n++; 680 return n; 681 } 682 683 long 684 rpolca(long *idom, long rpo1, long rpo2) 685 { 686 long t; 687 688 if(rpo1 == -1) 689 return rpo2; 690 while(rpo1 != rpo2){ 691 if(rpo1 > rpo2){ 692 t = rpo2; 693 rpo2 = rpo1; 694 rpo1 = t; 695 } 696 while(rpo1 < rpo2){ 697 t = idom[rpo2]; 698 if(t >= rpo2) 699 fatal(Z, "bad idom"); 700 rpo2 = t; 701 } 702 } 703 return rpo1; 704 } 705 706 int 707 doms(long *idom, long r, long s) 708 { 709 while(s > r) 710 s = idom[s]; 711 return s == r; 712 } 713 714 int 715 loophead(long *idom, Reg *r) 716 { 717 long src; 718 719 src = r->rpo; 720 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 721 return 1; 722 for(r = r->p2; r != R; r = r->p2link) 723 if(doms(idom, src, r->rpo)) 724 return 1; 725 return 0; 726 } 727 728 void 729 loopmark(Reg **rpo2r, long head, Reg *r) 730 { 731 if(r->rpo < head || r->active == head) 732 return; 733 r->active = head; 734 r->loop += LOOP; 735 if(r->p1 != R) 736 loopmark(rpo2r, head, r->p1); 737 for(r = r->p2; r != R; r = r->p2link) 738 loopmark(rpo2r, head, r); 739 } 740 741 void 742 loopit(Reg *r, long nr) 743 { 744 Reg *r1; 745 long i, d, me; 746 747 if(nr > maxnr) { 748 rpo2r = alloc(nr * sizeof(Reg*)); 749 idom = alloc(nr * sizeof(long)); 750 maxnr = nr; 751 } 752 753 d = postorder(r, rpo2r, 0); 754 if(d > nr) 755 fatal(Z, "too many reg nodes"); 756 nr = d; 757 for(i = 0; i < nr / 2; i++){ 758 r1 = rpo2r[i]; 759 rpo2r[i] = rpo2r[nr - 1 - i]; 760 rpo2r[nr - 1 - i] = r1; 761 } 762 for(i = 0; i < nr; i++) 763 rpo2r[i]->rpo = i; 764 765 idom[0] = 0; 766 for(i = 0; i < nr; i++){ 767 r1 = rpo2r[i]; 768 me = r1->rpo; 769 d = -1; 770 if(r1->p1 != R && r1->p1->rpo < me) 771 d = r1->p1->rpo; 772 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 773 if(r1->rpo < me) 774 d = rpolca(idom, d, r1->rpo); 775 idom[i] = d; 776 } 777 778 for(i = 0; i < nr; i++){ 779 r1 = rpo2r[i]; 780 r1->loop++; 781 if(r1->p2 != R && loophead(idom, r1)) 782 loopmark(rpo2r, i, r1); 783 } 784 } 785 786 void 787 synch(Reg *r, Bits dif) 788 { 789 Reg *r1; 790 int z; 791 792 for(r1 = r; r1 != R; r1 = r1->s1) { 793 for(z=0; z<BITS; z++) { 794 dif.b[z] = (dif.b[z] & 795 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 796 r1->set.b[z] | r1->regdiff.b[z]; 797 if(dif.b[z] != r1->regdiff.b[z]) { 798 r1->regdiff.b[z] = dif.b[z]; 799 change++; 800 } 801 } 802 if(r1->active) 803 break; 804 r1->active = 1; 805 for(z=0; z<BITS; z++) 806 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 807 if(r1->s2 != R) 808 synch(r1->s2, dif); 809 } 810 } 811 812 ulong 813 allreg(ulong b, Rgn *r) 814 { 815 Var *v; 816 int i; 817 818 v = var + r->varno; 819 r->regno = 0; 820 switch(v->etype) { 821 822 default: 823 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 824 break; 825 826 case TCHAR: 827 case TUCHAR: 828 case TSHORT: 829 case TUSHORT: 830 case TINT: 831 case TUINT: 832 case TLONG: 833 case TULONG: 834 case TIND: 835 case TARRAY: 836 i = BtoR(~b); 837 if(i && r->cost > 0) { 838 r->regno = i; 839 return RtoB(i); 840 } 841 break; 842 843 case TDOUBLE: 844 case TFLOAT: 845 i = BtoF(~b); 846 if(i && r->cost > 0) { 847 r->regno = i+NREG; 848 return FtoB(i); 849 } 850 break; 851 } 852 return 0; 853 } 854 855 void 856 paint1(Reg *r, int bn) 857 { 858 Reg *r1; 859 Prog *p; 860 int z; 861 ulong bb; 862 863 z = bn/32; 864 bb = 1L<<(bn%32); 865 if(r->act.b[z] & bb) 866 return; 867 for(;;) { 868 if(!(r->refbehind.b[z] & bb)) 869 break; 870 r1 = r->p1; 871 if(r1 == R) 872 break; 873 if(!(r1->refahead.b[z] & bb)) 874 break; 875 if(r1->act.b[z] & bb) 876 break; 877 r = r1; 878 } 879 880 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 881 change -= CLOAD * r->loop; 882 if(debug['R'] && debug['v']) 883 print("%ld%P\tld %B $%d\n", r->loop, 884 r->prog, blsh(bn), change); 885 } 886 for(;;) { 887 r->act.b[z] |= bb; 888 p = r->prog; 889 890 if(r->use1.b[z] & bb) { 891 change += CREF * r->loop; 892 if(p->to.type == D_FREG && p->as == AMOVW) 893 change = -CINF; /* cant go Rreg to Freg */ 894 if(debug['R'] && debug['v']) 895 print("%ld%P\tu1 %B $%d\n", r->loop, 896 p, blsh(bn), change); 897 } 898 899 if((r->use2.b[z]|r->set.b[z]) & bb) { 900 change += CREF * r->loop; 901 if(p->from.type == D_FREG && p->as == AMOVW) 902 change = -CINF; /* cant go Rreg to Freg */ 903 if(debug['R'] && debug['v']) 904 print("%ld%P\tu2 %B $%d\n", r->loop, 905 p, blsh(bn), change); 906 } 907 908 if(STORE(r) & r->regdiff.b[z] & bb) { 909 change -= CLOAD * r->loop; 910 if(debug['R'] && debug['v']) 911 print("%ld%P\tst %B $%d\n", r->loop, 912 p, blsh(bn), change); 913 } 914 915 if(r->refbehind.b[z] & bb) 916 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 917 if(r1->refahead.b[z] & bb) 918 paint1(r1, bn); 919 920 if(!(r->refahead.b[z] & bb)) 921 break; 922 r1 = r->s2; 923 if(r1 != R) 924 if(r1->refbehind.b[z] & bb) 925 paint1(r1, bn); 926 r = r->s1; 927 if(r == R) 928 break; 929 if(r->act.b[z] & bb) 930 break; 931 if(!(r->refbehind.b[z] & bb)) 932 break; 933 } 934 } 935 936 ulong 937 paint2(Reg *r, int bn) 938 { 939 Reg *r1; 940 int z; 941 ulong bb, vreg; 942 943 z = bn/32; 944 bb = 1L << (bn%32); 945 vreg = regbits; 946 if(!(r->act.b[z] & bb)) 947 return vreg; 948 for(;;) { 949 if(!(r->refbehind.b[z] & bb)) 950 break; 951 r1 = r->p1; 952 if(r1 == R) 953 break; 954 if(!(r1->refahead.b[z] & bb)) 955 break; 956 if(!(r1->act.b[z] & bb)) 957 break; 958 r = r1; 959 } 960 for(;;) { 961 r->act.b[z] &= ~bb; 962 963 vreg |= r->regu; 964 965 if(r->refbehind.b[z] & bb) 966 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 967 if(r1->refahead.b[z] & bb) 968 vreg |= paint2(r1, bn); 969 970 if(!(r->refahead.b[z] & bb)) 971 break; 972 r1 = r->s2; 973 if(r1 != R) 974 if(r1->refbehind.b[z] & bb) 975 vreg |= paint2(r1, bn); 976 r = r->s1; 977 if(r == R) 978 break; 979 if(!(r->act.b[z] & bb)) 980 break; 981 if(!(r->refbehind.b[z] & bb)) 982 break; 983 } 984 return vreg; 985 } 986 987 void 988 paint3(Reg *r, int bn, long rb, int rn) 989 { 990 Reg *r1; 991 Prog *p; 992 int z; 993 ulong bb; 994 995 z = bn/32; 996 bb = 1L << (bn%32); 997 if(r->act.b[z] & bb) 998 return; 999 for(;;) { 1000 if(!(r->refbehind.b[z] & bb)) 1001 break; 1002 r1 = r->p1; 1003 if(r1 == R) 1004 break; 1005 if(!(r1->refahead.b[z] & bb)) 1006 break; 1007 if(r1->act.b[z] & bb) 1008 break; 1009 r = r1; 1010 } 1011 1012 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1013 addmove(r, bn, rn, 0); 1014 for(;;) { 1015 r->act.b[z] |= bb; 1016 p = r->prog; 1017 1018 if(r->use1.b[z] & bb) { 1019 if(debug['R']) 1020 print("%P", p); 1021 addreg(&p->from, rn); 1022 if(debug['R']) 1023 print("\t.c%P\n", p); 1024 } 1025 if((r->use2.b[z]|r->set.b[z]) & bb) { 1026 if(debug['R']) 1027 print("%P", p); 1028 addreg(&p->to, rn); 1029 if(debug['R']) 1030 print("\t.c%P\n", p); 1031 } 1032 1033 if(STORE(r) & r->regdiff.b[z] & bb) 1034 addmove(r, bn, rn, 1); 1035 r->regu |= rb; 1036 1037 if(r->refbehind.b[z] & bb) 1038 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1039 if(r1->refahead.b[z] & bb) 1040 paint3(r1, bn, rb, rn); 1041 1042 if(!(r->refahead.b[z] & bb)) 1043 break; 1044 r1 = r->s2; 1045 if(r1 != R) 1046 if(r1->refbehind.b[z] & bb) 1047 paint3(r1, bn, rb, rn); 1048 r = r->s1; 1049 if(r == R) 1050 break; 1051 if(r->act.b[z] & bb) 1052 break; 1053 if(!(r->refbehind.b[z] & bb)) 1054 break; 1055 } 1056 } 1057 1058 void 1059 addreg(Adr *a, int rn) 1060 { 1061 1062 a->sym = 0; 1063 a->name = D_NONE; 1064 a->type = D_REG; 1065 a->reg = rn; 1066 if(rn >= NREG) { 1067 a->type = D_FREG; 1068 a->reg = rn-NREG; 1069 } 1070 } 1071 1072 /* 1073 * track register variables including external registers: 1074 * bit reg 1075 * 0 R7 1076 * 1 R8 1077 * ... ... 1078 * 21 R28 1079 */ 1080 long 1081 RtoB(int r) 1082 { 1083 1084 if(r >= REGMIN && r <= REGMAX) 1085 return 1L << (r-REGMIN); 1086 return 0; 1087 } 1088 1089 int 1090 BtoR(long b) 1091 { 1092 b &= 0x001fffffL; 1093 if(b == 0) 1094 return 0; 1095 return bitno(b) + REGMIN; 1096 } 1097 1098 /* 1099 * bit reg 1100 * 22 F17 1101 * 23 F18 1102 * ... ... 1103 * 31 F26 1104 */ 1105 long 1106 FtoB(int f) 1107 { 1108 if(f < FREGMIN || f > FREGEXT) 1109 return 0; 1110 return 1L << (f - FREGMIN + 22); 1111 } 1112 1113 int 1114 BtoF(long b) 1115 { 1116 1117 b &= 0xffc00000L; 1118 if(b == 0) 1119 return 0; 1120 return bitno(b) - 22 + FREGMIN; 1121 } 1122