1 #include "gc.h" 2 3 Reg* 4 rega(void) 5 { 6 Reg *r; 7 8 r = freer; 9 if(r == R) { 10 r = alloc(sizeof(*r)); 11 } else 12 freer = r->link; 13 14 *r = zreg; 15 return r; 16 } 17 18 int 19 rcmp(void *a1, void *a2) 20 { 21 Rgn *p1, *p2; 22 int c1, c2; 23 24 p1 = (Rgn*)a1; 25 p2 = (Rgn*)a2; 26 c1 = p2->cost; 27 c2 = p1->cost; 28 if(c1 -= c2) 29 return c1; 30 return p2->varno - p1->varno; 31 } 32 33 void 34 regopt(Prog *p) 35 { 36 Reg *r, *r1, *r2; 37 Prog *p1; 38 int i, z; 39 long initpc, val, npc; 40 ulong vreg; 41 Bits bit; 42 struct 43 { 44 long m; 45 long c; 46 Reg* p; 47 } log5[6], *lp; 48 49 firstr = R; 50 lastr = R; 51 nvar = 0; 52 regbits = 0; 53 for(z=0; z<BITS; z++) { 54 externs.b[z] = 0; 55 params.b[z] = 0; 56 consts.b[z] = 0; 57 addrs.b[z] = 0; 58 } 59 60 /* 61 * pass 1 62 * build aux data structure 63 * allocate pcs 64 * find use and set of variables 65 */ 66 val = 5L * 5L * 5L * 5L * 5L; 67 lp = log5; 68 for(i=0; i<5; i++) { 69 lp->m = val; 70 lp->c = 0; 71 lp->p = R; 72 val /= 5L; 73 lp++; 74 } 75 val = 0; 76 for(; p != P; p = p->link) { 77 switch(p->as) { 78 case ADATA: 79 case AGLOBL: 80 case ANAME: 81 case ASIGNAME: 82 continue; 83 } 84 r = rega(); 85 if(firstr == R) { 86 firstr = r; 87 lastr = r; 88 } else { 89 lastr->link = r; 90 r->p1 = lastr; 91 lastr->s1 = r; 92 lastr = r; 93 } 94 r->prog = p; 95 r->pc = val; 96 val++; 97 98 lp = log5; 99 for(i=0; i<5; i++) { 100 lp->c--; 101 if(lp->c <= 0) { 102 lp->c = lp->m; 103 if(lp->p != R) 104 lp->p->log5 = r; 105 lp->p = r; 106 (lp+1)->c = 0; 107 break; 108 } 109 lp++; 110 } 111 112 r1 = r->p1; 113 if(r1 != R) 114 switch(r1->prog->as) { 115 case ARETURN: 116 case AJMP: 117 case ARETT: 118 r->p1 = R; 119 r1->s1 = R; 120 } 121 122 /* 123 * left side always read 124 */ 125 bit = mkvar(&p->from, p->as==AMOVW); 126 for(z=0; z<BITS; z++) 127 r->use1.b[z] |= bit.b[z]; 128 129 /* 130 * right side depends on opcode 131 */ 132 bit = mkvar(&p->to, 0); 133 if(bany(&bit)) 134 switch(p->as) { 135 default: 136 diag(Z, "reg: unknown asop: %A", p->as); 137 break; 138 139 /* 140 * right side write 141 */ 142 case ANOP: 143 case AMOVB: 144 case AMOVBU: 145 case AMOVH: 146 case AMOVHU: 147 case AMOVW: 148 case AFMOVF: 149 case AFMOVD: 150 for(z=0; z<BITS; z++) 151 r->set.b[z] |= bit.b[z]; 152 break; 153 154 /* 155 * funny 156 */ 157 case AJMPL: 158 for(z=0; z<BITS; z++) 159 addrs.b[z] |= bit.b[z]; 160 break; 161 } 162 } 163 if(firstr == R) 164 return; 165 initpc = pc - val; 166 npc = val; 167 168 /* 169 * pass 2 170 * turn branch references to pointers 171 * build back pointers 172 */ 173 for(r = firstr; r != R; r = r->link) { 174 p = r->prog; 175 if(p->to.type == D_BRANCH) { 176 val = p->to.offset - initpc; 177 r1 = firstr; 178 while(r1 != R) { 179 r2 = r1->log5; 180 if(r2 != R && val >= r2->pc) { 181 r1 = r2; 182 continue; 183 } 184 if(r1->pc == val) 185 break; 186 r1 = r1->link; 187 } 188 if(r1 == R) { 189 nearln = p->lineno; 190 diag(Z, "ref not found\n%P", p); 191 continue; 192 } 193 if(r1 == r) { 194 nearln = p->lineno; 195 diag(Z, "ref to self\n%P", p); 196 continue; 197 } 198 r->s2 = r1; 199 r->p2link = r1->p2; 200 r1->p2 = r; 201 } 202 } 203 if(debug['R']) { 204 p = firstr->prog; 205 print("\n%L %D\n", p->lineno, &p->from); 206 } 207 208 /* 209 * pass 2.5 210 * find looping structure 211 */ 212 for(r = firstr; r != R; r = r->link) 213 r->active = 0; 214 change = 0; 215 loopit(firstr, npc); 216 if(debug['R'] && debug['v']) { 217 print("\nlooping structure:\n"); 218 for(r = firstr; r != R; r = r->link) { 219 print("%ld:%P", r->loop, r->prog); 220 for(z=0; z<BITS; z++) 221 bit.b[z] = r->use1.b[z] | 222 r->use2.b[z] | r->set.b[z]; 223 if(bany(&bit)) { 224 print("\t"); 225 if(bany(&r->use1)) 226 print(" u1=%B", r->use1); 227 if(bany(&r->use2)) 228 print(" u2=%B", r->use2); 229 if(bany(&r->set)) 230 print(" st=%B", r->set); 231 } 232 print("\n"); 233 } 234 } 235 236 /* 237 * pass 3 238 * iterate propagating usage 239 * back until flow graph is complete 240 */ 241 loop1: 242 change = 0; 243 for(r = firstr; r != R; r = r->link) 244 r->active = 0; 245 for(r = firstr; r != R; r = r->link) 246 if(r->prog->as == ARETURN) 247 prop(r, zbits, zbits); 248 loop11: 249 /* pick up unreachable code */ 250 i = 0; 251 for(r = firstr; r != R; r = r1) { 252 r1 = r->link; 253 if(r1 && r1->active && !r->active) { 254 prop(r, zbits, zbits); 255 i = 1; 256 } 257 } 258 if(i) 259 goto loop11; 260 if(change) 261 goto loop1; 262 263 264 /* 265 * pass 4 266 * iterate propagating register/variable synchrony 267 * forward until graph is complete 268 */ 269 loop2: 270 change = 0; 271 for(r = firstr; r != R; r = r->link) 272 r->active = 0; 273 synch(firstr, zbits); 274 if(change) 275 goto loop2; 276 277 278 /* 279 * pass 5 280 * isolate regions 281 * calculate costs (paint1) 282 */ 283 r = firstr; 284 if(r) { 285 for(z=0; z<BITS; z++) 286 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 287 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 288 if(bany(&bit)) { 289 nearln = r->prog->lineno; 290 warn(Z, "used and not set: %B", bit); 291 if(debug['R'] && !debug['w']) 292 print("used and not set: %B\n", bit); 293 } 294 } 295 if(debug['R'] && debug['v']) 296 print("\nprop structure:\n"); 297 for(r = firstr; r != R; r = r->link) 298 r->act = zbits; 299 rgp = region; 300 nregion = 0; 301 for(r = firstr; r != R; r = r->link) { 302 if(debug['R'] && debug['v']) 303 print("%P\n set = %B; rah = %B; cal = %B\n", 304 r->prog, r->set, r->refahead, r->calahead); 305 for(z=0; z<BITS; z++) 306 bit.b[z] = r->set.b[z] & 307 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 308 if(bany(&bit)) { 309 nearln = r->prog->lineno; 310 warn(Z, "set and not used: %B", bit); 311 if(debug['R']) 312 print("set an not used: %B\n", bit); 313 excise(r); 314 } 315 for(z=0; z<BITS; z++) 316 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 317 while(bany(&bit)) { 318 i = bnum(bit); 319 rgp->enter = r; 320 rgp->varno = i; 321 change = 0; 322 if(debug['R'] && debug['v']) 323 print("\n"); 324 paint1(r, i); 325 bit.b[i/32] &= ~(1L<<(i%32)); 326 if(change <= 0) { 327 if(debug['R']) 328 print("%L$%d: %B\n", 329 r->prog->lineno, change, blsh(i)); 330 continue; 331 } 332 rgp->cost = change; 333 nregion++; 334 if(nregion >= NRGN) { 335 warn(Z, "too many regions"); 336 goto brk; 337 } 338 rgp++; 339 } 340 } 341 brk: 342 qsort(region, nregion, sizeof(region[0]), rcmp); 343 344 /* 345 * pass 6 346 * determine used registers (paint2) 347 * replace code (paint3) 348 */ 349 rgp = region; 350 for(i=0; i<nregion; i++) { 351 bit = blsh(rgp->varno); 352 vreg = paint2(rgp->enter, rgp->varno); 353 vreg = allreg(vreg, rgp); 354 if(debug['R']) { 355 if(rgp->regno >= NREG) 356 print("%L$%d F%d: %B\n", 357 rgp->enter->prog->lineno, 358 rgp->cost, 359 rgp->regno-NREG, 360 bit); 361 else 362 print("%L$%d R%d: %B\n", 363 rgp->enter->prog->lineno, 364 rgp->cost, 365 rgp->regno, 366 bit); 367 } 368 if(rgp->regno != 0) 369 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 370 rgp++; 371 } 372 /* 373 * pass 7 374 * peep-hole on basic block 375 */ 376 if(!debug['R'] || debug['P']) 377 peep(); 378 379 /* 380 * pass 8 381 * recalculate pc 382 */ 383 val = initpc; 384 for(r = firstr; r != R; r = r1) { 385 r->pc = val; 386 p = r->prog; 387 p1 = P; 388 r1 = r->link; 389 if(r1 != R) 390 p1 = r1->prog; 391 for(; p != p1; p = p->link) { 392 switch(p->as) { 393 default: 394 val++; 395 break; 396 397 case ANOP: 398 case ADATA: 399 case AGLOBL: 400 case ANAME: 401 case ASIGNAME: 402 break; 403 } 404 } 405 } 406 pc = val; 407 408 /* 409 * fix up branches 410 */ 411 if(debug['R']) 412 if(bany(&addrs)) 413 print("addrs: %B\n", addrs); 414 415 r1 = 0; /* set */ 416 for(r = firstr; r != R; r = r->link) { 417 p = r->prog; 418 if(p->to.type == D_BRANCH) 419 p->to.offset = r->s2->pc; 420 r1 = r; 421 } 422 423 /* 424 * last pass 425 * eliminate nops 426 * free aux structures 427 */ 428 for(p = firstr->prog; p != P; p = p->link){ 429 while(p->link && p->link->as == ANOP) 430 p->link = p->link->link; 431 } 432 if(r1 != R) { 433 r1->link = freer; 434 freer = firstr; 435 } 436 } 437 438 /* 439 * add mov b,rn 440 * just after r 441 */ 442 void 443 addmove(Reg *r, int bn, int rn, int f) 444 { 445 Prog *p, *p1; 446 Adr *a; 447 Var *v; 448 449 p1 = alloc(sizeof(*p1)); 450 *p1 = zprog; 451 p = r->prog; 452 453 p1->link = p->link; 454 p->link = p1; 455 p1->lineno = p->lineno; 456 457 v = var + bn; 458 459 a = &p1->to; 460 a->sym = v->sym; 461 a->name = v->name; 462 a->offset = v->offset; 463 a->etype = v->etype; 464 a->type = D_OREG; 465 if(a->etype == TARRAY || a->sym == S) 466 a->type = D_CONST; 467 468 p1->as = AMOVW; 469 if(v->etype == TCHAR || v->etype == TUCHAR) 470 p1->as = AMOVB; 471 if(v->etype == TSHORT || v->etype == TUSHORT) 472 p1->as = AMOVH; 473 if(v->etype == TFLOAT) 474 p1->as = AFMOVF; 475 if(v->etype == TDOUBLE) 476 p1->as = AFMOVD; 477 478 p1->from.type = D_REG; 479 p1->from.reg = rn; 480 if(rn >= NREG) { 481 p1->from.type = D_FREG; 482 p1->from.reg = rn-NREG; 483 } 484 if(!f) { 485 p1->from = *a; 486 *a = zprog.from; 487 a->type = D_REG; 488 a->reg = rn; 489 if(rn >= NREG) { 490 a->type = D_FREG; 491 a->reg = rn-NREG; 492 } 493 if(v->etype == TUCHAR) 494 p1->as = AMOVBU; 495 if(v->etype == TUSHORT) 496 p1->as = AMOVHU; 497 } 498 if(debug['R']) 499 print("%P\t.a%P\n", p, p1); 500 } 501 502 Bits 503 mkvar(Adr *a, int docon) 504 { 505 Var *v; 506 int i, t, n, et, z; 507 long o; 508 Bits bit; 509 Sym *s; 510 511 t = a->type; 512 if(t == D_REG && a->reg != NREG) 513 regbits |= RtoB(a->reg); 514 if(t == D_FREG && a->reg != NREG) 515 regbits |= FtoB(a->reg); 516 s = a->sym; 517 o = a->offset; 518 et = a->etype; 519 if(s == S) { 520 if(t != D_CONST || !docon || a->reg != NREG) 521 goto none; 522 et = TLONG; 523 } 524 if(t == D_CONST) { 525 if(s == S && sval(o)) 526 goto none; 527 } 528 n = a->name; 529 v = var; 530 for(i=0; i<nvar; i++) { 531 if(s == v->sym) 532 if(n == v->name) 533 if(o == v->offset) 534 goto out; 535 v++; 536 } 537 if(s) 538 if(s->name[0] == '.') 539 goto none; 540 if(nvar >= NVAR) { 541 if(debug['w'] > 1 && s) 542 warn(Z, "variable not optimized: %s", s->name); 543 goto none; 544 } 545 i = nvar; 546 nvar++; 547 v = &var[i]; 548 v->sym = s; 549 v->offset = o; 550 v->etype = et; 551 v->name = n; 552 if(debug['R']) 553 print("bit=%2d et=%2d %D\n", i, et, a); 554 out: 555 bit = blsh(i); 556 if(n == D_EXTERN || n == D_STATIC) 557 for(z=0; z<BITS; z++) 558 externs.b[z] |= bit.b[z]; 559 if(n == D_PARAM) 560 for(z=0; z<BITS; z++) 561 params.b[z] |= bit.b[z]; 562 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 563 for(z=0; z<BITS; z++) 564 addrs.b[z] |= bit.b[z]; 565 if(t == D_CONST) { 566 if(s == S) { 567 for(z=0; z<BITS; z++) 568 consts.b[z] |= bit.b[z]; 569 return bit; 570 } 571 if(et != TARRAY) 572 for(z=0; z<BITS; z++) 573 addrs.b[z] |= bit.b[z]; 574 for(z=0; z<BITS; z++) 575 params.b[z] |= bit.b[z]; 576 return bit; 577 } 578 if(t == D_OREG) 579 return bit; 580 581 none: 582 return zbits; 583 } 584 585 void 586 prop(Reg *r, Bits ref, Bits cal) 587 { 588 Reg *r1, *r2; 589 int z; 590 591 for(r1 = r; r1 != R; r1 = r1->p1) { 592 for(z=0; z<BITS; z++) { 593 ref.b[z] |= r1->refahead.b[z]; 594 if(ref.b[z] != r1->refahead.b[z]) { 595 r1->refahead.b[z] = ref.b[z]; 596 change++; 597 } 598 cal.b[z] |= r1->calahead.b[z]; 599 if(cal.b[z] != r1->calahead.b[z]) { 600 r1->calahead.b[z] = cal.b[z]; 601 change++; 602 } 603 } 604 switch(r1->prog->as) { 605 case AJMPL: 606 for(z=0; z<BITS; z++) { 607 cal.b[z] |= ref.b[z] | externs.b[z]; 608 ref.b[z] = 0; 609 } 610 break; 611 612 case ATEXT: 613 for(z=0; z<BITS; z++) { 614 cal.b[z] = 0; 615 ref.b[z] = 0; 616 } 617 break; 618 619 case ARETURN: 620 for(z=0; z<BITS; z++) { 621 cal.b[z] = externs.b[z]; 622 ref.b[z] = 0; 623 } 624 } 625 for(z=0; z<BITS; z++) { 626 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 627 r1->use1.b[z] | r1->use2.b[z]; 628 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 629 r1->refbehind.b[z] = ref.b[z]; 630 r1->calbehind.b[z] = cal.b[z]; 631 } 632 if(r1->active) 633 break; 634 r1->active = 1; 635 } 636 for(; r != r1; r = r->p1) 637 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 638 prop(r2, r->refbehind, r->calbehind); 639 } 640 641 642 /* 643 * find looping structure 644 * 645 * 1) find reverse postordering 646 * 2) find approximate dominators, 647 * the actual dominators if the flow graph is reducible 648 * otherwise, dominators plus some other non-dominators. 649 * See Matthew S. Hecht and Jeffrey D. Ullman, 650 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 651 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 652 * Oct. 1-3, 1973, pp. 207-217. 653 * 3) find all nodes with a predecessor dominated by the current node. 654 * such a node is a loop head. 655 * recursively, all preds with a greater rpo number are in the loop 656 */ 657 long 658 postorder(Reg *r, Reg **rpo2r, long n) 659 { 660 Reg *r1; 661 662 r->rpo = 1; 663 r1 = r->s1; 664 if(r1 && !r1->rpo) 665 n = postorder(r1, rpo2r, n); 666 r1 = r->s2; 667 if(r1 && !r1->rpo) 668 n = postorder(r1, rpo2r, n); 669 rpo2r[n] = r; 670 n++; 671 return n; 672 } 673 674 long 675 rpolca(long *idom, long rpo1, long rpo2) 676 { 677 long t; 678 679 if(rpo1 == -1) 680 return rpo2; 681 while(rpo1 != rpo2){ 682 if(rpo1 > rpo2){ 683 t = rpo2; 684 rpo2 = rpo1; 685 rpo1 = t; 686 } 687 while(rpo1 < rpo2){ 688 t = idom[rpo2]; 689 if(t >= rpo2) 690 fatal(Z, "bad idom"); 691 rpo2 = t; 692 } 693 } 694 return rpo1; 695 } 696 697 int 698 doms(long *idom, long r, long s) 699 { 700 while(s > r) 701 s = idom[s]; 702 return s == r; 703 } 704 705 int 706 loophead(long *idom, Reg *r) 707 { 708 long src; 709 710 src = r->rpo; 711 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 712 return 1; 713 for(r = r->p2; r != R; r = r->p2link) 714 if(doms(idom, src, r->rpo)) 715 return 1; 716 return 0; 717 } 718 719 void 720 loopmark(Reg **rpo2r, long head, Reg *r) 721 { 722 if(r->rpo < head || r->active == head) 723 return; 724 r->active = head; 725 r->loop += LOOP; 726 if(r->p1 != R) 727 loopmark(rpo2r, head, r->p1); 728 for(r = r->p2; r != R; r = r->p2link) 729 loopmark(rpo2r, head, r); 730 } 731 732 void 733 loopit(Reg *r, long nr) 734 { 735 Reg *r1; 736 long i, d, me; 737 738 if(nr > maxnr) { 739 rpo2r = alloc(nr * sizeof(Reg*)); 740 idom = alloc(nr * sizeof(long)); 741 maxnr = nr; 742 } 743 744 d = postorder(r, rpo2r, 0); 745 if(d > nr) 746 fatal(Z, "too many reg nodes"); 747 nr = d; 748 for(i = 0; i < nr / 2; i++){ 749 r1 = rpo2r[i]; 750 rpo2r[i] = rpo2r[nr - 1 - i]; 751 rpo2r[nr - 1 - i] = r1; 752 } 753 for(i = 0; i < nr; i++) 754 rpo2r[i]->rpo = i; 755 756 idom[0] = 0; 757 for(i = 0; i < nr; i++){ 758 r1 = rpo2r[i]; 759 me = r1->rpo; 760 d = -1; 761 if(r1->p1 != R && r1->p1->rpo < me) 762 d = r1->p1->rpo; 763 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 764 if(r1->rpo < me) 765 d = rpolca(idom, d, r1->rpo); 766 idom[i] = d; 767 } 768 769 for(i = 0; i < nr; i++){ 770 r1 = rpo2r[i]; 771 r1->loop++; 772 if(r1->p2 != R && loophead(idom, r1)) 773 loopmark(rpo2r, i, r1); 774 } 775 } 776 777 void 778 synch(Reg *r, Bits dif) 779 { 780 Reg *r1; 781 int z; 782 783 for(r1 = r; r1 != R; r1 = r1->s1) { 784 for(z=0; z<BITS; z++) { 785 dif.b[z] = (dif.b[z] & 786 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 787 r1->set.b[z] | r1->regdiff.b[z]; 788 if(dif.b[z] != r1->regdiff.b[z]) { 789 r1->regdiff.b[z] = dif.b[z]; 790 change++; 791 } 792 } 793 if(r1->active) 794 break; 795 r1->active = 1; 796 for(z=0; z<BITS; z++) 797 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 798 if(r1->s2 != R) 799 synch(r1->s2, dif); 800 } 801 } 802 803 ulong 804 allreg(ulong b, Rgn *r) 805 { 806 Var *v; 807 int i; 808 809 v = var + r->varno; 810 r->regno = 0; 811 switch(v->etype) { 812 813 default: 814 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 815 break; 816 817 case TCHAR: 818 case TUCHAR: 819 case TSHORT: 820 case TUSHORT: 821 case TINT: 822 case TUINT: 823 case TLONG: 824 case TULONG: 825 case TIND: 826 case TARRAY: 827 i = BtoR(~b); 828 if(i && r->cost > 0) { 829 r->regno = i; 830 return RtoB(i); 831 } 832 break; 833 834 case TDOUBLE: 835 case TFLOAT: 836 i = BtoF(~b); 837 if(i && r->cost > 0) { 838 r->regno = i+NREG; 839 return FtoB(i); 840 } 841 break; 842 } 843 return 0; 844 } 845 846 void 847 paint1(Reg *r, int bn) 848 { 849 Reg *r1; 850 Prog *p; 851 int z; 852 ulong bb; 853 854 z = bn/32; 855 bb = 1L<<(bn%32); 856 if(r->act.b[z] & bb) 857 return; 858 for(;;) { 859 if(!(r->refbehind.b[z] & bb)) 860 break; 861 r1 = r->p1; 862 if(r1 == R) 863 break; 864 if(!(r1->refahead.b[z] & bb)) 865 break; 866 if(r1->act.b[z] & bb) 867 break; 868 r = r1; 869 } 870 871 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 872 change -= CLOAD * r->loop; 873 if(debug['R'] && debug['v']) 874 print("%ld%P\tld %B $%d\n", r->loop, 875 r->prog, blsh(bn), change); 876 } 877 for(;;) { 878 r->act.b[z] |= bb; 879 p = r->prog; 880 881 if(r->use1.b[z] & bb) { 882 change += CREF * r->loop; 883 if(p->to.type == D_FREG && p->as == AMOVW) 884 change = -CINF; /* cant go Rreg to Freg */ 885 if(debug['R'] && debug['v']) 886 print("%ld%P\tu1 %B $%d\n", r->loop, 887 p, blsh(bn), change); 888 } 889 890 if((r->use2.b[z]|r->set.b[z]) & bb) { 891 change += CREF * r->loop; 892 if(p->from.type == D_FREG && p->as == AMOVW) 893 change = -CINF; /* cant go Rreg to Freg */ 894 if(debug['R'] && debug['v']) 895 print("%ld%P\tu2 %B $%d\n", r->loop, 896 p, blsh(bn), change); 897 } 898 899 if(STORE(r) & r->regdiff.b[z] & bb) { 900 change -= CLOAD * r->loop; 901 if(debug['R'] && debug['v']) 902 print("%ld%P\tst %B $%d\n", r->loop, 903 p, blsh(bn), change); 904 } 905 906 if(r->refbehind.b[z] & bb) 907 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 908 if(r1->refahead.b[z] & bb) 909 paint1(r1, bn); 910 911 if(!(r->refahead.b[z] & bb)) 912 break; 913 r1 = r->s2; 914 if(r1 != R) 915 if(r1->refbehind.b[z] & bb) 916 paint1(r1, bn); 917 r = r->s1; 918 if(r == R) 919 break; 920 if(r->act.b[z] & bb) 921 break; 922 if(!(r->refbehind.b[z] & bb)) 923 break; 924 } 925 } 926 927 ulong 928 paint2(Reg *r, int bn) 929 { 930 Reg *r1; 931 int z; 932 ulong bb, vreg; 933 934 z = bn/32; 935 bb = 1L << (bn%32); 936 vreg = regbits; 937 if(!(r->act.b[z] & bb)) 938 return vreg; 939 for(;;) { 940 if(!(r->refbehind.b[z] & bb)) 941 break; 942 r1 = r->p1; 943 if(r1 == R) 944 break; 945 if(!(r1->refahead.b[z] & bb)) 946 break; 947 if(!(r1->act.b[z] & bb)) 948 break; 949 r = r1; 950 } 951 for(;;) { 952 r->act.b[z] &= ~bb; 953 954 vreg |= r->regu; 955 956 if(r->refbehind.b[z] & bb) 957 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 958 if(r1->refahead.b[z] & bb) 959 vreg |= paint2(r1, bn); 960 961 if(!(r->refahead.b[z] & bb)) 962 break; 963 r1 = r->s2; 964 if(r1 != R) 965 if(r1->refbehind.b[z] & bb) 966 vreg |= paint2(r1, bn); 967 r = r->s1; 968 if(r == R) 969 break; 970 if(!(r->act.b[z] & bb)) 971 break; 972 if(!(r->refbehind.b[z] & bb)) 973 break; 974 } 975 return vreg; 976 } 977 978 void 979 paint3(Reg *r, int bn, long rb, int rn) 980 { 981 Reg *r1; 982 Prog *p; 983 int z; 984 ulong bb; 985 986 z = bn/32; 987 bb = 1L << (bn%32); 988 if(r->act.b[z] & bb) 989 return; 990 for(;;) { 991 if(!(r->refbehind.b[z] & bb)) 992 break; 993 r1 = r->p1; 994 if(r1 == R) 995 break; 996 if(!(r1->refahead.b[z] & bb)) 997 break; 998 if(r1->act.b[z] & bb) 999 break; 1000 r = r1; 1001 } 1002 1003 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1004 addmove(r, bn, rn, 0); 1005 for(;;) { 1006 r->act.b[z] |= bb; 1007 p = r->prog; 1008 1009 if(r->use1.b[z] & bb) { 1010 if(debug['R']) 1011 print("%P", p); 1012 addreg(&p->from, rn); 1013 if(debug['R']) 1014 print("\t.c%P\n", p); 1015 } 1016 if((r->use2.b[z]|r->set.b[z]) & bb) { 1017 if(debug['R']) 1018 print("%P", p); 1019 addreg(&p->to, rn); 1020 if(debug['R']) 1021 print("\t.c%P\n", p); 1022 } 1023 1024 if(STORE(r) & r->regdiff.b[z] & bb) 1025 addmove(r, bn, rn, 1); 1026 r->regu |= rb; 1027 1028 if(r->refbehind.b[z] & bb) 1029 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1030 if(r1->refahead.b[z] & bb) 1031 paint3(r1, bn, rb, rn); 1032 1033 if(!(r->refahead.b[z] & bb)) 1034 break; 1035 r1 = r->s2; 1036 if(r1 != R) 1037 if(r1->refbehind.b[z] & bb) 1038 paint3(r1, bn, rb, rn); 1039 r = r->s1; 1040 if(r == R) 1041 break; 1042 if(r->act.b[z] & bb) 1043 break; 1044 if(!(r->refbehind.b[z] & bb)) 1045 break; 1046 } 1047 } 1048 1049 void 1050 addreg(Adr *a, int rn) 1051 { 1052 1053 a->sym = 0; 1054 a->name = D_NONE; 1055 a->type = D_REG; 1056 a->reg = rn; 1057 if(rn >= NREG) { 1058 a->type = D_FREG; 1059 a->reg = rn-NREG; 1060 } 1061 } 1062 1063 /* 1064 * bit reg 1065 * 0 R9 1066 * 1 R10 1067 * ... ... 1068 * 4 R13 1069 * 5 R16 1070 * ... ... 1071 * 20 R31 1072 */ 1073 long 1074 RtoB(int r) 1075 { 1076 1077 if(r >= 9 && r <= 13) 1078 return 1L << (r-9); 1079 if(r >= 16 && r <= 31) 1080 return 1L << (r-11); 1081 return 0; 1082 } 1083 1084 int 1085 BtoR(long b) 1086 { 1087 int r; 1088 1089 b &= 0x001fffffL; 1090 if(b == 0) 1091 return 0; 1092 r = bitno(b) + 9; 1093 if(r >= 14) 1094 r += 2; 1095 return r; 1096 } 1097 1098 /* 1099 * bit reg 1100 * 22 F4 1101 * 23 F6 1102 * ... ... 1103 * 31 F22 1104 */ 1105 long 1106 FtoB(int f) 1107 { 1108 1109 if(f < 4 || f > 22 || (f&1)) 1110 return 0; 1111 return 1L << (f/2 + 20); 1112 } 1113 1114 int 1115 BtoF(long b) 1116 { 1117 1118 b &= 0xffc00000L; 1119 if(b == 0) 1120 return 0; 1121 return bitno(b)*2 - 40; 1122 } 1123