1 #include "gc.h" 2 3 void addsplits(void); 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 r = alloc(sizeof(*r)); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(void *a1, void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = (Rgn*)a1; 27 p2 = (Rgn*)a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val, npc; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = 0; 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 case ASIGNAME: 84 continue; 85 } 86 r = rega(); 87 if(firstr == R) { 88 firstr = r; 89 lastr = r; 90 } else { 91 lastr->link = r; 92 r->p1 = lastr; 93 lastr->s1 = r; 94 lastr = r; 95 } 96 r->prog = p; 97 r->pc = val; 98 val++; 99 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->c--; 103 if(lp->c <= 0) { 104 lp->c = lp->m; 105 if(lp->p != R) 106 lp->p->log5 = r; 107 lp->p = r; 108 (lp+1)->c = 0; 109 break; 110 } 111 lp++; 112 } 113 114 r1 = r->p1; 115 if(r1 != R) 116 switch(r1->prog->as) { 117 case ARET: 118 case AJMP: 119 case ARFE: 120 r->p1 = R; 121 r1->s1 = R; 122 } 123 124 /* 125 * left side always read 126 */ 127 bit = mkvar(&p->from, p->as==AMOVW); 128 for(z=0; z<BITS; z++) 129 r->use1.b[z] |= bit.b[z]; 130 131 /* 132 * right side depends on opcode 133 */ 134 bit = mkvar(&p->to, 0); 135 if(bany(&bit)) 136 switch(p->as) { 137 default: 138 diag(Z, "reg: unknown asop: %A", p->as); 139 break; 140 141 /* 142 * right side write 143 */ 144 case ANOP: 145 case AMOVB: 146 case AMOVBU: 147 case AMOVH: 148 case AMOVHU: 149 case AMOVW: 150 case AMOVV: 151 case AMOVF: 152 case AMOVD: 153 for(z=0; z<BITS; z++) 154 r->set.b[z] |= bit.b[z]; 155 break; 156 157 /* 158 * funny 159 */ 160 case AJAL: 161 for(z=0; z<BITS; z++) 162 addrs.b[z] |= bit.b[z]; 163 break; 164 } 165 } 166 if(firstr == R) 167 return; 168 initpc = pc - val; 169 npc = val; 170 171 /* 172 * pass 2 173 * turn branch references to pointers 174 * build back pointers 175 */ 176 for(r = firstr; r != R; r = r->link) { 177 p = r->prog; 178 if(p->to.type == D_BRANCH) { 179 val = p->to.offset - initpc; 180 r1 = firstr; 181 while(r1 != R) { 182 r2 = r1->log5; 183 if(r2 != R && val >= r2->pc) { 184 r1 = r2; 185 continue; 186 } 187 if(r1->pc == val) 188 break; 189 r1 = r1->link; 190 } 191 if(r1 == R) { 192 nearln = p->lineno; 193 diag(Z, "ref not found\n%P", p); 194 continue; 195 } 196 if(r1 == r) { 197 nearln = p->lineno; 198 diag(Z, "ref to self\n%P", p); 199 continue; 200 } 201 r->s2 = r1; 202 r->p2link = r1->p2; 203 r1->p2 = r; 204 } 205 } 206 if(debug['R']) { 207 p = firstr->prog; 208 print("\n%L %D\n", p->lineno, &p->from); 209 } 210 211 /* 212 * pass 2.5 213 * find looping structure 214 */ 215 for(r = firstr; r != R; r = r->link) 216 r->active = 0; 217 change = 0; 218 loopit(firstr, npc); 219 220 /* 221 * pass 3 222 * iterate propagating usage 223 * back until flow graph is complete 224 */ 225 loop1: 226 change = 0; 227 for(r = firstr; r != R; r = r->link) 228 r->active = 0; 229 for(r = firstr; r != R; r = r->link) 230 if(r->prog->as == ARET) 231 prop(r, zbits, zbits); 232 loop11: 233 /* pick up unreachable code */ 234 i = 0; 235 for(r = firstr; r != R; r = r1) { 236 r1 = r->link; 237 if(r1 && r1->active && !r->active) { 238 prop(r, zbits, zbits); 239 i = 1; 240 } 241 } 242 if(i) 243 goto loop11; 244 if(change) 245 goto loop1; 246 247 248 /* 249 * pass 4 250 * iterate propagating register/variable synchrony 251 * forward until graph is complete 252 */ 253 loop2: 254 change = 0; 255 for(r = firstr; r != R; r = r->link) 256 r->active = 0; 257 synch(firstr, zbits); 258 if(change) 259 goto loop2; 260 261 addsplits(); 262 263 if(debug['R'] && debug['v']) { 264 print("\nprop structure:\n"); 265 for(r = firstr; r != R; r = r->link) { 266 print("%ld:%P", r->loop, r->prog); 267 for(z=0; z<BITS; z++) 268 bit.b[z] = r->set.b[z] | 269 r->refahead.b[z] | r->calahead.b[z] | 270 r->refbehind.b[z] | r->calbehind.b[z] | 271 r->use1.b[z] | r->use2.b[z]; 272 if(bany(&bit)) { 273 print("\t"); 274 if(bany(&r->use1)) 275 print(" u1=%B", r->use1); 276 if(bany(&r->use2)) 277 print(" u2=%B", r->use2); 278 if(bany(&r->set)) 279 print(" st=%B", r->set); 280 if(bany(&r->refahead)) 281 print(" ra=%B", r->refahead); 282 if(bany(&r->calahead)) 283 print(" ca=%B", r->calahead); 284 if(bany(&r->refbehind)) 285 print(" rb=%B", r->refbehind); 286 if(bany(&r->calbehind)) 287 print(" cb=%B", r->calbehind); 288 } 289 print("\n"); 290 } 291 } 292 293 /* 294 * pass 5 295 * isolate regions 296 * calculate costs (paint1) 297 */ 298 r = firstr; 299 if(r) { 300 for(z=0; z<BITS; z++) 301 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 302 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 303 if(bany(&bit)) { 304 nearln = r->prog->lineno; 305 warn(Z, "used and not set: %B", bit); 306 if(debug['R'] && !debug['w']) 307 print("used and not set: %B\n", bit); 308 } 309 } 310 311 for(r = firstr; r != R; r = r->link) 312 r->act = zbits; 313 rgp = region; 314 nregion = 0; 315 for(r = firstr; r != R; r = r->link) { 316 for(z=0; z<BITS; z++) 317 bit.b[z] = r->set.b[z] & 318 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 319 if(bany(&bit)) { 320 nearln = r->prog->lineno; 321 warn(Z, "set and not used: %B", bit); 322 if(debug['R']) 323 print("set and not used: %B\n", bit); 324 excise(r); 325 } 326 for(z=0; z<BITS; z++) 327 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 328 while(bany(&bit)) { 329 i = bnum(bit); 330 rgp->enter = r; 331 rgp->varno = i; 332 change = 0; 333 if(debug['R'] && debug['v']) 334 print("\n"); 335 paint1(r, i); 336 bit.b[i/32] &= ~(1L<<(i%32)); 337 if(change <= 0) { 338 if(debug['R']) 339 print("%L $%d: %B\n", 340 r->prog->lineno, change, blsh(i)); 341 continue; 342 } 343 rgp->cost = change; 344 nregion++; 345 if(nregion >= NRGN) { 346 warn(Z, "too many regions"); 347 goto brk; 348 } 349 rgp++; 350 } 351 } 352 brk: 353 qsort(region, nregion, sizeof(region[0]), rcmp); 354 355 /* 356 * pass 6 357 * determine used registers (paint2) 358 * replace code (paint3) 359 */ 360 rgp = region; 361 for(i=0; i<nregion; i++) { 362 bit = blsh(rgp->varno); 363 vreg = paint2(rgp->enter, rgp->varno); 364 vreg = allreg(vreg, rgp); 365 if(debug['R']) { 366 if(rgp->regno >= NREG) 367 print("%L $%d F%d: %B\n", 368 rgp->enter->prog->lineno, 369 rgp->cost, 370 rgp->regno-NREG, 371 bit); 372 else 373 print("%L $%d R%d: %B\n", 374 rgp->enter->prog->lineno, 375 rgp->cost, 376 rgp->regno, 377 bit); 378 } 379 if(rgp->regno != 0) 380 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 381 rgp++; 382 } 383 /* 384 * pass 7 385 * peep-hole on basic block 386 */ 387 if(!debug['R'] || debug['P']) 388 peep(); 389 390 /* 391 * pass 8 392 * recalculate pc 393 */ 394 val = initpc; 395 for(r = firstr; r != R; r = r1) { 396 r->pc = val; 397 p = r->prog; 398 p1 = P; 399 r1 = r->link; 400 if(r1 != R) 401 p1 = r1->prog; 402 for(; p != p1; p = p->link) { 403 switch(p->as) { 404 default: 405 val++; 406 break; 407 408 case ANOP: 409 case ADATA: 410 case AGLOBL: 411 case ANAME: 412 case ASIGNAME: 413 break; 414 } 415 } 416 } 417 pc = val; 418 419 /* 420 * fix up branches 421 */ 422 if(debug['R']) 423 if(bany(&addrs)) 424 print("addrs: %B\n", addrs); 425 426 r1 = 0; /* set */ 427 for(r = firstr; r != R; r = r->link) { 428 p = r->prog; 429 if(p->to.type == D_BRANCH) 430 p->to.offset = r->s2->pc; 431 r1 = r; 432 } 433 434 /* 435 * last pass 436 * eliminate nops 437 * free aux structures 438 */ 439 for(p = firstr->prog; p != P; p = p->link){ 440 while(p->link && p->link->as == ANOP) 441 p->link = p->link->link; 442 } 443 if(r1 != R) { 444 r1->link = freer; 445 freer = firstr; 446 } 447 } 448 449 void 450 addsplits(void) 451 { 452 Reg *r, *r1; 453 int z, i; 454 Bits bit; 455 456 for(r = firstr; r != R; r = r->link) { 457 if(r->loop > 1) 458 continue; 459 if(r->prog->as == AJAL) 460 continue; 461 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 462 if(r1->loop <= 1) 463 continue; 464 for(z=0; z<BITS; z++) 465 bit.b[z] = r1->calbehind.b[z] & 466 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 467 ~(r->calahead.b[z] & addrs.b[z]); 468 while(bany(&bit)) { 469 i = bnum(bit); 470 bit.b[i/32] &= ~(1L << (i%32)); 471 } 472 } 473 } 474 } 475 476 /* 477 * add mov b,rn 478 * just after r 479 */ 480 void 481 addmove(Reg *r, int bn, int rn, int f) 482 { 483 Prog *p, *p1; 484 Adr *a; 485 Var *v; 486 487 p1 = alloc(sizeof(*p1)); 488 *p1 = zprog; 489 p = r->prog; 490 491 p1->link = p->link; 492 p->link = p1; 493 p1->lineno = p->lineno; 494 495 v = var + bn; 496 497 a = &p1->to; 498 a->sym = v->sym; 499 a->name = v->name; 500 a->offset = v->offset; 501 a->etype = v->etype; 502 a->type = D_OREG; 503 if(a->etype == TARRAY || a->sym == S) 504 a->type = D_CONST; 505 506 p1->as = AMOVW; 507 if(v->etype == TCHAR || v->etype == TUCHAR) 508 p1->as = AMOVB; 509 if(v->etype == TSHORT || v->etype == TUSHORT) 510 p1->as = AMOVH; 511 if(v->etype == TVLONG || v->etype == TUVLONG) 512 p1->as = AMOVV; 513 if(v->etype == TFLOAT) 514 p1->as = AMOVF; 515 if(v->etype == TDOUBLE) 516 p1->as = AMOVD; 517 518 p1->from.type = D_REG; 519 p1->from.reg = rn; 520 if(rn >= NREG) { 521 p1->from.type = D_FREG; 522 p1->from.reg = rn-NREG; 523 } 524 if(!f) { 525 p1->from = *a; 526 *a = zprog.from; 527 a->type = D_REG; 528 a->reg = rn; 529 if(rn >= NREG) { 530 a->type = D_FREG; 531 a->reg = rn-NREG; 532 } 533 if(v->etype == TUCHAR) 534 p1->as = AMOVBU; 535 if(v->etype == TUSHORT) 536 p1->as = AMOVHU; 537 } 538 if(debug['R']) 539 print("%P\t.a%P\n", p, p1); 540 } 541 542 Bits 543 mkvar(Adr *a, int docon) 544 { 545 Var *v; 546 int i, t, n, et, z; 547 long o; 548 Bits bit; 549 Sym *s; 550 551 t = a->type; 552 if(t == D_REG && a->reg != NREG) 553 regbits |= RtoB(a->reg); 554 if(t == D_FREG && a->reg != NREG) 555 regbits |= FtoB(a->reg); 556 s = a->sym; 557 o = a->offset; 558 et = a->etype; 559 if(s == S) { 560 if(t != D_CONST || !docon || a->reg != NREG) 561 goto none; 562 et = TLONG; 563 } 564 if(t == D_CONST) { 565 if(s == S && sval(o)) 566 goto none; 567 } 568 569 n = a->name; 570 v = var; 571 for(i=0; i<nvar; i++) { 572 if(s == v->sym) 573 if(n == v->name) 574 if(o == v->offset) 575 goto out; 576 v++; 577 } 578 if(s) 579 if(s->name[0] == '.') 580 goto none; 581 if(nvar >= NVAR) { 582 if(debug['w'] > 1 && s) 583 warn(Z, "variable not optimized: %s", s->name); 584 goto none; 585 } 586 i = nvar; 587 nvar++; 588 v = &var[i]; 589 v->sym = s; 590 v->offset = o; 591 v->etype = et; 592 v->name = n; 593 if(debug['R']) 594 print("bit=%2d et=%2d %D\n", i, et, a); 595 out: 596 bit = blsh(i); 597 if(n == D_EXTERN || n == D_STATIC) 598 for(z=0; z<BITS; z++) 599 externs.b[z] |= bit.b[z]; 600 if(n == D_PARAM) 601 for(z=0; z<BITS; z++) 602 params.b[z] |= bit.b[z]; 603 if(v->etype != et || !(typechlpfd[et]/* || typev[et]*/)) /* funny punning */ 604 for(z=0; z<BITS; z++) 605 addrs.b[z] |= bit.b[z]; 606 if(t == D_CONST) { 607 if(s == S) { 608 for(z=0; z<BITS; z++) 609 consts.b[z] |= bit.b[z]; 610 return bit; 611 } 612 if(et != TARRAY) 613 for(z=0; z<BITS; z++) 614 addrs.b[z] |= bit.b[z]; 615 for(z=0; z<BITS; z++) 616 params.b[z] |= bit.b[z]; 617 return bit; 618 } 619 if(t == D_OREG) 620 return bit; 621 622 none: 623 return zbits; 624 } 625 626 void 627 prop(Reg *r, Bits ref, Bits cal) 628 { 629 Reg *r1, *r2; 630 int z; 631 632 for(r1 = r; r1 != R; r1 = r1->p1) { 633 for(z=0; z<BITS; z++) { 634 ref.b[z] |= r1->refahead.b[z]; 635 if(ref.b[z] != r1->refahead.b[z]) { 636 r1->refahead.b[z] = ref.b[z]; 637 change++; 638 } 639 cal.b[z] |= r1->calahead.b[z]; 640 if(cal.b[z] != r1->calahead.b[z]) { 641 r1->calahead.b[z] = cal.b[z]; 642 change++; 643 } 644 } 645 switch(r1->prog->as) { 646 case AJAL: 647 for(z=0; z<BITS; z++) { 648 cal.b[z] |= ref.b[z] | externs.b[z]; 649 ref.b[z] = 0; 650 } 651 break; 652 653 case ATEXT: 654 for(z=0; z<BITS; z++) { 655 cal.b[z] = 0; 656 ref.b[z] = 0; 657 } 658 break; 659 660 case ARET: 661 for(z=0; z<BITS; z++) { 662 cal.b[z] = externs.b[z]; 663 ref.b[z] = 0; 664 } 665 } 666 for(z=0; z<BITS; z++) { 667 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 668 r1->use1.b[z] | r1->use2.b[z]; 669 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 670 r1->refbehind.b[z] = ref.b[z]; 671 r1->calbehind.b[z] = cal.b[z]; 672 } 673 if(r1->active) 674 break; 675 r1->active = 1; 676 } 677 for(; r != r1; r = r->p1) 678 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 679 prop(r2, r->refbehind, r->calbehind); 680 } 681 682 /* 683 * find looping structure 684 * 685 * 1) find reverse postordering 686 * 2) find approximate dominators, 687 * the actual dominators if the flow graph is reducible 688 * otherwise, dominators plus some other non-dominators. 689 * See Matthew S. Hecht and Jeffrey D. Ullman, 690 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 691 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 692 * Oct. 1-3, 1973, pp. 207-217. 693 * 3) find all nodes with a predecessor dominated by the current node. 694 * such a node is a loop head. 695 * recursively, all preds with a greater rpo number are in the loop 696 */ 697 long 698 postorder(Reg *r, Reg **rpo2r, long n) 699 { 700 Reg *r1; 701 702 r->rpo = 1; 703 r1 = r->s1; 704 if(r1 && !r1->rpo) 705 n = postorder(r1, rpo2r, n); 706 r1 = r->s2; 707 if(r1 && !r1->rpo) 708 n = postorder(r1, rpo2r, n); 709 rpo2r[n] = r; 710 n++; 711 return n; 712 } 713 714 long 715 rpolca(long *idom, long rpo1, long rpo2) 716 { 717 long t; 718 719 if(rpo1 == -1) 720 return rpo2; 721 while(rpo1 != rpo2){ 722 if(rpo1 > rpo2){ 723 t = rpo2; 724 rpo2 = rpo1; 725 rpo1 = t; 726 } 727 while(rpo1 < rpo2){ 728 t = idom[rpo2]; 729 if(t >= rpo2) 730 fatal(Z, "bad idom"); 731 rpo2 = t; 732 } 733 } 734 return rpo1; 735 } 736 737 int 738 doms(long *idom, long r, long s) 739 { 740 while(s > r) 741 s = idom[s]; 742 return s == r; 743 } 744 745 int 746 loophead(long *idom, Reg *r) 747 { 748 long src; 749 750 src = r->rpo; 751 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 752 return 1; 753 for(r = r->p2; r != R; r = r->p2link) 754 if(doms(idom, src, r->rpo)) 755 return 1; 756 return 0; 757 } 758 759 void 760 loopmark(Reg **rpo2r, long head, Reg *r) 761 { 762 if(r->rpo < head || r->active == head) 763 return; 764 r->active = head; 765 r->loop += LOOP; 766 if(r->p1 != R) 767 loopmark(rpo2r, head, r->p1); 768 for(r = r->p2; r != R; r = r->p2link) 769 loopmark(rpo2r, head, r); 770 } 771 772 void 773 loopit(Reg *r, long nr) 774 { 775 Reg *r1; 776 long i, d, me; 777 778 if(nr > maxnr) { 779 rpo2r = alloc(nr * sizeof(Reg*)); 780 idom = alloc(nr * sizeof(long)); 781 maxnr = nr; 782 } 783 784 d = postorder(r, rpo2r, 0); 785 if(d > nr) 786 fatal(Z, "too many reg nodes"); 787 nr = d; 788 for(i = 0; i < nr / 2; i++){ 789 r1 = rpo2r[i]; 790 rpo2r[i] = rpo2r[nr - 1 - i]; 791 rpo2r[nr - 1 - i] = r1; 792 } 793 for(i = 0; i < nr; i++) 794 rpo2r[i]->rpo = i; 795 796 idom[0] = 0; 797 for(i = 0; i < nr; i++){ 798 r1 = rpo2r[i]; 799 me = r1->rpo; 800 d = -1; 801 if(r1->p1 != R && r1->p1->rpo < me) 802 d = r1->p1->rpo; 803 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 804 if(r1->rpo < me) 805 d = rpolca(idom, d, r1->rpo); 806 idom[i] = d; 807 } 808 809 for(i = 0; i < nr; i++){ 810 r1 = rpo2r[i]; 811 r1->loop++; 812 if(r1->p2 != R && loophead(idom, r1)) 813 loopmark(rpo2r, i, r1); 814 } 815 } 816 817 void 818 synch(Reg *r, Bits dif) 819 { 820 Reg *r1; 821 int z; 822 823 for(r1 = r; r1 != R; r1 = r1->s1) { 824 for(z=0; z<BITS; z++) { 825 dif.b[z] = (dif.b[z] & 826 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 827 r1->set.b[z] | r1->regdiff.b[z]; 828 if(dif.b[z] != r1->regdiff.b[z]) { 829 r1->regdiff.b[z] = dif.b[z]; 830 change++; 831 } 832 } 833 if(r1->active) 834 break; 835 r1->active = 1; 836 for(z=0; z<BITS; z++) 837 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 838 if(r1->s2 != R) 839 synch(r1->s2, dif); 840 } 841 } 842 843 ulong 844 allreg(ulong b, Rgn *r) 845 { 846 Var *v; 847 int i; 848 849 v = var + r->varno; 850 r->regno = 0; 851 switch(v->etype) { 852 853 default: 854 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 855 break; 856 857 case TCHAR: 858 case TUCHAR: 859 case TSHORT: 860 case TUSHORT: 861 case TINT: 862 case TUINT: 863 case TLONG: 864 case TULONG: 865 case TVLONG: 866 case TUVLONG: 867 case TIND: 868 case TARRAY: 869 i = BtoR(~b); 870 if(i && r->cost >= 0) { 871 r->regno = i; 872 return RtoB(i); 873 } 874 break; 875 876 case TDOUBLE: 877 case TFLOAT: 878 i = BtoF(~b); 879 if(i && r->cost >= 0) { 880 r->regno = i+NREG; 881 return FtoB(i); 882 } 883 break; 884 } 885 return 0; 886 } 887 888 void 889 paint1(Reg *r, int bn) 890 { 891 Reg *r1; 892 Prog *p; 893 int z; 894 ulong bb; 895 896 z = bn/32; 897 bb = 1L<<(bn%32); 898 if(r->act.b[z] & bb) 899 return; 900 for(;;) { 901 if(!(r->refbehind.b[z] & bb)) 902 break; 903 r1 = r->p1; 904 if(r1 == R) 905 break; 906 if(!(r1->refahead.b[z] & bb)) 907 break; 908 if(r1->act.b[z] & bb) 909 break; 910 r = r1; 911 } 912 913 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 914 change -= CLOAD * r->loop; 915 if(debug['R'] && debug['v']) 916 print("%ld%P\tld %B $%d\n", r->loop, 917 r->prog, blsh(bn), change); 918 } 919 for(;;) { 920 r->act.b[z] |= bb; 921 p = r->prog; 922 923 if(r->use1.b[z] & bb) { 924 change += CREF * r->loop; 925 if(debug['R'] && debug['v']) 926 print("%ld%P\tu1 %B $%d\n", r->loop, 927 p, blsh(bn), change); 928 } 929 930 if((r->use2.b[z]|r->set.b[z]) & bb) { 931 change += CREF * r->loop; 932 if(debug['R'] && debug['v']) 933 print("%ld%P\tu2 %B $%d\n", r->loop, 934 p, blsh(bn), change); 935 } 936 937 if(STORE(r) & r->regdiff.b[z] & bb) { 938 change -= CLOAD * r->loop; 939 if(debug['R'] && debug['v']) 940 print("%ld%P\tst %B $%d\n", r->loop, 941 p, blsh(bn), change); 942 } 943 944 if(r->refbehind.b[z] & bb) 945 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 946 if(r1->refahead.b[z] & bb) 947 paint1(r1, bn); 948 949 if(!(r->refahead.b[z] & bb)) 950 break; 951 r1 = r->s2; 952 if(r1 != R) 953 if(r1->refbehind.b[z] & bb) 954 paint1(r1, bn); 955 r = r->s1; 956 if(r == R) 957 break; 958 if(r->act.b[z] & bb) 959 break; 960 if(!(r->refbehind.b[z] & bb)) 961 break; 962 } 963 } 964 965 ulong 966 paint2(Reg *r, int bn) 967 { 968 Reg *r1; 969 int z; 970 ulong bb, vreg; 971 972 z = bn/32; 973 bb = 1L << (bn%32); 974 vreg = regbits; 975 if(!(r->act.b[z] & bb)) 976 return vreg; 977 for(;;) { 978 if(!(r->refbehind.b[z] & bb)) 979 break; 980 r1 = r->p1; 981 if(r1 == R) 982 break; 983 if(!(r1->refahead.b[z] & bb)) 984 break; 985 if(!(r1->act.b[z] & bb)) 986 break; 987 r = r1; 988 } 989 for(;;) { 990 r->act.b[z] &= ~bb; 991 992 vreg |= r->regu; 993 994 if(r->refbehind.b[z] & bb) 995 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 996 if(r1->refahead.b[z] & bb) 997 vreg |= paint2(r1, bn); 998 999 if(!(r->refahead.b[z] & bb)) 1000 break; 1001 r1 = r->s2; 1002 if(r1 != R) 1003 if(r1->refbehind.b[z] & bb) 1004 vreg |= paint2(r1, bn); 1005 r = r->s1; 1006 if(r == R) 1007 break; 1008 if(!(r->act.b[z] & bb)) 1009 break; 1010 if(!(r->refbehind.b[z] & bb)) 1011 break; 1012 } 1013 return vreg; 1014 } 1015 1016 void 1017 paint3(Reg *r, int bn, long rb, int rn) 1018 { 1019 Reg *r1; 1020 Prog *p; 1021 int z; 1022 ulong bb; 1023 1024 z = bn/32; 1025 bb = 1L << (bn%32); 1026 if(r->act.b[z] & bb) 1027 return; 1028 for(;;) { 1029 if(!(r->refbehind.b[z] & bb)) 1030 break; 1031 r1 = r->p1; 1032 if(r1 == R) 1033 break; 1034 if(!(r1->refahead.b[z] & bb)) 1035 break; 1036 if(r1->act.b[z] & bb) 1037 break; 1038 r = r1; 1039 } 1040 1041 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1042 addmove(r, bn, rn, 0); 1043 for(;;) { 1044 r->act.b[z] |= bb; 1045 p = r->prog; 1046 1047 if(r->use1.b[z] & bb) { 1048 if(debug['R']) 1049 print("%P", p); 1050 addreg(&p->from, rn); 1051 if(debug['R']) 1052 print("\t.c%P\n", p); 1053 } 1054 if((r->use2.b[z]|r->set.b[z]) & bb) { 1055 if(debug['R']) 1056 print("%P", p); 1057 addreg(&p->to, rn); 1058 if(debug['R']) 1059 print("\t.c%P\n", p); 1060 } 1061 1062 if(STORE(r) & r->regdiff.b[z] & bb) 1063 addmove(r, bn, rn, 1); 1064 r->regu |= rb; 1065 1066 if(r->refbehind.b[z] & bb) 1067 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1068 if(r1->refahead.b[z] & bb) 1069 paint3(r1, bn, rb, rn); 1070 1071 if(!(r->refahead.b[z] & bb)) 1072 break; 1073 r1 = r->s2; 1074 if(r1 != R) 1075 if(r1->refbehind.b[z] & bb) 1076 paint3(r1, bn, rb, rn); 1077 r = r->s1; 1078 if(r == R) 1079 break; 1080 if(r->act.b[z] & bb) 1081 break; 1082 if(!(r->refbehind.b[z] & bb)) 1083 break; 1084 } 1085 } 1086 1087 void 1088 addreg(Adr *a, int rn) 1089 { 1090 1091 a->sym = 0; 1092 a->name = D_NONE; 1093 a->type = D_REG; 1094 a->reg = rn; 1095 if(rn >= NREG) { 1096 a->type = D_FREG; 1097 a->reg = rn-NREG; 1098 } 1099 } 1100 1101 /* 1102 * bit reg 1103 * 0 R3 1104 * 1 R4 1105 * ... ... 1106 * 19 R22 1107 * 20 R23 1108 */ 1109 long 1110 RtoB(int r) 1111 { 1112 1113 if(r < 3 || r > 23) 1114 return 0; 1115 return 1L << (r-3); 1116 } 1117 1118 int 1119 BtoR(long b) 1120 { 1121 1122 b &= 0x001fffffL; 1123 if(b == 0) 1124 return 0; 1125 return bitno(b) + 3; 1126 } 1127 1128 /* 1129 * bit reg 1130 * 22 F4 1131 * 23 F6 1132 * ... ... 1133 * 31 F22 1134 */ 1135 long 1136 FtoB(int f) 1137 { 1138 1139 if(f < 4 || f > 22 || (f&1)) 1140 return 0; 1141 return 1L << (f/2 + 20); 1142 } 1143 1144 int 1145 BtoF(long b) 1146 { 1147 1148 b &= 0xffc00000L; 1149 if(b == 0) 1150 return 0; 1151 return bitno(b)*2 - 40; 1152 } 1153