1 #include "gc.h" 2 3 void addsplits(void); 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 r = alloc(sizeof(*r)); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(void *a1, void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = (Rgn*)a1; 27 p2 = (Rgn*)a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val, npc; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = 0; 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 case ASIGNAME: 84 continue; 85 } 86 r = rega(); 87 if(firstr == R) { 88 firstr = r; 89 lastr = r; 90 } else { 91 lastr->link = r; 92 r->p1 = lastr; 93 lastr->s1 = r; 94 lastr = r; 95 } 96 r->prog = p; 97 r->pc = val; 98 val++; 99 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->c--; 103 if(lp->c <= 0) { 104 lp->c = lp->m; 105 if(lp->p != R) 106 lp->p->log5 = r; 107 lp->p = r; 108 (lp+1)->c = 0; 109 break; 110 } 111 lp++; 112 } 113 114 r1 = r->p1; 115 if(r1 != R) 116 switch(r1->prog->as) { 117 case ARET: 118 case AB: 119 case ARFE: 120 r->p1 = R; 121 r1->s1 = R; 122 } 123 124 /* 125 * left side always read 126 */ 127 bit = mkvar(&p->from, p->as==AMOVW); 128 for(z=0; z<BITS; z++) 129 r->use1.b[z] |= bit.b[z]; 130 131 /* 132 * right side depends on opcode 133 */ 134 bit = mkvar(&p->to, 0); 135 if(bany(&bit)) 136 switch(p->as) { 137 default: 138 diag(Z, "reg: unknown asop: %A", p->as); 139 break; 140 141 /* 142 * right side write 143 */ 144 case ANOP: 145 case AMOVB: 146 case AMOVBU: 147 case AMOVH: 148 case AMOVHU: 149 case AMOVW: 150 case AMOVF: 151 case AMOVD: 152 for(z=0; z<BITS; z++) 153 r->set.b[z] |= bit.b[z]; 154 break; 155 156 /* 157 * funny 158 */ 159 case ABL: 160 case ABX: 161 for(z=0; z<BITS; z++) 162 addrs.b[z] |= bit.b[z]; 163 break; 164 } 165 166 if(p->as == AMOVM) { 167 if(p->from.type == D_CONST) 168 z = p->from.offset; 169 else 170 z = p->to.offset; 171 for(i=0; z; i++) { 172 if(z&1) 173 regbits |= RtoB(i); 174 z >>= 1; 175 } 176 } 177 } 178 if(firstr == R) 179 return; 180 initpc = pc - val; 181 npc = val; 182 183 /* 184 * pass 2 185 * turn branch references to pointers 186 * build back pointers 187 */ 188 for(r = firstr; r != R; r = r->link) { 189 p = r->prog; 190 if(p->to.type == D_BRANCH) { 191 val = p->to.offset - initpc; 192 r1 = firstr; 193 while(r1 != R) { 194 r2 = r1->log5; 195 if(r2 != R && val >= r2->pc) { 196 r1 = r2; 197 continue; 198 } 199 if(r1->pc == val) 200 break; 201 r1 = r1->link; 202 } 203 if(r1 == R) { 204 nearln = p->lineno; 205 diag(Z, "ref not found\n%P", p); 206 continue; 207 } 208 if(r1 == r) { 209 nearln = p->lineno; 210 diag(Z, "ref to self\n%P", p); 211 continue; 212 } 213 r->s2 = r1; 214 r->p2link = r1->p2; 215 r1->p2 = r; 216 } 217 } 218 if(debug['R']) { 219 p = firstr->prog; 220 print("\n%L %D\n", p->lineno, &p->from); 221 } 222 223 /* 224 * pass 2.5 225 * find looping structure 226 */ 227 for(r = firstr; r != R; r = r->link) 228 r->active = 0; 229 change = 0; 230 loopit(firstr, npc); 231 232 /* 233 * pass 3 234 * iterate propagating usage 235 * back until flow graph is complete 236 */ 237 loop1: 238 change = 0; 239 for(r = firstr; r != R; r = r->link) 240 r->active = 0; 241 for(r = firstr; r != R; r = r->link) 242 if(r->prog->as == ARET) 243 prop(r, zbits, zbits); 244 loop11: 245 /* pick up unreachable code */ 246 i = 0; 247 for(r = firstr; r != R; r = r1) { 248 r1 = r->link; 249 if(r1 && r1->active && !r->active) { 250 prop(r, zbits, zbits); 251 i = 1; 252 } 253 } 254 if(i) 255 goto loop11; 256 if(change) 257 goto loop1; 258 259 260 /* 261 * pass 4 262 * iterate propagating register/variable synchrony 263 * forward until graph is complete 264 */ 265 loop2: 266 change = 0; 267 for(r = firstr; r != R; r = r->link) 268 r->active = 0; 269 synch(firstr, zbits); 270 if(change) 271 goto loop2; 272 273 addsplits(); 274 275 if(debug['R'] && debug['v']) { 276 print("\nprop structure:\n"); 277 for(r = firstr; r != R; r = r->link) { 278 print("%ld:%P", r->loop, r->prog); 279 for(z=0; z<BITS; z++) 280 bit.b[z] = r->set.b[z] | 281 r->refahead.b[z] | r->calahead.b[z] | 282 r->refbehind.b[z] | r->calbehind.b[z] | 283 r->use1.b[z] | r->use2.b[z]; 284 if(bany(&bit)) { 285 print("\t"); 286 if(bany(&r->use1)) 287 print(" u1=%B", r->use1); 288 if(bany(&r->use2)) 289 print(" u2=%B", r->use2); 290 if(bany(&r->set)) 291 print(" st=%B", r->set); 292 if(bany(&r->refahead)) 293 print(" ra=%B", r->refahead); 294 if(bany(&r->calahead)) 295 print(" ca=%B", r->calahead); 296 if(bany(&r->refbehind)) 297 print(" rb=%B", r->refbehind); 298 if(bany(&r->calbehind)) 299 print(" cb=%B", r->calbehind); 300 } 301 print("\n"); 302 } 303 } 304 305 /* 306 * pass 5 307 * isolate regions 308 * calculate costs (paint1) 309 */ 310 r = firstr; 311 if(r) { 312 for(z=0; z<BITS; z++) 313 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 314 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 315 if(bany(&bit)) { 316 nearln = r->prog->lineno; 317 warn(Z, "used and not set: %B", bit); 318 if(debug['R'] && !debug['w']) 319 print("used and not set: %B\n", bit); 320 } 321 } 322 323 for(r = firstr; r != R; r = r->link) 324 r->act = zbits; 325 rgp = region; 326 nregion = 0; 327 for(r = firstr; r != R; r = r->link) { 328 for(z=0; z<BITS; z++) 329 bit.b[z] = r->set.b[z] & 330 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 331 if(bany(&bit)) { 332 nearln = r->prog->lineno; 333 warn(Z, "set and not used: %B", bit); 334 if(debug['R']) 335 print("set and not used: %B\n", bit); 336 excise(r); 337 } 338 for(z=0; z<BITS; z++) 339 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 340 while(bany(&bit)) { 341 i = bnum(bit); 342 rgp->enter = r; 343 rgp->varno = i; 344 change = 0; 345 if(debug['R'] && debug['v']) 346 print("\n"); 347 paint1(r, i); 348 bit.b[i/32] &= ~(1L<<(i%32)); 349 if(change <= 0) { 350 if(debug['R']) 351 print("%L $%d: %B\n", 352 r->prog->lineno, change, blsh(i)); 353 continue; 354 } 355 rgp->cost = change; 356 nregion++; 357 if(nregion >= NRGN) { 358 warn(Z, "too many regions"); 359 goto brk; 360 } 361 rgp++; 362 } 363 } 364 brk: 365 qsort(region, nregion, sizeof(region[0]), rcmp); 366 367 /* 368 * pass 6 369 * determine used registers (paint2) 370 * replace code (paint3) 371 */ 372 rgp = region; 373 for(i=0; i<nregion; i++) { 374 bit = blsh(rgp->varno); 375 vreg = paint2(rgp->enter, rgp->varno); 376 vreg = allreg(vreg, rgp); 377 if(debug['R']) { 378 if(rgp->regno >= NREG) 379 print("%L $%d F%d: %B\n", 380 rgp->enter->prog->lineno, 381 rgp->cost, 382 rgp->regno-NREG, 383 bit); 384 else 385 print("%L $%d R%d: %B\n", 386 rgp->enter->prog->lineno, 387 rgp->cost, 388 rgp->regno, 389 bit); 390 } 391 if(rgp->regno != 0) 392 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 393 rgp++; 394 } 395 /* 396 * pass 7 397 * peep-hole on basic block 398 */ 399 if(!debug['R'] || debug['P']) 400 peep(); 401 402 /* 403 * pass 8 404 * recalculate pc 405 */ 406 val = initpc; 407 for(r = firstr; r != R; r = r1) { 408 r->pc = val; 409 p = r->prog; 410 p1 = P; 411 r1 = r->link; 412 if(r1 != R) 413 p1 = r1->prog; 414 for(; p != p1; p = p->link) { 415 switch(p->as) { 416 default: 417 val++; 418 break; 419 420 case ANOP: 421 case ADATA: 422 case AGLOBL: 423 case ANAME: 424 case ASIGNAME: 425 break; 426 } 427 } 428 } 429 pc = val; 430 431 /* 432 * fix up branches 433 */ 434 if(debug['R']) 435 if(bany(&addrs)) 436 print("addrs: %B\n", addrs); 437 438 r1 = 0; /* set */ 439 for(r = firstr; r != R; r = r->link) { 440 p = r->prog; 441 if(p->to.type == D_BRANCH) 442 p->to.offset = r->s2->pc; 443 r1 = r; 444 } 445 446 /* 447 * last pass 448 * eliminate nops 449 * free aux structures 450 */ 451 for(p = firstr->prog; p != P; p = p->link){ 452 while(p->link && p->link->as == ANOP) 453 p->link = p->link->link; 454 } 455 if(r1 != R) { 456 r1->link = freer; 457 freer = firstr; 458 } 459 } 460 461 void 462 addsplits(void) 463 { 464 Reg *r, *r1; 465 int z, i; 466 Bits bit; 467 468 for(r = firstr; r != R; r = r->link) { 469 if(r->loop > 1) 470 continue; 471 if(r->prog->as == ABL || r->prog->as == ABX) 472 continue; 473 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 474 if(r1->loop <= 1) 475 continue; 476 for(z=0; z<BITS; z++) 477 bit.b[z] = r1->calbehind.b[z] & 478 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 479 ~(r->calahead.b[z] & addrs.b[z]); 480 while(bany(&bit)) { 481 i = bnum(bit); 482 bit.b[i/32] &= ~(1L << (i%32)); 483 } 484 } 485 } 486 } 487 488 /* 489 * add mov b,rn 490 * just after r 491 */ 492 void 493 addmove(Reg *r, int bn, int rn, int f) 494 { 495 Prog *p, *p1; 496 Adr *a; 497 Var *v; 498 499 p1 = alloc(sizeof(*p1)); 500 *p1 = zprog; 501 p = r->prog; 502 503 p1->link = p->link; 504 p->link = p1; 505 p1->lineno = p->lineno; 506 507 v = var + bn; 508 509 a = &p1->to; 510 a->sym = v->sym; 511 a->name = v->name; 512 a->offset = v->offset; 513 a->etype = v->etype; 514 a->type = D_OREG; 515 if(a->etype == TARRAY || a->sym == S) 516 a->type = D_CONST; 517 518 p1->as = AMOVW; 519 if(v->etype == TCHAR || v->etype == TUCHAR) 520 p1->as = AMOVB; 521 if(v->etype == TSHORT || v->etype == TUSHORT) 522 p1->as = AMOVH; 523 if(v->etype == TFLOAT) 524 p1->as = AMOVF; 525 if(v->etype == TDOUBLE) 526 p1->as = AMOVD; 527 528 p1->from.type = D_REG; 529 p1->from.reg = rn; 530 if(rn >= NREG) { 531 p1->from.type = D_FREG; 532 p1->from.reg = rn-NREG; 533 } 534 if(!f) { 535 p1->from = *a; 536 *a = zprog.from; 537 a->type = D_REG; 538 a->reg = rn; 539 if(rn >= NREG) { 540 a->type = D_FREG; 541 a->reg = rn-NREG; 542 } 543 if(v->etype == TUCHAR) 544 p1->as = AMOVBU; 545 if(v->etype == TUSHORT) 546 p1->as = AMOVHU; 547 } 548 if(debug['R']) 549 print("%P\t.a%P\n", p, p1); 550 } 551 552 Bits 553 mkvar(Adr *a, int docon) 554 { 555 Var *v; 556 int i, t, n, et, z; 557 long o; 558 Bits bit; 559 Sym *s; 560 561 t = a->type; 562 if(t == D_REG && a->reg != NREG) 563 regbits |= RtoB(a->reg); 564 if(t == D_FREG && a->reg != NREG) 565 regbits |= FtoB(a->reg); 566 s = a->sym; 567 o = a->offset; 568 et = a->etype; 569 if(s == S) { 570 if(t != D_CONST || !docon || a->reg != NREG) 571 goto none; 572 et = TLONG; 573 } 574 if(t == D_CONST) { 575 if(s == S && sval(o)) 576 goto none; 577 } 578 579 n = a->name; 580 v = var; 581 for(i=0; i<nvar; i++) { 582 if(s == v->sym) 583 if(n == v->name) 584 if(o == v->offset) 585 goto out; 586 v++; 587 } 588 if(s) 589 if(s->name[0] == '.') 590 goto none; 591 if(nvar >= NVAR) { 592 if(debug['w'] > 1 && s) 593 warn(Z, "variable not optimized: %s", s->name); 594 goto none; 595 } 596 i = nvar; 597 nvar++; 598 v = &var[i]; 599 v->sym = s; 600 v->offset = o; 601 v->etype = et; 602 v->name = n; 603 if(debug['R']) 604 print("bit=%2d et=%2d %D\n", i, et, a); 605 out: 606 bit = blsh(i); 607 if(n == D_EXTERN || n == D_STATIC) 608 for(z=0; z<BITS; z++) 609 externs.b[z] |= bit.b[z]; 610 if(n == D_PARAM) 611 for(z=0; z<BITS; z++) 612 params.b[z] |= bit.b[z]; 613 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 614 for(z=0; z<BITS; z++) 615 addrs.b[z] |= bit.b[z]; 616 if(t == D_CONST) { 617 if(s == S) { 618 for(z=0; z<BITS; z++) 619 consts.b[z] |= bit.b[z]; 620 return bit; 621 } 622 if(et != TARRAY) 623 for(z=0; z<BITS; z++) 624 addrs.b[z] |= bit.b[z]; 625 for(z=0; z<BITS; z++) 626 params.b[z] |= bit.b[z]; 627 return bit; 628 } 629 if(t == D_OREG) 630 return bit; 631 632 none: 633 return zbits; 634 } 635 636 void 637 prop(Reg *r, Bits ref, Bits cal) 638 { 639 Reg *r1, *r2; 640 int z; 641 642 for(r1 = r; r1 != R; r1 = r1->p1) { 643 for(z=0; z<BITS; z++) { 644 ref.b[z] |= r1->refahead.b[z]; 645 if(ref.b[z] != r1->refahead.b[z]) { 646 r1->refahead.b[z] = ref.b[z]; 647 change++; 648 } 649 cal.b[z] |= r1->calahead.b[z]; 650 if(cal.b[z] != r1->calahead.b[z]) { 651 r1->calahead.b[z] = cal.b[z]; 652 change++; 653 } 654 } 655 switch(r1->prog->as) { 656 case ABL: 657 case ABX: 658 for(z=0; z<BITS; z++) { 659 cal.b[z] |= ref.b[z] | externs.b[z]; 660 ref.b[z] = 0; 661 } 662 break; 663 664 case ATEXT: 665 for(z=0; z<BITS; z++) { 666 cal.b[z] = 0; 667 ref.b[z] = 0; 668 } 669 break; 670 671 case ARET: 672 for(z=0; z<BITS; z++) { 673 cal.b[z] = externs.b[z]; 674 ref.b[z] = 0; 675 } 676 } 677 for(z=0; z<BITS; z++) { 678 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 679 r1->use1.b[z] | r1->use2.b[z]; 680 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 681 r1->refbehind.b[z] = ref.b[z]; 682 r1->calbehind.b[z] = cal.b[z]; 683 } 684 if(r1->active) 685 break; 686 r1->active = 1; 687 } 688 for(; r != r1; r = r->p1) 689 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 690 prop(r2, r->refbehind, r->calbehind); 691 } 692 693 /* 694 * find looping structure 695 * 696 * 1) find reverse postordering 697 * 2) find approximate dominators, 698 * the actual dominators if the flow graph is reducible 699 * otherwise, dominators plus some other non-dominators. 700 * See Matthew S. Hecht and Jeffrey D. Ullman, 701 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 702 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 703 * Oct. 1-3, 1973, pp. 207-217. 704 * 3) find all nodes with a predecessor dominated by the current node. 705 * such a node is a loop head. 706 * recursively, all preds with a greater rpo number are in the loop 707 */ 708 long 709 postorder(Reg *r, Reg **rpo2r, long n) 710 { 711 Reg *r1; 712 713 r->rpo = 1; 714 r1 = r->s1; 715 if(r1 && !r1->rpo) 716 n = postorder(r1, rpo2r, n); 717 r1 = r->s2; 718 if(r1 && !r1->rpo) 719 n = postorder(r1, rpo2r, n); 720 rpo2r[n] = r; 721 n++; 722 return n; 723 } 724 725 long 726 rpolca(long *idom, long rpo1, long rpo2) 727 { 728 long t; 729 730 if(rpo1 == -1) 731 return rpo2; 732 while(rpo1 != rpo2){ 733 if(rpo1 > rpo2){ 734 t = rpo2; 735 rpo2 = rpo1; 736 rpo1 = t; 737 } 738 while(rpo1 < rpo2){ 739 t = idom[rpo2]; 740 if(t >= rpo2) 741 fatal(Z, "bad idom"); 742 rpo2 = t; 743 } 744 } 745 return rpo1; 746 } 747 748 int 749 doms(long *idom, long r, long s) 750 { 751 while(s > r) 752 s = idom[s]; 753 return s == r; 754 } 755 756 int 757 loophead(long *idom, Reg *r) 758 { 759 long src; 760 761 src = r->rpo; 762 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 763 return 1; 764 for(r = r->p2; r != R; r = r->p2link) 765 if(doms(idom, src, r->rpo)) 766 return 1; 767 return 0; 768 } 769 770 void 771 loopmark(Reg **rpo2r, long head, Reg *r) 772 { 773 if(r->rpo < head || r->active == head) 774 return; 775 r->active = head; 776 r->loop += LOOP; 777 if(r->p1 != R) 778 loopmark(rpo2r, head, r->p1); 779 for(r = r->p2; r != R; r = r->p2link) 780 loopmark(rpo2r, head, r); 781 } 782 783 void 784 loopit(Reg *r, long nr) 785 { 786 Reg *r1; 787 long i, d, me; 788 789 if(nr > maxnr) { 790 rpo2r = alloc(nr * sizeof(Reg*)); 791 idom = alloc(nr * sizeof(long)); 792 maxnr = nr; 793 } 794 795 d = postorder(r, rpo2r, 0); 796 if(d > nr) 797 fatal(Z, "too many reg nodes"); 798 nr = d; 799 for(i = 0; i < nr / 2; i++){ 800 r1 = rpo2r[i]; 801 rpo2r[i] = rpo2r[nr - 1 - i]; 802 rpo2r[nr - 1 - i] = r1; 803 } 804 for(i = 0; i < nr; i++) 805 rpo2r[i]->rpo = i; 806 807 idom[0] = 0; 808 for(i = 0; i < nr; i++){ 809 r1 = rpo2r[i]; 810 me = r1->rpo; 811 d = -1; 812 if(r1->p1 != R && r1->p1->rpo < me) 813 d = r1->p1->rpo; 814 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 815 if(r1->rpo < me) 816 d = rpolca(idom, d, r1->rpo); 817 idom[i] = d; 818 } 819 820 for(i = 0; i < nr; i++){ 821 r1 = rpo2r[i]; 822 r1->loop++; 823 if(r1->p2 != R && loophead(idom, r1)) 824 loopmark(rpo2r, i, r1); 825 } 826 } 827 828 void 829 synch(Reg *r, Bits dif) 830 { 831 Reg *r1; 832 int z; 833 834 for(r1 = r; r1 != R; r1 = r1->s1) { 835 for(z=0; z<BITS; z++) { 836 dif.b[z] = (dif.b[z] & 837 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 838 r1->set.b[z] | r1->regdiff.b[z]; 839 if(dif.b[z] != r1->regdiff.b[z]) { 840 r1->regdiff.b[z] = dif.b[z]; 841 change++; 842 } 843 } 844 if(r1->active) 845 break; 846 r1->active = 1; 847 for(z=0; z<BITS; z++) 848 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 849 if(r1->s2 != R) 850 synch(r1->s2, dif); 851 } 852 } 853 854 ulong 855 allreg(ulong b, Rgn *r) 856 { 857 Var *v; 858 int i; 859 860 v = var + r->varno; 861 r->regno = 0; 862 switch(v->etype) { 863 864 default: 865 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 866 break; 867 868 case TCHAR: 869 case TUCHAR: 870 case TSHORT: 871 case TUSHORT: 872 case TINT: 873 case TUINT: 874 case TLONG: 875 case TULONG: 876 case TIND: 877 case TARRAY: 878 i = BtoR(~b); 879 if(i && r->cost >= 0) { 880 r->regno = i; 881 return RtoB(i); 882 } 883 break; 884 885 case TVLONG: 886 case TDOUBLE: 887 case TFLOAT: 888 i = BtoF(~b); 889 if(i && r->cost >= 0) { 890 r->regno = i+NREG; 891 return FtoB(i); 892 } 893 break; 894 } 895 return 0; 896 } 897 898 void 899 paint1(Reg *r, int bn) 900 { 901 Reg *r1; 902 Prog *p; 903 int z; 904 ulong bb; 905 906 z = bn/32; 907 bb = 1L<<(bn%32); 908 if(r->act.b[z] & bb) 909 return; 910 for(;;) { 911 if(!(r->refbehind.b[z] & bb)) 912 break; 913 r1 = r->p1; 914 if(r1 == R) 915 break; 916 if(!(r1->refahead.b[z] & bb)) 917 break; 918 if(r1->act.b[z] & bb) 919 break; 920 r = r1; 921 } 922 923 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 924 change -= CLOAD * r->loop; 925 if(debug['R'] && debug['v']) 926 print("%ld%P\tld %B $%d\n", r->loop, 927 r->prog, blsh(bn), change); 928 } 929 for(;;) { 930 r->act.b[z] |= bb; 931 p = r->prog; 932 933 if(r->use1.b[z] & bb) { 934 change += CREF * r->loop; 935 if(debug['R'] && debug['v']) 936 print("%ld%P\tu1 %B $%d\n", r->loop, 937 p, blsh(bn), change); 938 } 939 940 if((r->use2.b[z]|r->set.b[z]) & bb) { 941 change += CREF * r->loop; 942 if(debug['R'] && debug['v']) 943 print("%ld%P\tu2 %B $%d\n", r->loop, 944 p, blsh(bn), change); 945 } 946 947 if(STORE(r) & r->regdiff.b[z] & bb) { 948 change -= CLOAD * r->loop; 949 if(debug['R'] && debug['v']) 950 print("%ld%P\tst %B $%d\n", r->loop, 951 p, blsh(bn), change); 952 } 953 954 if(r->refbehind.b[z] & bb) 955 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 956 if(r1->refahead.b[z] & bb) 957 paint1(r1, bn); 958 959 if(!(r->refahead.b[z] & bb)) 960 break; 961 r1 = r->s2; 962 if(r1 != R) 963 if(r1->refbehind.b[z] & bb) 964 paint1(r1, bn); 965 r = r->s1; 966 if(r == R) 967 break; 968 if(r->act.b[z] & bb) 969 break; 970 if(!(r->refbehind.b[z] & bb)) 971 break; 972 } 973 } 974 975 ulong 976 paint2(Reg *r, int bn) 977 { 978 Reg *r1; 979 int z; 980 ulong bb, vreg; 981 982 z = bn/32; 983 bb = 1L << (bn%32); 984 vreg = regbits; 985 if(!(r->act.b[z] & bb)) 986 return vreg; 987 for(;;) { 988 if(!(r->refbehind.b[z] & bb)) 989 break; 990 r1 = r->p1; 991 if(r1 == R) 992 break; 993 if(!(r1->refahead.b[z] & bb)) 994 break; 995 if(!(r1->act.b[z] & bb)) 996 break; 997 r = r1; 998 } 999 for(;;) { 1000 r->act.b[z] &= ~bb; 1001 1002 vreg |= r->regu; 1003 1004 if(r->refbehind.b[z] & bb) 1005 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1006 if(r1->refahead.b[z] & bb) 1007 vreg |= paint2(r1, bn); 1008 1009 if(!(r->refahead.b[z] & bb)) 1010 break; 1011 r1 = r->s2; 1012 if(r1 != R) 1013 if(r1->refbehind.b[z] & bb) 1014 vreg |= paint2(r1, bn); 1015 r = r->s1; 1016 if(r == R) 1017 break; 1018 if(!(r->act.b[z] & bb)) 1019 break; 1020 if(!(r->refbehind.b[z] & bb)) 1021 break; 1022 } 1023 return vreg; 1024 } 1025 1026 void 1027 paint3(Reg *r, int bn, long rb, int rn) 1028 { 1029 Reg *r1; 1030 Prog *p; 1031 int z; 1032 ulong bb; 1033 1034 z = bn/32; 1035 bb = 1L << (bn%32); 1036 if(r->act.b[z] & bb) 1037 return; 1038 for(;;) { 1039 if(!(r->refbehind.b[z] & bb)) 1040 break; 1041 r1 = r->p1; 1042 if(r1 == R) 1043 break; 1044 if(!(r1->refahead.b[z] & bb)) 1045 break; 1046 if(r1->act.b[z] & bb) 1047 break; 1048 r = r1; 1049 } 1050 1051 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1052 addmove(r, bn, rn, 0); 1053 for(;;) { 1054 r->act.b[z] |= bb; 1055 p = r->prog; 1056 1057 if(r->use1.b[z] & bb) { 1058 if(debug['R']) 1059 print("%P", p); 1060 addreg(&p->from, rn); 1061 if(debug['R']) 1062 print("\t.c%P\n", p); 1063 } 1064 if((r->use2.b[z]|r->set.b[z]) & bb) { 1065 if(debug['R']) 1066 print("%P", p); 1067 addreg(&p->to, rn); 1068 if(debug['R']) 1069 print("\t.c%P\n", p); 1070 } 1071 1072 if(STORE(r) & r->regdiff.b[z] & bb) 1073 addmove(r, bn, rn, 1); 1074 r->regu |= rb; 1075 1076 if(r->refbehind.b[z] & bb) 1077 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1078 if(r1->refahead.b[z] & bb) 1079 paint3(r1, bn, rb, rn); 1080 1081 if(!(r->refahead.b[z] & bb)) 1082 break; 1083 r1 = r->s2; 1084 if(r1 != R) 1085 if(r1->refbehind.b[z] & bb) 1086 paint3(r1, bn, rb, rn); 1087 r = r->s1; 1088 if(r == R) 1089 break; 1090 if(r->act.b[z] & bb) 1091 break; 1092 if(!(r->refbehind.b[z] & bb)) 1093 break; 1094 } 1095 } 1096 1097 void 1098 addreg(Adr *a, int rn) 1099 { 1100 1101 a->sym = 0; 1102 a->name = D_NONE; 1103 a->type = D_REG; 1104 a->reg = rn; 1105 if(rn >= NREG) { 1106 a->type = D_FREG; 1107 a->reg = rn-NREG; 1108 } 1109 } 1110 1111 /* 1112 * bit reg 1113 * 0 R0 1114 * 1 R1 1115 * ... ... 1116 * 10 R10 1117 */ 1118 long 1119 RtoB(int r) 1120 { 1121 1122 if(r < 2 || r >= REGTMPT) 1123 return 0; 1124 return 1L << r; 1125 } 1126 1127 int 1128 BtoR(long b) 1129 { 1130 1131 b &= 0x7cL; // r2-r6 1132 if(b == 0) 1133 return 0; 1134 return bitno(b); 1135 } 1136 1137 /* 1138 * bit reg 1139 * 18 F2 1140 * 19 F3 1141 * ... ... 1142 * 23 F7 1143 */ 1144 long 1145 FtoB(int f) 1146 { 1147 1148 if(f < 2 || f > NFREG-1) 1149 return 0; 1150 return 1L << (f + 16); 1151 } 1152 1153 int 1154 BtoF(long b) 1155 { 1156 1157 b &= 0xfc0000L; 1158 if(b == 0) 1159 return 0; 1160 return bitno(b) - 16; 1161 } 1162