1 #include "gc.h" 2 3 void addsplits(void); 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 r = alloc(sizeof(*r)); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(void *a1, void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = (Rgn*)a1; 27 p2 = (Rgn*)a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val, npc; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = 0; 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 case ASIGNAME: 84 continue; 85 } 86 r = rega(); 87 if(firstr == R) { 88 firstr = r; 89 lastr = r; 90 } else { 91 lastr->link = r; 92 r->p1 = lastr; 93 lastr->s1 = r; 94 lastr = r; 95 } 96 r->prog = p; 97 r->pc = val; 98 val++; 99 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->c--; 103 if(lp->c <= 0) { 104 lp->c = lp->m; 105 if(lp->p != R) 106 lp->p->log5 = r; 107 lp->p = r; 108 (lp+1)->c = 0; 109 break; 110 } 111 lp++; 112 } 113 114 r1 = r->p1; 115 if(r1 != R) 116 switch(r1->prog->as) { 117 case ARET: 118 case AB: 119 case ARFE: 120 r->p1 = R; 121 r1->s1 = R; 122 } 123 124 /* 125 * left side always read 126 */ 127 bit = mkvar(&p->from, p->as==AMOVW); 128 for(z=0; z<BITS; z++) 129 r->use1.b[z] |= bit.b[z]; 130 131 /* 132 * right side depends on opcode 133 */ 134 bit = mkvar(&p->to, 0); 135 if(bany(&bit)) 136 switch(p->as) { 137 default: 138 diag(Z, "reg: unknown asop: %A", p->as); 139 break; 140 141 /* 142 * right side write 143 */ 144 case ANOP: 145 case AMOVB: 146 case AMOVBU: 147 case AMOVH: 148 case AMOVHU: 149 case AMOVW: 150 case AMOVF: 151 case AMOVD: 152 for(z=0; z<BITS; z++) 153 r->set.b[z] |= bit.b[z]; 154 break; 155 156 /* 157 * funny 158 */ 159 case ABL: 160 for(z=0; z<BITS; z++) 161 addrs.b[z] |= bit.b[z]; 162 break; 163 } 164 165 if(p->as == AMOVM) { 166 if(p->from.type == D_CONST) 167 z = p->from.offset; 168 else 169 z = p->to.offset; 170 for(i=0; z; i++) { 171 if(z&1) 172 regbits |= RtoB(i); 173 z >>= 1; 174 } 175 } 176 } 177 if(firstr == R) 178 return; 179 initpc = pc - val; 180 npc = val; 181 182 /* 183 * pass 2 184 * turn branch references to pointers 185 * build back pointers 186 */ 187 for(r = firstr; r != R; r = r->link) { 188 p = r->prog; 189 if(p->to.type == D_BRANCH) { 190 val = p->to.offset - initpc; 191 r1 = firstr; 192 while(r1 != R) { 193 r2 = r1->log5; 194 if(r2 != R && val >= r2->pc) { 195 r1 = r2; 196 continue; 197 } 198 if(r1->pc == val) 199 break; 200 r1 = r1->link; 201 } 202 if(r1 == R) { 203 nearln = p->lineno; 204 diag(Z, "ref not found\n%P", p); 205 continue; 206 } 207 if(r1 == r) { 208 nearln = p->lineno; 209 diag(Z, "ref to self\n%P", p); 210 continue; 211 } 212 r->s2 = r1; 213 r->p2link = r1->p2; 214 r1->p2 = r; 215 } 216 } 217 if(debug['R']) { 218 p = firstr->prog; 219 print("\n%L %D\n", p->lineno, &p->from); 220 } 221 222 /* 223 * pass 2.5 224 * find looping structure 225 */ 226 for(r = firstr; r != R; r = r->link) 227 r->active = 0; 228 change = 0; 229 loopit(firstr, npc); 230 231 /* 232 * pass 3 233 * iterate propagating usage 234 * back until flow graph is complete 235 */ 236 loop1: 237 change = 0; 238 for(r = firstr; r != R; r = r->link) 239 r->active = 0; 240 for(r = firstr; r != R; r = r->link) 241 if(r->prog->as == ARET) 242 prop(r, zbits, zbits); 243 loop11: 244 /* pick up unreachable code */ 245 i = 0; 246 for(r = firstr; r != R; r = r1) { 247 r1 = r->link; 248 if(r1 && r1->active && !r->active) { 249 prop(r, zbits, zbits); 250 i = 1; 251 } 252 } 253 if(i) 254 goto loop11; 255 if(change) 256 goto loop1; 257 258 259 /* 260 * pass 4 261 * iterate propagating register/variable synchrony 262 * forward until graph is complete 263 */ 264 loop2: 265 change = 0; 266 for(r = firstr; r != R; r = r->link) 267 r->active = 0; 268 synch(firstr, zbits); 269 if(change) 270 goto loop2; 271 272 addsplits(); 273 274 if(debug['R'] && debug['v']) { 275 print("\nprop structure:\n"); 276 for(r = firstr; r != R; r = r->link) { 277 print("%ld:%P", r->loop, r->prog); 278 for(z=0; z<BITS; z++) 279 bit.b[z] = r->set.b[z] | 280 r->refahead.b[z] | r->calahead.b[z] | 281 r->refbehind.b[z] | r->calbehind.b[z] | 282 r->use1.b[z] | r->use2.b[z]; 283 if(bany(&bit)) { 284 print("\t"); 285 if(bany(&r->use1)) 286 print(" u1=%B", r->use1); 287 if(bany(&r->use2)) 288 print(" u2=%B", r->use2); 289 if(bany(&r->set)) 290 print(" st=%B", r->set); 291 if(bany(&r->refahead)) 292 print(" ra=%B", r->refahead); 293 if(bany(&r->calahead)) 294 print(" ca=%B", r->calahead); 295 if(bany(&r->refbehind)) 296 print(" rb=%B", r->refbehind); 297 if(bany(&r->calbehind)) 298 print(" cb=%B", r->calbehind); 299 } 300 print("\n"); 301 } 302 } 303 304 /* 305 * pass 5 306 * isolate regions 307 * calculate costs (paint1) 308 */ 309 r = firstr; 310 if(r) { 311 for(z=0; z<BITS; z++) 312 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 313 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 314 if(bany(&bit)) { 315 nearln = r->prog->lineno; 316 warn(Z, "used and not set: %B", bit); 317 if(debug['R'] && !debug['w']) 318 print("used and not set: %B\n", bit); 319 } 320 } 321 322 for(r = firstr; r != R; r = r->link) 323 r->act = zbits; 324 rgp = region; 325 nregion = 0; 326 for(r = firstr; r != R; r = r->link) { 327 for(z=0; z<BITS; z++) 328 bit.b[z] = r->set.b[z] & 329 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 330 if(bany(&bit)) { 331 nearln = r->prog->lineno; 332 warn(Z, "set and not used: %B", bit); 333 if(debug['R']) 334 print("set and not used: %B\n", bit); 335 excise(r); 336 } 337 for(z=0; z<BITS; z++) 338 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 339 while(bany(&bit)) { 340 i = bnum(bit); 341 rgp->enter = r; 342 rgp->varno = i; 343 change = 0; 344 if(debug['R'] && debug['v']) 345 print("\n"); 346 paint1(r, i); 347 bit.b[i/32] &= ~(1L<<(i%32)); 348 if(change <= 0) { 349 if(debug['R']) 350 print("%L $%d: %B\n", 351 r->prog->lineno, change, blsh(i)); 352 continue; 353 } 354 rgp->cost = change; 355 nregion++; 356 if(nregion >= NRGN) { 357 warn(Z, "too many regions"); 358 goto brk; 359 } 360 rgp++; 361 } 362 } 363 brk: 364 qsort(region, nregion, sizeof(region[0]), rcmp); 365 366 /* 367 * pass 6 368 * determine used registers (paint2) 369 * replace code (paint3) 370 */ 371 rgp = region; 372 for(i=0; i<nregion; i++) { 373 bit = blsh(rgp->varno); 374 vreg = paint2(rgp->enter, rgp->varno); 375 vreg = allreg(vreg, rgp); 376 if(debug['R']) { 377 if(rgp->regno >= NREG) 378 print("%L $%d F%d: %B\n", 379 rgp->enter->prog->lineno, 380 rgp->cost, 381 rgp->regno-NREG, 382 bit); 383 else 384 print("%L $%d R%d: %B\n", 385 rgp->enter->prog->lineno, 386 rgp->cost, 387 rgp->regno, 388 bit); 389 } 390 if(rgp->regno != 0) 391 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 392 rgp++; 393 } 394 /* 395 * pass 7 396 * peep-hole on basic block 397 */ 398 if(!debug['R'] || debug['P']) 399 peep(); 400 401 /* 402 * pass 8 403 * recalculate pc 404 */ 405 val = initpc; 406 for(r = firstr; r != R; r = r1) { 407 r->pc = val; 408 p = r->prog; 409 p1 = P; 410 r1 = r->link; 411 if(r1 != R) 412 p1 = r1->prog; 413 for(; p != p1; p = p->link) { 414 switch(p->as) { 415 default: 416 val++; 417 break; 418 419 case ANOP: 420 case ADATA: 421 case AGLOBL: 422 case ANAME: 423 case ASIGNAME: 424 break; 425 } 426 } 427 } 428 pc = val; 429 430 /* 431 * fix up branches 432 */ 433 if(debug['R']) 434 if(bany(&addrs)) 435 print("addrs: %B\n", addrs); 436 437 r1 = 0; /* set */ 438 for(r = firstr; r != R; r = r->link) { 439 p = r->prog; 440 if(p->to.type == D_BRANCH) 441 p->to.offset = r->s2->pc; 442 r1 = r; 443 } 444 445 /* 446 * last pass 447 * eliminate nops 448 * free aux structures 449 */ 450 for(p = firstr->prog; p != P; p = p->link){ 451 while(p->link && p->link->as == ANOP) 452 p->link = p->link->link; 453 } 454 if(r1 != R) { 455 r1->link = freer; 456 freer = firstr; 457 } 458 } 459 460 void 461 addsplits(void) 462 { 463 Reg *r, *r1; 464 int z, i; 465 Bits bit; 466 467 for(r = firstr; r != R; r = r->link) { 468 if(r->loop > 1) 469 continue; 470 if(r->prog->as == ABL) 471 continue; 472 for(r1 = r->p2; r1 != R; r1 = r1->p2link) { 473 if(r1->loop <= 1) 474 continue; 475 for(z=0; z<BITS; z++) 476 bit.b[z] = r1->calbehind.b[z] & 477 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) & 478 ~(r->calahead.b[z] & addrs.b[z]); 479 while(bany(&bit)) { 480 i = bnum(bit); 481 bit.b[i/32] &= ~(1L << (i%32)); 482 } 483 } 484 } 485 } 486 487 /* 488 * add mov b,rn 489 * just after r 490 */ 491 void 492 addmove(Reg *r, int bn, int rn, int f) 493 { 494 Prog *p, *p1; 495 Adr *a; 496 Var *v; 497 498 p1 = alloc(sizeof(*p1)); 499 *p1 = zprog; 500 p = r->prog; 501 502 p1->link = p->link; 503 p->link = p1; 504 p1->lineno = p->lineno; 505 506 v = var + bn; 507 508 a = &p1->to; 509 a->sym = v->sym; 510 a->name = v->name; 511 a->offset = v->offset; 512 a->etype = v->etype; 513 a->type = D_OREG; 514 if(a->etype == TARRAY || a->sym == S) 515 a->type = D_CONST; 516 517 p1->as = AMOVW; 518 if(v->etype == TCHAR || v->etype == TUCHAR) 519 p1->as = AMOVB; 520 if(v->etype == TSHORT || v->etype == TUSHORT) 521 p1->as = AMOVH; 522 if(v->etype == TFLOAT) 523 p1->as = AMOVF; 524 if(v->etype == TDOUBLE) 525 p1->as = AMOVD; 526 527 p1->from.type = D_REG; 528 p1->from.reg = rn; 529 if(rn >= NREG) { 530 p1->from.type = D_FREG; 531 p1->from.reg = rn-NREG; 532 } 533 if(!f) { 534 p1->from = *a; 535 *a = zprog.from; 536 a->type = D_REG; 537 a->reg = rn; 538 if(rn >= NREG) { 539 a->type = D_FREG; 540 a->reg = rn-NREG; 541 } 542 if(v->etype == TUCHAR) 543 p1->as = AMOVBU; 544 if(v->etype == TUSHORT) 545 p1->as = AMOVHU; 546 } 547 if(debug['R']) 548 print("%P\t.a%P\n", p, p1); 549 } 550 551 Bits 552 mkvar(Adr *a, int docon) 553 { 554 Var *v; 555 int i, t, n, et, z; 556 long o; 557 Bits bit; 558 Sym *s; 559 560 t = a->type; 561 if(t == D_REG && a->reg != NREG) 562 regbits |= RtoB(a->reg); 563 if(t == D_FREG && a->reg != NREG) 564 regbits |= FtoB(a->reg); 565 s = a->sym; 566 o = a->offset; 567 et = a->etype; 568 if(s == S) { 569 if(t != D_CONST || !docon || a->reg != NREG) 570 goto none; 571 et = TLONG; 572 } 573 if(t == D_CONST) { 574 if(s == S && sval(o)) 575 goto none; 576 } 577 578 n = a->name; 579 v = var; 580 for(i=0; i<nvar; i++) { 581 if(s == v->sym) 582 if(n == v->name) 583 if(o == v->offset) 584 goto out; 585 v++; 586 } 587 if(s) 588 if(s->name[0] == '.') 589 goto none; 590 if(nvar >= NVAR) { 591 if(debug['w'] > 1 && s) 592 warn(Z, "variable not optimized: %s", s->name); 593 goto none; 594 } 595 i = nvar; 596 nvar++; 597 v = &var[i]; 598 v->sym = s; 599 v->offset = o; 600 v->etype = et; 601 v->name = n; 602 if(debug['R']) 603 print("bit=%2d et=%2d %D\n", i, et, a); 604 out: 605 bit = blsh(i); 606 if(n == D_EXTERN || n == D_STATIC) 607 for(z=0; z<BITS; z++) 608 externs.b[z] |= bit.b[z]; 609 if(n == D_PARAM) 610 for(z=0; z<BITS; z++) 611 params.b[z] |= bit.b[z]; 612 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 613 for(z=0; z<BITS; z++) 614 addrs.b[z] |= bit.b[z]; 615 if(t == D_CONST) { 616 if(s == S) { 617 for(z=0; z<BITS; z++) 618 consts.b[z] |= bit.b[z]; 619 return bit; 620 } 621 if(et != TARRAY) 622 for(z=0; z<BITS; z++) 623 addrs.b[z] |= bit.b[z]; 624 for(z=0; z<BITS; z++) 625 params.b[z] |= bit.b[z]; 626 return bit; 627 } 628 if(t == D_OREG) 629 return bit; 630 631 none: 632 return zbits; 633 } 634 635 void 636 prop(Reg *r, Bits ref, Bits cal) 637 { 638 Reg *r1, *r2; 639 int z; 640 641 for(r1 = r; r1 != R; r1 = r1->p1) { 642 for(z=0; z<BITS; z++) { 643 ref.b[z] |= r1->refahead.b[z]; 644 if(ref.b[z] != r1->refahead.b[z]) { 645 r1->refahead.b[z] = ref.b[z]; 646 change++; 647 } 648 cal.b[z] |= r1->calahead.b[z]; 649 if(cal.b[z] != r1->calahead.b[z]) { 650 r1->calahead.b[z] = cal.b[z]; 651 change++; 652 } 653 } 654 switch(r1->prog->as) { 655 case ABL: 656 for(z=0; z<BITS; z++) { 657 cal.b[z] |= ref.b[z] | externs.b[z]; 658 ref.b[z] = 0; 659 } 660 break; 661 662 case ATEXT: 663 for(z=0; z<BITS; z++) { 664 cal.b[z] = 0; 665 ref.b[z] = 0; 666 } 667 break; 668 669 case ARET: 670 for(z=0; z<BITS; z++) { 671 cal.b[z] = externs.b[z]; 672 ref.b[z] = 0; 673 } 674 } 675 for(z=0; z<BITS; z++) { 676 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 677 r1->use1.b[z] | r1->use2.b[z]; 678 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 679 r1->refbehind.b[z] = ref.b[z]; 680 r1->calbehind.b[z] = cal.b[z]; 681 } 682 if(r1->active) 683 break; 684 r1->active = 1; 685 } 686 for(; r != r1; r = r->p1) 687 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 688 prop(r2, r->refbehind, r->calbehind); 689 } 690 691 /* 692 * find looping structure 693 * 694 * 1) find reverse postordering 695 * 2) find approximate dominators, 696 * the actual dominators if the flow graph is reducible 697 * otherwise, dominators plus some other non-dominators. 698 * See Matthew S. Hecht and Jeffrey D. Ullman, 699 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 700 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 701 * Oct. 1-3, 1973, pp. 207-217. 702 * 3) find all nodes with a predecessor dominated by the current node. 703 * such a node is a loop head. 704 * recursively, all preds with a greater rpo number are in the loop 705 */ 706 long 707 postorder(Reg *r, Reg **rpo2r, long n) 708 { 709 Reg *r1; 710 711 r->rpo = 1; 712 r1 = r->s1; 713 if(r1 && !r1->rpo) 714 n = postorder(r1, rpo2r, n); 715 r1 = r->s2; 716 if(r1 && !r1->rpo) 717 n = postorder(r1, rpo2r, n); 718 rpo2r[n] = r; 719 n++; 720 return n; 721 } 722 723 long 724 rpolca(long *idom, long rpo1, long rpo2) 725 { 726 long t; 727 728 if(rpo1 == -1) 729 return rpo2; 730 while(rpo1 != rpo2){ 731 if(rpo1 > rpo2){ 732 t = rpo2; 733 rpo2 = rpo1; 734 rpo1 = t; 735 } 736 while(rpo1 < rpo2){ 737 t = idom[rpo2]; 738 if(t >= rpo2) 739 fatal(Z, "bad idom"); 740 rpo2 = t; 741 } 742 } 743 return rpo1; 744 } 745 746 int 747 doms(long *idom, long r, long s) 748 { 749 while(s > r) 750 s = idom[s]; 751 return s == r; 752 } 753 754 int 755 loophead(long *idom, Reg *r) 756 { 757 long src; 758 759 src = r->rpo; 760 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 761 return 1; 762 for(r = r->p2; r != R; r = r->p2link) 763 if(doms(idom, src, r->rpo)) 764 return 1; 765 return 0; 766 } 767 768 void 769 loopmark(Reg **rpo2r, long head, Reg *r) 770 { 771 if(r->rpo < head || r->active == head) 772 return; 773 r->active = head; 774 r->loop += LOOP; 775 if(r->p1 != R) 776 loopmark(rpo2r, head, r->p1); 777 for(r = r->p2; r != R; r = r->p2link) 778 loopmark(rpo2r, head, r); 779 } 780 781 void 782 loopit(Reg *r, long nr) 783 { 784 Reg *r1; 785 long i, d, me; 786 787 if(nr > maxnr) { 788 rpo2r = alloc(nr * sizeof(Reg*)); 789 idom = alloc(nr * sizeof(long)); 790 maxnr = nr; 791 } 792 793 d = postorder(r, rpo2r, 0); 794 if(d > nr) 795 fatal(Z, "too many reg nodes"); 796 nr = d; 797 for(i = 0; i < nr / 2; i++){ 798 r1 = rpo2r[i]; 799 rpo2r[i] = rpo2r[nr - 1 - i]; 800 rpo2r[nr - 1 - i] = r1; 801 } 802 for(i = 0; i < nr; i++) 803 rpo2r[i]->rpo = i; 804 805 idom[0] = 0; 806 for(i = 0; i < nr; i++){ 807 r1 = rpo2r[i]; 808 me = r1->rpo; 809 d = -1; 810 if(r1->p1 != R && r1->p1->rpo < me) 811 d = r1->p1->rpo; 812 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 813 if(r1->rpo < me) 814 d = rpolca(idom, d, r1->rpo); 815 idom[i] = d; 816 } 817 818 for(i = 0; i < nr; i++){ 819 r1 = rpo2r[i]; 820 r1->loop++; 821 if(r1->p2 != R && loophead(idom, r1)) 822 loopmark(rpo2r, i, r1); 823 } 824 } 825 826 void 827 synch(Reg *r, Bits dif) 828 { 829 Reg *r1; 830 int z; 831 832 for(r1 = r; r1 != R; r1 = r1->s1) { 833 for(z=0; z<BITS; z++) { 834 dif.b[z] = (dif.b[z] & 835 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 836 r1->set.b[z] | r1->regdiff.b[z]; 837 if(dif.b[z] != r1->regdiff.b[z]) { 838 r1->regdiff.b[z] = dif.b[z]; 839 change++; 840 } 841 } 842 if(r1->active) 843 break; 844 r1->active = 1; 845 for(z=0; z<BITS; z++) 846 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 847 if(r1->s2 != R) 848 synch(r1->s2, dif); 849 } 850 } 851 852 ulong 853 allreg(ulong b, Rgn *r) 854 { 855 Var *v; 856 int i; 857 858 v = var + r->varno; 859 r->regno = 0; 860 switch(v->etype) { 861 862 default: 863 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 864 break; 865 866 case TCHAR: 867 case TUCHAR: 868 case TSHORT: 869 case TUSHORT: 870 case TINT: 871 case TUINT: 872 case TLONG: 873 case TULONG: 874 case TIND: 875 case TARRAY: 876 i = BtoR(~b); 877 if(i && r->cost >= 0) { 878 r->regno = i; 879 return RtoB(i); 880 } 881 break; 882 883 case TVLONG: 884 case TDOUBLE: 885 case TFLOAT: 886 i = BtoF(~b); 887 if(i && r->cost >= 0) { 888 r->regno = i+NREG; 889 return FtoB(i); 890 } 891 break; 892 } 893 return 0; 894 } 895 896 void 897 paint1(Reg *r, int bn) 898 { 899 Reg *r1; 900 Prog *p; 901 int z; 902 ulong bb; 903 904 z = bn/32; 905 bb = 1L<<(bn%32); 906 if(r->act.b[z] & bb) 907 return; 908 for(;;) { 909 if(!(r->refbehind.b[z] & bb)) 910 break; 911 r1 = r->p1; 912 if(r1 == R) 913 break; 914 if(!(r1->refahead.b[z] & bb)) 915 break; 916 if(r1->act.b[z] & bb) 917 break; 918 r = r1; 919 } 920 921 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) { 922 change -= CLOAD * r->loop; 923 if(debug['R'] && debug['v']) 924 print("%ld%P\tld %B $%d\n", r->loop, 925 r->prog, blsh(bn), change); 926 } 927 for(;;) { 928 r->act.b[z] |= bb; 929 p = r->prog; 930 931 if(r->use1.b[z] & bb) { 932 change += CREF * r->loop; 933 if(debug['R'] && debug['v']) 934 print("%ld%P\tu1 %B $%d\n", r->loop, 935 p, blsh(bn), change); 936 } 937 938 if((r->use2.b[z]|r->set.b[z]) & bb) { 939 change += CREF * r->loop; 940 if(debug['R'] && debug['v']) 941 print("%ld%P\tu2 %B $%d\n", r->loop, 942 p, blsh(bn), change); 943 } 944 945 if(STORE(r) & r->regdiff.b[z] & bb) { 946 change -= CLOAD * r->loop; 947 if(debug['R'] && debug['v']) 948 print("%ld%P\tst %B $%d\n", r->loop, 949 p, blsh(bn), change); 950 } 951 952 if(r->refbehind.b[z] & bb) 953 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 954 if(r1->refahead.b[z] & bb) 955 paint1(r1, bn); 956 957 if(!(r->refahead.b[z] & bb)) 958 break; 959 r1 = r->s2; 960 if(r1 != R) 961 if(r1->refbehind.b[z] & bb) 962 paint1(r1, bn); 963 r = r->s1; 964 if(r == R) 965 break; 966 if(r->act.b[z] & bb) 967 break; 968 if(!(r->refbehind.b[z] & bb)) 969 break; 970 } 971 } 972 973 ulong 974 paint2(Reg *r, int bn) 975 { 976 Reg *r1; 977 int z; 978 ulong bb, vreg; 979 980 z = bn/32; 981 bb = 1L << (bn%32); 982 vreg = regbits; 983 if(!(r->act.b[z] & bb)) 984 return vreg; 985 for(;;) { 986 if(!(r->refbehind.b[z] & bb)) 987 break; 988 r1 = r->p1; 989 if(r1 == R) 990 break; 991 if(!(r1->refahead.b[z] & bb)) 992 break; 993 if(!(r1->act.b[z] & bb)) 994 break; 995 r = r1; 996 } 997 for(;;) { 998 r->act.b[z] &= ~bb; 999 1000 vreg |= r->regu; 1001 1002 if(r->refbehind.b[z] & bb) 1003 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1004 if(r1->refahead.b[z] & bb) 1005 vreg |= paint2(r1, bn); 1006 1007 if(!(r->refahead.b[z] & bb)) 1008 break; 1009 r1 = r->s2; 1010 if(r1 != R) 1011 if(r1->refbehind.b[z] & bb) 1012 vreg |= paint2(r1, bn); 1013 r = r->s1; 1014 if(r == R) 1015 break; 1016 if(!(r->act.b[z] & bb)) 1017 break; 1018 if(!(r->refbehind.b[z] & bb)) 1019 break; 1020 } 1021 return vreg; 1022 } 1023 1024 void 1025 paint3(Reg *r, int bn, long rb, int rn) 1026 { 1027 Reg *r1; 1028 Prog *p; 1029 int z; 1030 ulong bb; 1031 1032 z = bn/32; 1033 bb = 1L << (bn%32); 1034 if(r->act.b[z] & bb) 1035 return; 1036 for(;;) { 1037 if(!(r->refbehind.b[z] & bb)) 1038 break; 1039 r1 = r->p1; 1040 if(r1 == R) 1041 break; 1042 if(!(r1->refahead.b[z] & bb)) 1043 break; 1044 if(r1->act.b[z] & bb) 1045 break; 1046 r = r1; 1047 } 1048 1049 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1050 addmove(r, bn, rn, 0); 1051 for(;;) { 1052 r->act.b[z] |= bb; 1053 p = r->prog; 1054 1055 if(r->use1.b[z] & bb) { 1056 if(debug['R']) 1057 print("%P", p); 1058 addreg(&p->from, rn); 1059 if(debug['R']) 1060 print("\t.c%P\n", p); 1061 } 1062 if((r->use2.b[z]|r->set.b[z]) & bb) { 1063 if(debug['R']) 1064 print("%P", p); 1065 addreg(&p->to, rn); 1066 if(debug['R']) 1067 print("\t.c%P\n", p); 1068 } 1069 1070 if(STORE(r) & r->regdiff.b[z] & bb) 1071 addmove(r, bn, rn, 1); 1072 r->regu |= rb; 1073 1074 if(r->refbehind.b[z] & bb) 1075 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1076 if(r1->refahead.b[z] & bb) 1077 paint3(r1, bn, rb, rn); 1078 1079 if(!(r->refahead.b[z] & bb)) 1080 break; 1081 r1 = r->s2; 1082 if(r1 != R) 1083 if(r1->refbehind.b[z] & bb) 1084 paint3(r1, bn, rb, rn); 1085 r = r->s1; 1086 if(r == R) 1087 break; 1088 if(r->act.b[z] & bb) 1089 break; 1090 if(!(r->refbehind.b[z] & bb)) 1091 break; 1092 } 1093 } 1094 1095 void 1096 addreg(Adr *a, int rn) 1097 { 1098 1099 a->sym = 0; 1100 a->name = D_NONE; 1101 a->type = D_REG; 1102 a->reg = rn; 1103 if(rn >= NREG) { 1104 a->type = D_FREG; 1105 a->reg = rn-NREG; 1106 } 1107 } 1108 1109 /* 1110 * bit reg 1111 * 0 R0 1112 * 1 R1 1113 * ... ... 1114 * 10 R10 1115 */ 1116 long 1117 RtoB(int r) 1118 { 1119 1120 if(r >= REGMIN && r <= REGMAX) 1121 return 1L << r; 1122 return 0; 1123 } 1124 1125 int 1126 BtoR(long b) 1127 { 1128 b &= 0x01fcL; // excluded R9 and R10 for extern registers 1129 if(b == 0) 1130 return 0; 1131 return bitno(b); 1132 } 1133 1134 /* 1135 * bit reg 1136 * 18 F2 1137 * 19 F3 1138 * ... ... 1139 * 23 F7 1140 */ 1141 long 1142 FtoB(int f) 1143 { 1144 1145 if(f < 2 || f > NFREG-1) 1146 return 0; 1147 return 1L << (f + 16); 1148 } 1149 1150 int 1151 BtoF(long b) 1152 { 1153 1154 b &= 0xfc0000L; 1155 if(b == 0) 1156 return 0; 1157 return bitno(b) - 16; 1158 } 1159