1 #include "gc.h" 2 3 #define COL1 40 4 5 Reg* 6 rega(void) 7 { 8 Reg *r; 9 10 r = freer; 11 if(r == R) { 12 ALLOC(r, Reg); 13 } else 14 freer = r->link; 15 16 *r = zreg; 17 return r; 18 } 19 20 int 21 rcmp(void *a1, void *a2) 22 { 23 Rgn *p1, *p2; 24 int c1, c2; 25 26 p1 = a1; 27 p2 = a2; 28 c1 = p2->cost; 29 c2 = p1->cost; 30 if(c1 -= c2) 31 return c1; 32 return p2->varno - p1->varno; 33 } 34 35 void 36 regopt(Prog *p) 37 { 38 Reg *r, *r1, *r2; 39 Prog *p1; 40 int i, z; 41 long initpc, val; 42 ulong vreg; 43 Bits bit; 44 struct 45 { 46 long m; 47 long c; 48 Reg* p; 49 } log5[6], *lp; 50 51 firstr = R; 52 lastr = R; 53 nvar = 0; 54 regbits = RtoB(D_SP) | RtoB(D_AX); 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 continue; 84 } 85 r = rega(); 86 if(firstr == R) { 87 firstr = r; 88 lastr = r; 89 } else { 90 lastr->link = r; 91 r->p1 = lastr; 92 lastr->s1 = r; 93 lastr = r; 94 } 95 r->prog = p; 96 r->pc = val; 97 val++; 98 99 lp = log5; 100 for(i=0; i<5; i++) { 101 lp->c--; 102 if(lp->c <= 0) { 103 lp->c = lp->m; 104 if(lp->p != R) 105 lp->p->log5 = r; 106 lp->p = r; 107 (lp+1)->c = 0; 108 break; 109 } 110 lp++; 111 } 112 113 r1 = r->p1; 114 if(r1 != R) 115 switch(r1->prog->as) { 116 case ARET: 117 case AJMP: 118 case AIRETL: 119 r->p1 = R; 120 r1->s1 = R; 121 } 122 123 bit = mkvar(r, &p->from); 124 if(bany(&bit)) 125 switch(p->as) { 126 /* 127 * funny 128 */ 129 case ALEAL: 130 for(z=0; z<BITS; z++) 131 addrs.b[z] |= bit.b[z]; 132 break; 133 134 /* 135 * left side read 136 */ 137 default: 138 for(z=0; z<BITS; z++) 139 r->use1.b[z] |= bit.b[z]; 140 break; 141 } 142 143 bit = mkvar(r, &p->to); 144 if(bany(&bit)) 145 switch(p->as) { 146 default: 147 diag(Z, "reg: unknown op: %A", p->as); 148 break; 149 150 /* 151 * right side read 152 */ 153 case ACMPB: 154 case ACMPL: 155 case ACMPW: 156 for(z=0; z<BITS; z++) 157 r->use2.b[z] |= bit.b[z]; 158 break; 159 160 /* 161 * right side write 162 */ 163 case ANOP: 164 case AMOVL: 165 case AMOVB: 166 case AMOVW: 167 case AMOVBLSX: 168 case AMOVBLZX: 169 case AMOVWLSX: 170 case AMOVWLZX: 171 for(z=0; z<BITS; z++) 172 r->set.b[z] |= bit.b[z]; 173 break; 174 175 /* 176 * right side read+write 177 */ 178 case AADDB: 179 case AADDL: 180 case AADDW: 181 case AANDB: 182 case AANDL: 183 case AANDW: 184 case ASUBB: 185 case ASUBL: 186 case ASUBW: 187 case AORB: 188 case AORL: 189 case AORW: 190 case AXORB: 191 case AXORL: 192 case AXORW: 193 case ASALB: 194 case ASALL: 195 case ASALW: 196 case ASARB: 197 case ASARL: 198 case ASARW: 199 case AROLB: 200 case AROLL: 201 case AROLW: 202 case ARORB: 203 case ARORL: 204 case ARORW: 205 case ASHLB: 206 case ASHLL: 207 case ASHLW: 208 case ASHRB: 209 case ASHRL: 210 case ASHRW: 211 for(z=0; z<BITS; z++) { 212 r->set.b[z] |= bit.b[z]; 213 r->use2.b[z] |= bit.b[z]; 214 } 215 break; 216 217 /* 218 * funny 219 */ 220 case AFMOVDP: 221 case AFMOVFP: 222 case ACALL: 223 for(z=0; z<BITS; z++) 224 addrs.b[z] |= bit.b[z]; 225 break; 226 } 227 228 switch(p->as) { 229 case AIDIVB: 230 case AIDIVL: 231 case AIDIVW: 232 case AIMULB: 233 case AIMULL: 234 case AIMULW: 235 case ADIVB: 236 case ADIVL: 237 case ADIVW: 238 case AMULB: 239 case AMULL: 240 case AMULW: 241 242 case ACWD: 243 case ACDQ: 244 r->regu |= RtoB(D_AX) | RtoB(D_DX); 245 break; 246 247 case AREP: 248 case AREPN: 249 case ALOOP: 250 case ALOOPEQ: 251 case ALOOPNE: 252 r->regu |= RtoB(D_CX); 253 break; 254 255 case AMOVSB: 256 case AMOVSL: 257 case AMOVSW: 258 case ACMPSB: 259 case ACMPSL: 260 case ACMPSW: 261 r->regu |= RtoB(D_SI) | RtoB(D_DI); 262 break; 263 264 case ASTOSB: 265 case ASTOSL: 266 case ASTOSW: 267 case ASCASB: 268 case ASCASL: 269 case ASCASW: 270 r->regu |= RtoB(D_AX) | RtoB(D_DI); 271 break; 272 273 case AINSB: 274 case AINSL: 275 case AINSW: 276 case AOUTSB: 277 case AOUTSL: 278 case AOUTSW: 279 r->regu |= RtoB(D_DI) | RtoB(D_DX); 280 break; 281 282 case AFSTSW: 283 case ASAHF: 284 r->regu |= RtoB(D_AX); 285 break; 286 } 287 } 288 if(firstr == R) 289 return; 290 initpc = pc - val; 291 292 /* 293 * pass 2 294 * turn branch references to pointers 295 * build back pointers 296 */ 297 for(r = firstr; r != R; r = r->link) { 298 p = r->prog; 299 if(p->to.type == D_BRANCH) { 300 val = p->to.offset - initpc; 301 r1 = firstr; 302 while(r1 != R) { 303 r2 = r1->log5; 304 if(r2 != R && val >= r2->pc) { 305 r1 = r2; 306 continue; 307 } 308 if(r1->pc == val) 309 break; 310 r1 = r1->link; 311 } 312 if(r1 == R) { 313 nearln = p->lineno; 314 diag(Z, "ref not found\n%P", p); 315 continue; 316 } 317 if(r1 == r) { 318 nearln = p->lineno; 319 diag(Z, "ref to self\n%P", p); 320 continue; 321 } 322 r->s2 = r1; 323 r->p2link = r1->p2; 324 r1->p2 = r; 325 } 326 } 327 if(debug['R']) { 328 p = firstr->prog; 329 print("\n%L %D\n", p->lineno, &p->from); 330 } 331 332 /* 333 * pass 2.5 334 * find looping structure 335 */ 336 for(r = firstr; r != R; r = r->link) 337 r->active = 0; 338 change = 0; 339 loopit(firstr); 340 if(debug['R'] && debug['v']) { 341 print("\nlooping structure:\n"); 342 for(r = firstr; r != R; r = r->link) { 343 print("%d:%P", r->loop, r->prog); 344 for(z=0; z<BITS; z++) 345 bit.b[z] = r->use1.b[z] | 346 r->use2.b[z] | 347 r->set.b[z]; 348 if(bany(&bit)) { 349 print("%|", COL1); 350 if(bany(&r->use1)) 351 print(" u1=%B", r->use1); 352 if(bany(&r->use2)) 353 print(" u2=%B", r->use2); 354 if(bany(&r->set)) 355 print(" st=%B", r->set); 356 } 357 print("\n"); 358 } 359 } 360 361 /* 362 * pass 3 363 * iterate propagating usage 364 * back until flow graph is complete 365 */ 366 loop1: 367 change = 0; 368 for(r = firstr; r != R; r = r->link) 369 r->active = 0; 370 for(r = firstr; r != R; r = r->link) 371 if(r->prog->as == ARET) 372 prop(r, zbits, zbits); 373 loop11: 374 /* pick up unreachable code */ 375 i = 0; 376 for(r = firstr; r != R; r = r1) { 377 r1 = r->link; 378 if(r1 && r1->active && !r->active) { 379 prop(r, zbits, zbits); 380 i = 1; 381 } 382 } 383 if(i) 384 goto loop11; 385 if(change) 386 goto loop1; 387 388 389 /* 390 * pass 4 391 * iterate propagating register/variable synchrony 392 * forward until graph is complete 393 */ 394 loop2: 395 change = 0; 396 for(r = firstr; r != R; r = r->link) 397 r->active = 0; 398 synch(firstr, zbits); 399 if(change) 400 goto loop2; 401 402 403 /* 404 * pass 5 405 * isolate regions 406 * calculate costs (paint1) 407 */ 408 r = firstr; 409 if(r) { 410 for(z=0; z<BITS; z++) 411 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 412 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 413 if(bany(&bit)) { 414 nearln = r->prog->lineno; 415 warn(Z, "used and not set: %B", bit); 416 if(debug['R'] && !debug['w']) 417 print("used and not set: %B\n", bit); 418 } 419 } 420 if(debug['R'] && debug['v']) 421 print("\nprop structure:\n"); 422 for(r = firstr; r != R; r = r->link) 423 r->act = zbits; 424 rgp = region; 425 nregion = 0; 426 for(r = firstr; r != R; r = r->link) { 427 if(debug['R'] && debug['v']) { 428 print("%P%|", r->prog, COL1); 429 if(bany(&r->set)) 430 print("s:%B ", r->set); 431 if(bany(&r->refahead)) 432 print("ra:%B ", r->refahead); 433 if(bany(&r->calahead)) 434 print("ca:%B ", r->calahead); 435 print("\n"); 436 } 437 for(z=0; z<BITS; z++) 438 bit.b[z] = r->set.b[z] & 439 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 440 if(bany(&bit)) { 441 nearln = r->prog->lineno; 442 warn(Z, "set and not used: %B", bit); 443 if(debug['R']) 444 print("set an not used: %B\n", bit); 445 excise(r); 446 } 447 for(z=0; z<BITS; z++) 448 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 449 while(bany(&bit)) { 450 i = bnum(bit); 451 rgp->enter = r; 452 rgp->varno = i; 453 change = 0; 454 if(debug['R'] && debug['v']) 455 print("\n"); 456 paint1(r, i); 457 bit.b[i/32] &= ~(1L<<(i%32)); 458 if(change <= 0) { 459 if(debug['R']) 460 print("%L$%d: %B\n", 461 r->prog->lineno, change, blsh(i)); 462 continue; 463 } 464 rgp->cost = change; 465 nregion++; 466 if(nregion >= NRGN) { 467 warn(Z, "too many regions"); 468 goto brk; 469 } 470 rgp++; 471 } 472 } 473 brk: 474 qsort(region, nregion, sizeof(region[0]), rcmp); 475 476 /* 477 * pass 6 478 * determine used registers (paint2) 479 * replace code (paint3) 480 */ 481 rgp = region; 482 for(i=0; i<nregion; i++) { 483 bit = blsh(rgp->varno); 484 vreg = paint2(rgp->enter, rgp->varno); 485 vreg = allreg(vreg, rgp); 486 if(debug['R']) { 487 print("%L$%d %R: %B\n", 488 rgp->enter->prog->lineno, 489 rgp->cost, 490 rgp->regno, 491 bit); 492 } 493 if(rgp->regno != 0) 494 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 495 rgp++; 496 } 497 /* 498 * pass 7 499 * peep-hole on basic block 500 */ 501 if(!debug['R'] || debug['P']) 502 peep(); 503 504 /* 505 * pass 8 506 * recalculate pc 507 */ 508 val = initpc; 509 for(r = firstr; r != R; r = r1) { 510 r->pc = val; 511 p = r->prog; 512 p1 = P; 513 r1 = r->link; 514 if(r1 != R) 515 p1 = r1->prog; 516 for(; p != p1; p = p->link) { 517 switch(p->as) { 518 default: 519 val++; 520 break; 521 522 case ANOP: 523 case ADATA: 524 case AGLOBL: 525 case ANAME: 526 break; 527 } 528 } 529 } 530 pc = val; 531 532 /* 533 * fix up branches 534 */ 535 if(debug['R']) 536 if(bany(&addrs)) 537 print("addrs: %B\n", addrs); 538 539 r1 = 0; /* set */ 540 for(r = firstr; r != R; r = r->link) { 541 p = r->prog; 542 if(p->to.type == D_BRANCH) 543 p->to.offset = r->s2->pc; 544 r1 = r; 545 } 546 547 /* 548 * last pass 549 * eliminate nops 550 * free aux structures 551 */ 552 for(p = firstr->prog; p != P; p = p->link){ 553 while(p->link && p->link->as == ANOP) 554 p->link = p->link->link; 555 } 556 if(r1 != R) { 557 r1->link = freer; 558 freer = firstr; 559 } 560 } 561 562 /* 563 * add mov b,rn 564 * just after r 565 */ 566 void 567 addmove(Reg *r, int bn, int rn, int f) 568 { 569 Prog *p, *p1; 570 Adr *a; 571 Var *v; 572 573 ALLOC(p1,Prog); 574 *p1 = zprog; 575 p = r->prog; 576 577 p1->link = p->link; 578 p->link = p1; 579 p1->lineno = p->lineno; 580 581 v = var + bn; 582 583 a = &p1->to; 584 a->sym = v->sym; 585 a->offset = v->offset; 586 a->etype = v->etype; 587 a->type = v->name; 588 589 p1->as = AMOVL; 590 if(v->etype == TCHAR || v->etype == TUCHAR) 591 p1->as = AMOVB; 592 if(v->etype == TSHORT || v->etype == TUSHORT) 593 p1->as = AMOVW; 594 595 p1->from.type = rn; 596 if(!f) { 597 p1->from = *a; 598 *a = zprog.from; 599 a->type = rn; 600 if(v->etype == TUCHAR) 601 p1->as = AMOVB; 602 if(v->etype == TUSHORT) 603 p1->as = AMOVW; 604 } 605 if(debug['R']) 606 print("%P%|.a%P\n", p, COL1, p1); 607 } 608 609 ulong 610 doregbits(int r) 611 { 612 ulong b; 613 614 b = 0; 615 if(r >= D_INDIR) 616 r -= D_INDIR; 617 if(r >= D_AX && r <= D_DI) 618 b |= RtoB(r); 619 else 620 if(r >= D_AL && r <= D_BL) 621 b |= RtoB(r-D_AL+D_AX); 622 else 623 if(r >= D_AH && r <= D_BH) 624 b |= RtoB(r-D_AH+D_AX); 625 return b; 626 } 627 628 Bits 629 mkvar(Reg *r, Adr *a) 630 { 631 Var *v; 632 int i, t, n, et, z; 633 long o; 634 Bits bit; 635 Sym *s; 636 637 /* 638 * mark registers used 639 */ 640 t = a->type; 641 r->regu |= doregbits(t); 642 r->regu |= doregbits(a->index); 643 644 switch(t) { 645 default: 646 goto none; 647 case D_ADDR: 648 a->type = a->index; 649 bit = mkvar(r, a); 650 for(z=0; z<BITS; z++) 651 addrs.b[z] |= bit.b[z]; 652 a->type = t; 653 goto none; 654 case D_EXTERN: 655 case D_STATIC: 656 case D_PARAM: 657 case D_AUTO: 658 n = t; 659 break; 660 } 661 s = a->sym; 662 if(s == S) 663 goto none; 664 if(s->name[0] == '.') 665 goto none; 666 et = a->etype; 667 o = a->offset; 668 v = var; 669 for(i=0; i<nvar; i++) { 670 if(s == v->sym) 671 if(n == v->name) 672 if(o == v->offset) 673 goto out; 674 v++; 675 } 676 if(nvar >= NVAR) { 677 warn(Z, "variable not optimized: %s", s->name); 678 goto none; 679 } 680 i = nvar; 681 nvar++; 682 v = &var[i]; 683 v->sym = s; 684 v->offset = o; 685 v->name = n; 686 v->etype = et; 687 if(debug['R']) 688 print("bit=%2d et=%2d %D\n", i, et, a); 689 690 out: 691 bit = blsh(i); 692 if(n == D_EXTERN || n == D_STATIC) 693 for(z=0; z<BITS; z++) 694 externs.b[z] |= bit.b[z]; 695 if(n == D_PARAM) 696 for(z=0; z<BITS; z++) 697 params.b[z] |= bit.b[z]; 698 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 699 for(z=0; z<BITS; z++) 700 addrs.b[z] |= bit.b[z]; 701 return bit; 702 703 none: 704 return zbits; 705 } 706 707 void 708 prop(Reg *r, Bits ref, Bits cal) 709 { 710 Reg *r1, *r2; 711 int z; 712 713 for(r1 = r; r1 != R; r1 = r1->p1) { 714 for(z=0; z<BITS; z++) { 715 ref.b[z] |= r1->refahead.b[z]; 716 if(ref.b[z] != r1->refahead.b[z]) { 717 r1->refahead.b[z] = ref.b[z]; 718 change++; 719 } 720 cal.b[z] |= r1->calahead.b[z]; 721 if(cal.b[z] != r1->calahead.b[z]) { 722 r1->calahead.b[z] = cal.b[z]; 723 change++; 724 } 725 } 726 switch(r1->prog->as) { 727 case ACALL: 728 for(z=0; z<BITS; z++) { 729 cal.b[z] |= ref.b[z] | externs.b[z]; 730 ref.b[z] = 0; 731 } 732 break; 733 734 case ATEXT: 735 for(z=0; z<BITS; z++) { 736 cal.b[z] = 0; 737 ref.b[z] = 0; 738 } 739 break; 740 741 case ARET: 742 for(z=0; z<BITS; z++) { 743 cal.b[z] = externs.b[z]; 744 ref.b[z] = 0; 745 } 746 } 747 for(z=0; z<BITS; z++) { 748 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 749 r1->use1.b[z] | r1->use2.b[z]; 750 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 751 r1->refbehind.b[z] = ref.b[z]; 752 r1->calbehind.b[z] = cal.b[z]; 753 } 754 if(r1->active) 755 break; 756 r1->active = 1; 757 } 758 for(; r != r1; r = r->p1) 759 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 760 prop(r2, r->refbehind, r->calbehind); 761 } 762 763 int 764 loopit(Reg *r) 765 { 766 Reg *r1; 767 int l, m; 768 769 l = 0; 770 r->active = 1; 771 r->loop = 0; 772 if(r1 = r->s1) 773 switch(r1->active) { 774 case 0: 775 l += loopit(r1); 776 break; 777 case 1: 778 l += LOOP; 779 r1->loop += LOOP; 780 } 781 if(r1 = r->s2) 782 switch(r1->active) { 783 case 0: 784 l += loopit(r1); 785 break; 786 case 1: 787 l += LOOP; 788 r1->loop += LOOP; 789 } 790 r->active = 2; 791 m = r->loop; 792 r->loop = l + 1; 793 return l - m; 794 } 795 796 void 797 synch(Reg *r, Bits dif) 798 { 799 Reg *r1; 800 int z; 801 802 for(r1 = r; r1 != R; r1 = r1->s1) { 803 for(z=0; z<BITS; z++) { 804 dif.b[z] = (dif.b[z] & 805 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 806 r1->set.b[z] | r1->regdiff.b[z]; 807 if(dif.b[z] != r1->regdiff.b[z]) { 808 r1->regdiff.b[z] = dif.b[z]; 809 change++; 810 } 811 } 812 if(r1->active) 813 break; 814 r1->active = 1; 815 for(z=0; z<BITS; z++) 816 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 817 if(r1->s2 != R) 818 synch(r1->s2, dif); 819 } 820 } 821 822 ulong 823 allreg(ulong b, Rgn *r) 824 { 825 Var *v; 826 int i; 827 828 v = var + r->varno; 829 r->regno = 0; 830 switch(v->etype) { 831 832 default: 833 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 834 break; 835 836 case TCHAR: 837 case TUCHAR: 838 case TSHORT: 839 case TUSHORT: 840 case TLONG: 841 case TULONG: 842 case TIND: 843 case TARRAY: 844 i = BtoR(~b); 845 if(i && r->cost > 0) { 846 r->regno = i; 847 return RtoB(i); 848 } 849 break; 850 851 case TVLONG: 852 case TDOUBLE: 853 case TFLOAT: 854 break; 855 } 856 return 0; 857 } 858 859 void 860 paint1(Reg *r, int bn) 861 { 862 Reg *r1; 863 Prog *p; 864 int z; 865 ulong bb; 866 867 z = bn/32; 868 bb = 1L<<(bn%32); 869 if(r->act.b[z] & bb) 870 return; 871 for(;;) { 872 if(!(r->refbehind.b[z] & bb)) 873 break; 874 r1 = r->p1; 875 if(r1 == R) 876 break; 877 if(!(r1->refahead.b[z] & bb)) 878 break; 879 if(r1->act.b[z] & bb) 880 break; 881 r = r1; 882 } 883 884 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 885 change -= CLOAD * r->loop; 886 if(debug['R'] && debug['v']) 887 print("%d%P%|ld %B $%d\n", r->loop, 888 r->prog, COL1, blsh(bn), change); 889 } 890 for(;;) { 891 r->act.b[z] |= bb; 892 p = r->prog; 893 894 if(r->use1.b[z] & bb) { 895 change += CREF * r->loop; 896 if(p->as == AFMOVL) 897 if(BtoR(bb) != D_F0) 898 change = -CINF; 899 if(debug['R'] && debug['v']) 900 print("%d%P%|u1 %B $%d\n", r->loop, 901 p, COL1, blsh(bn), change); 902 } 903 904 if((r->use2.b[z]|r->set.b[z]) & bb) { 905 change += CREF * r->loop; 906 if(p->as == AFMOVL) 907 if(BtoR(bb) != D_F0) 908 change = -CINF; 909 if(debug['R'] && debug['v']) 910 print("%d%P%|u2 %B $%d\n", r->loop, 911 p, COL1, blsh(bn), change); 912 } 913 914 if(STORE(r) & r->regdiff.b[z] & bb) { 915 change -= CLOAD * r->loop; 916 if(p->as == AFMOVL) 917 if(BtoR(bb) != D_F0) 918 change = -CINF; 919 if(debug['R'] && debug['v']) 920 print("%d%P%|st %B $%d\n", r->loop, 921 p, COL1, blsh(bn), change); 922 } 923 924 if(r->refbehind.b[z] & bb) 925 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 926 if(r1->refahead.b[z] & bb) 927 paint1(r1, bn); 928 929 if(!(r->refahead.b[z] & bb)) 930 break; 931 r1 = r->s2; 932 if(r1 != R) 933 if(r1->refbehind.b[z] & bb) 934 paint1(r1, bn); 935 r = r->s1; 936 if(r == R) 937 break; 938 if(r->act.b[z] & bb) 939 break; 940 if(!(r->refbehind.b[z] & bb)) 941 break; 942 } 943 } 944 945 ulong 946 regset(Reg *r, ulong bb) 947 { 948 ulong b, set; 949 Adr v; 950 int c; 951 952 set = 0; 953 v = zprog.from; 954 while(b = bb & ~(bb-1)) { 955 v.type = BtoR(b); 956 c = copyu(r->prog, &v, A); 957 if(c == 3) 958 set |= b; 959 bb &= ~b; 960 } 961 return set; 962 } 963 964 ulong 965 reguse(Reg *r, ulong bb) 966 { 967 ulong b, set; 968 Adr v; 969 int c; 970 971 set = 0; 972 v = zprog.from; 973 while(b = bb & ~(bb-1)) { 974 v.type = BtoR(b); 975 c = copyu(r->prog, &v, A); 976 if(c == 1 || c == 2 || c == 4) 977 set |= b; 978 bb &= ~b; 979 } 980 return set; 981 } 982 983 ulong 984 paint2(Reg *r, int bn) 985 { 986 Reg *r1; 987 int z; 988 ulong bb, vreg, x; 989 990 z = bn/32; 991 bb = 1L << (bn%32); 992 vreg = regbits; 993 if(!(r->act.b[z] & bb)) 994 return vreg; 995 for(;;) { 996 if(!(r->refbehind.b[z] & bb)) 997 break; 998 r1 = r->p1; 999 if(r1 == R) 1000 break; 1001 if(!(r1->refahead.b[z] & bb)) 1002 break; 1003 if(!(r1->act.b[z] & bb)) 1004 break; 1005 r = r1; 1006 } 1007 for(;;) { 1008 r->act.b[z] &= ~bb; 1009 1010 vreg |= r->regu; 1011 1012 if(r->refbehind.b[z] & bb) 1013 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1014 if(r1->refahead.b[z] & bb) 1015 vreg |= paint2(r1, bn); 1016 1017 if(!(r->refahead.b[z] & bb)) 1018 break; 1019 r1 = r->s2; 1020 if(r1 != R) 1021 if(r1->refbehind.b[z] & bb) 1022 vreg |= paint2(r1, bn); 1023 r = r->s1; 1024 if(r == R) 1025 break; 1026 if(!(r->act.b[z] & bb)) 1027 break; 1028 if(!(r->refbehind.b[z] & bb)) 1029 break; 1030 } 1031 1032 bb = vreg; 1033 for(; r; r=r->s1) { 1034 x = r->regu & ~bb; 1035 if(x) { 1036 vreg |= reguse(r, x); 1037 bb |= regset(r, x); 1038 } 1039 } 1040 return vreg; 1041 } 1042 1043 void 1044 paint3(Reg *r, int bn, long rb, int rn) 1045 { 1046 Reg *r1; 1047 Prog *p; 1048 int z; 1049 ulong bb; 1050 1051 z = bn/32; 1052 bb = 1L << (bn%32); 1053 if(r->act.b[z] & bb) 1054 return; 1055 for(;;) { 1056 if(!(r->refbehind.b[z] & bb)) 1057 break; 1058 r1 = r->p1; 1059 if(r1 == R) 1060 break; 1061 if(!(r1->refahead.b[z] & bb)) 1062 break; 1063 if(r1->act.b[z] & bb) 1064 break; 1065 r = r1; 1066 } 1067 1068 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1069 addmove(r, bn, rn, 0); 1070 for(;;) { 1071 r->act.b[z] |= bb; 1072 p = r->prog; 1073 1074 if(r->use1.b[z] & bb) { 1075 if(debug['R']) 1076 print("%P", p); 1077 addreg(&p->from, rn); 1078 if(debug['R']) 1079 print("%|.c%P\n", COL1, p); 1080 } 1081 if((r->use2.b[z]|r->set.b[z]) & bb) { 1082 if(debug['R']) 1083 print("%P", p); 1084 addreg(&p->to, rn); 1085 if(debug['R']) 1086 print("%|.c%P\n", COL1, p); 1087 } 1088 1089 if(STORE(r) & r->regdiff.b[z] & bb) 1090 addmove(r, bn, rn, 1); 1091 r->regu |= rb; 1092 1093 if(r->refbehind.b[z] & bb) 1094 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1095 if(r1->refahead.b[z] & bb) 1096 paint3(r1, bn, rb, rn); 1097 1098 if(!(r->refahead.b[z] & bb)) 1099 break; 1100 r1 = r->s2; 1101 if(r1 != R) 1102 if(r1->refbehind.b[z] & bb) 1103 paint3(r1, bn, rb, rn); 1104 r = r->s1; 1105 if(r == R) 1106 break; 1107 if(r->act.b[z] & bb) 1108 break; 1109 if(!(r->refbehind.b[z] & bb)) 1110 break; 1111 } 1112 } 1113 1114 void 1115 addreg(Adr *a, int rn) 1116 { 1117 1118 a->sym = 0; 1119 a->offset = 0; 1120 a->type = rn; 1121 } 1122 1123 long 1124 RtoB(int r) 1125 { 1126 1127 switch(r < D_AX || r > D_DI || r == D_SP) 1128 return 0; 1129 return 1L << (r-D_AX); 1130 } 1131 1132 int 1133 BtoR(long b) 1134 { 1135 1136 b &= 0xffL; 1137 if(b == 0) 1138 return 0; 1139 return bitno(b) + D_AX; 1140 } 1141