1 #include "gc.h" 2 3 Reg* 4 rega(void) 5 { 6 Reg *r; 7 8 r = freer; 9 if(r == R) { 10 r = alloc(sizeof(*r)); 11 } else 12 freer = r->link; 13 14 *r = zreg; 15 return r; 16 } 17 18 int 19 rcmp(void *a1, void *a2) 20 { 21 Rgn *p1, *p2; 22 int c1, c2; 23 24 p1 = (Rgn*)a1; 25 p2 = (Rgn*)a2; 26 c1 = p2->cost; 27 c2 = p1->cost; 28 if(c1 -= c2) 29 return c1; 30 return p2->varno - p1->varno; 31 } 32 33 void 34 regopt(Prog *p) 35 { 36 Reg *r, *r1, *r2; 37 Prog *p1; 38 int i, z; 39 long initpc, val, npc; 40 ulong vreg; 41 Bits bit; 42 struct 43 { 44 long m; 45 long c; 46 Reg* p; 47 } log5[6], *lp; 48 49 firstr = R; 50 lastr = R; 51 nvar = 0; 52 regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0); 53 if(REGEXT) 54 regbits |= RtoB(REGEXT) | RtoB(REGEXT-1); 55 for(z=0; z<BITS; z++) { 56 externs.b[z] = 0; 57 params.b[z] = 0; 58 consts.b[z] = 0; 59 addrs.b[z] = 0; 60 } 61 62 /* 63 * pass 1 64 * build aux data structure 65 * allocate pcs 66 * find use and set of variables 67 */ 68 val = 5L * 5L * 5L * 5L * 5L; 69 lp = log5; 70 for(i=0; i<5; i++) { 71 lp->m = val; 72 lp->c = 0; 73 lp->p = R; 74 val /= 5L; 75 lp++; 76 } 77 val = 0; 78 for(; p != P; p = p->link) { 79 switch(p->as) { 80 case ADATA: 81 case AGLOBL: 82 case ANAME: 83 case ASIGNAME: 84 continue; 85 } 86 r = rega(); 87 if(firstr == R) { 88 firstr = r; 89 lastr = r; 90 } else { 91 lastr->link = r; 92 r->p1 = lastr; 93 lastr->s1 = r; 94 lastr = r; 95 } 96 r->prog = p; 97 r->pc = val; 98 val++; 99 100 lp = log5; 101 for(i=0; i<5; i++) { 102 lp->c--; 103 if(lp->c <= 0) { 104 lp->c = lp->m; 105 if(lp->p != R) 106 lp->p->log5 = r; 107 lp->p = r; 108 (lp+1)->c = 0; 109 break; 110 } 111 lp++; 112 } 113 114 r1 = r->p1; 115 if(r1 != R) 116 switch(r1->prog->as) { 117 case ARET: 118 case AJMP: 119 case AIRETL: 120 case AIRETQ: 121 r->p1 = R; 122 r1->s1 = R; 123 } 124 125 bit = mkvar(r, &p->from); 126 if(bany(&bit)) 127 switch(p->as) { 128 /* 129 * funny 130 */ 131 case ALEAL: 132 case ALEAQ: 133 for(z=0; z<BITS; z++) 134 addrs.b[z] |= bit.b[z]; 135 break; 136 137 /* 138 * left side read 139 */ 140 default: 141 for(z=0; z<BITS; z++) 142 r->use1.b[z] |= bit.b[z]; 143 break; 144 } 145 146 bit = mkvar(r, &p->to); 147 if(bany(&bit)) 148 switch(p->as) { 149 default: 150 diag(Z, "reg: unknown op: %A", p->as); 151 break; 152 153 /* 154 * right side read 155 */ 156 case ACMPB: 157 case ACMPL: 158 case ACMPQ: 159 case ACMPW: 160 case ACOMISS: 161 case ACOMISD: 162 case AUCOMISS: 163 case AUCOMISD: 164 for(z=0; z<BITS; z++) 165 r->use2.b[z] |= bit.b[z]; 166 break; 167 168 /* 169 * right side write 170 */ 171 case ANOP: 172 case AMOVL: 173 case AMOVQ: 174 case AMOVB: 175 case AMOVW: 176 case AMOVBLSX: 177 case AMOVBLZX: 178 case AMOVBQSX: 179 case AMOVBQZX: 180 case AMOVLQSX: 181 case AMOVLQZX: 182 case AMOVWLSX: 183 case AMOVWLZX: 184 case AMOVWQSX: 185 case AMOVWQZX: 186 187 case AMOVSS: 188 case AMOVSD: 189 case ACVTSD2SL: 190 case ACVTSD2SQ: 191 case ACVTSD2SS: 192 case ACVTSL2SD: 193 case ACVTSL2SS: 194 case ACVTSQ2SD: 195 case ACVTSQ2SS: 196 case ACVTSS2SD: 197 case ACVTSS2SL: 198 case ACVTSS2SQ: 199 case ACVTTSD2SL: 200 case ACVTTSD2SQ: 201 case ACVTTSS2SL: 202 case ACVTTSS2SQ: 203 for(z=0; z<BITS; z++) 204 r->set.b[z] |= bit.b[z]; 205 break; 206 207 /* 208 * right side read+write 209 */ 210 case AADDB: 211 case AADDL: 212 case AADDQ: 213 case AADDW: 214 case AANDB: 215 case AANDL: 216 case AANDQ: 217 case AANDW: 218 case ASUBB: 219 case ASUBL: 220 case ASUBQ: 221 case ASUBW: 222 case AORB: 223 case AORL: 224 case AORQ: 225 case AORW: 226 case AXORB: 227 case AXORL: 228 case AXORQ: 229 case AXORW: 230 case ASALB: 231 case ASALL: 232 case ASALQ: 233 case ASALW: 234 case ASARB: 235 case ASARL: 236 case ASARQ: 237 case ASARW: 238 case AROLB: 239 case AROLL: 240 case AROLQ: 241 case AROLW: 242 case ARORB: 243 case ARORL: 244 case ARORQ: 245 case ARORW: 246 case ASHLB: 247 case ASHLL: 248 case ASHLQ: 249 case ASHLW: 250 case ASHRB: 251 case ASHRL: 252 case ASHRQ: 253 case ASHRW: 254 case AIMULL: 255 case AIMULQ: 256 case AIMULW: 257 case ANEGL: 258 case ANEGQ: 259 case ANOTL: 260 case ANOTQ: 261 case AADCL: 262 case AADCQ: 263 case ASBBL: 264 case ASBBQ: 265 266 case AADDSD: 267 case AADDSS: 268 case ACMPSD: 269 case ACMPSS: 270 case ADIVSD: 271 case ADIVSS: 272 case AMAXSD: 273 case AMAXSS: 274 case AMINSD: 275 case AMINSS: 276 case AMULSD: 277 case AMULSS: 278 case ARCPSS: 279 case ARSQRTSS: 280 case ASQRTSD: 281 case ASQRTSS: 282 case ASUBSD: 283 case ASUBSS: 284 case AXORPD: 285 for(z=0; z<BITS; z++) { 286 r->set.b[z] |= bit.b[z]; 287 r->use2.b[z] |= bit.b[z]; 288 } 289 break; 290 291 /* 292 * funny 293 */ 294 case ACALL: 295 for(z=0; z<BITS; z++) 296 addrs.b[z] |= bit.b[z]; 297 break; 298 } 299 300 switch(p->as) { 301 case AIMULL: 302 case AIMULQ: 303 case AIMULW: 304 if(p->to.type != D_NONE) 305 break; 306 307 case AIDIVB: 308 case AIDIVL: 309 case AIDIVQ: 310 case AIDIVW: 311 case AIMULB: 312 case ADIVB: 313 case ADIVL: 314 case ADIVQ: 315 case ADIVW: 316 case AMULB: 317 case AMULL: 318 case AMULQ: 319 case AMULW: 320 321 case ACWD: 322 case ACDQ: 323 case ACQO: 324 r->regu |= RtoB(D_AX) | RtoB(D_DX); 325 break; 326 327 case AREP: 328 case AREPN: 329 case ALOOP: 330 case ALOOPEQ: 331 case ALOOPNE: 332 r->regu |= RtoB(D_CX); 333 break; 334 335 case AMOVSB: 336 case AMOVSL: 337 case AMOVSQ: 338 case AMOVSW: 339 case ACMPSB: 340 case ACMPSL: 341 case ACMPSQ: 342 case ACMPSW: 343 r->regu |= RtoB(D_SI) | RtoB(D_DI); 344 break; 345 346 case ASTOSB: 347 case ASTOSL: 348 case ASTOSQ: 349 case ASTOSW: 350 case ASCASB: 351 case ASCASL: 352 case ASCASQ: 353 case ASCASW: 354 r->regu |= RtoB(D_AX) | RtoB(D_DI); 355 break; 356 357 case AINSB: 358 case AINSL: 359 case AINSW: 360 case AOUTSB: 361 case AOUTSL: 362 case AOUTSW: 363 r->regu |= RtoB(D_DI) | RtoB(D_DX); 364 break; 365 } 366 } 367 if(firstr == R) 368 return; 369 initpc = pc - val; 370 npc = val; 371 372 /* 373 * pass 2 374 * turn branch references to pointers 375 * build back pointers 376 */ 377 for(r = firstr; r != R; r = r->link) { 378 p = r->prog; 379 if(p->to.type == D_BRANCH) { 380 val = p->to.offset - initpc; 381 r1 = firstr; 382 while(r1 != R) { 383 r2 = r1->log5; 384 if(r2 != R && val >= r2->pc) { 385 r1 = r2; 386 continue; 387 } 388 if(r1->pc == val) 389 break; 390 r1 = r1->link; 391 } 392 if(r1 == R) { 393 nearln = p->lineno; 394 diag(Z, "ref not found\n%P", p); 395 continue; 396 } 397 if(r1 == r) { 398 nearln = p->lineno; 399 diag(Z, "ref to self\n%P", p); 400 continue; 401 } 402 r->s2 = r1; 403 r->p2link = r1->p2; 404 r1->p2 = r; 405 } 406 } 407 if(debug['R']) { 408 p = firstr->prog; 409 print("\n%L %D\n", p->lineno, &p->from); 410 } 411 412 /* 413 * pass 2.5 414 * find looping structure 415 */ 416 for(r = firstr; r != R; r = r->link) 417 r->active = 0; 418 change = 0; 419 loopit(firstr, npc); 420 if(debug['R'] && debug['v']) { 421 print("\nlooping structure:\n"); 422 for(r = firstr; r != R; r = r->link) { 423 print("%ld:%P", r->loop, r->prog); 424 for(z=0; z<BITS; z++) 425 bit.b[z] = r->use1.b[z] | 426 r->use2.b[z] | 427 r->set.b[z]; 428 if(bany(&bit)) { 429 print("\t"); 430 if(bany(&r->use1)) 431 print(" u1=%B", r->use1); 432 if(bany(&r->use2)) 433 print(" u2=%B", r->use2); 434 if(bany(&r->set)) 435 print(" st=%B", r->set); 436 } 437 print("\n"); 438 } 439 } 440 441 /* 442 * pass 3 443 * iterate propagating usage 444 * back until flow graph is complete 445 */ 446 loop1: 447 change = 0; 448 for(r = firstr; r != R; r = r->link) 449 r->active = 0; 450 for(r = firstr; r != R; r = r->link) 451 if(r->prog->as == ARET) 452 prop(r, zbits, zbits); 453 loop11: 454 /* pick up unreachable code */ 455 i = 0; 456 for(r = firstr; r != R; r = r1) { 457 r1 = r->link; 458 if(r1 && r1->active && !r->active) { 459 prop(r, zbits, zbits); 460 i = 1; 461 } 462 } 463 if(i) 464 goto loop11; 465 if(change) 466 goto loop1; 467 468 469 /* 470 * pass 4 471 * iterate propagating register/variable synchrony 472 * forward until graph is complete 473 */ 474 loop2: 475 change = 0; 476 for(r = firstr; r != R; r = r->link) 477 r->active = 0; 478 synch(firstr, zbits); 479 if(change) 480 goto loop2; 481 482 483 /* 484 * pass 5 485 * isolate regions 486 * calculate costs (paint1) 487 */ 488 r = firstr; 489 if(r) { 490 for(z=0; z<BITS; z++) 491 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 492 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 493 if(bany(&bit)) { 494 nearln = r->prog->lineno; 495 warn(Z, "used and not set: %B", bit); 496 if(debug['R'] && !debug['w']) 497 print("used and not set: %B\n", bit); 498 } 499 } 500 if(debug['R'] && debug['v']) 501 print("\nprop structure:\n"); 502 for(r = firstr; r != R; r = r->link) 503 r->act = zbits; 504 rgp = region; 505 nregion = 0; 506 for(r = firstr; r != R; r = r->link) { 507 if(debug['R'] && debug['v']) { 508 print("%P\t", r->prog); 509 if(bany(&r->set)) 510 print("s:%B ", r->set); 511 if(bany(&r->refahead)) 512 print("ra:%B ", r->refahead); 513 if(bany(&r->calahead)) 514 print("ca:%B ", r->calahead); 515 print("\n"); 516 } 517 for(z=0; z<BITS; z++) 518 bit.b[z] = r->set.b[z] & 519 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 520 if(bany(&bit)) { 521 nearln = r->prog->lineno; 522 warn(Z, "set and not used: %B", bit); 523 if(debug['R']) 524 print("set and not used: %B\n", bit); 525 excise(r); 526 } 527 for(z=0; z<BITS; z++) 528 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 529 while(bany(&bit)) { 530 i = bnum(bit); 531 rgp->enter = r; 532 rgp->varno = i; 533 change = 0; 534 if(debug['R'] && debug['v']) 535 print("\n"); 536 paint1(r, i); 537 bit.b[i/32] &= ~(1L<<(i%32)); 538 if(change <= 0) { 539 if(debug['R']) 540 print("%L$%d: %B\n", 541 r->prog->lineno, change, blsh(i)); 542 continue; 543 } 544 rgp->cost = change; 545 nregion++; 546 if(nregion >= NRGN) { 547 warn(Z, "too many regions"); 548 goto brk; 549 } 550 rgp++; 551 } 552 } 553 brk: 554 qsort(region, nregion, sizeof(region[0]), rcmp); 555 556 /* 557 * pass 6 558 * determine used registers (paint2) 559 * replace code (paint3) 560 */ 561 rgp = region; 562 for(i=0; i<nregion; i++) { 563 bit = blsh(rgp->varno); 564 vreg = paint2(rgp->enter, rgp->varno); 565 vreg = allreg(vreg, rgp); 566 if(debug['R']) { 567 print("%L$%d %R: %B\n", 568 rgp->enter->prog->lineno, 569 rgp->cost, 570 rgp->regno, 571 bit); 572 } 573 if(rgp->regno != 0) 574 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 575 rgp++; 576 } 577 /* 578 * pass 7 579 * peep-hole on basic block 580 */ 581 if(!debug['R'] || debug['P']) 582 peep(); 583 584 /* 585 * pass 8 586 * recalculate pc 587 */ 588 val = initpc; 589 for(r = firstr; r != R; r = r1) { 590 r->pc = val; 591 p = r->prog; 592 p1 = P; 593 r1 = r->link; 594 if(r1 != R) 595 p1 = r1->prog; 596 for(; p != p1; p = p->link) { 597 switch(p->as) { 598 default: 599 val++; 600 break; 601 602 case ANOP: 603 case ADATA: 604 case AGLOBL: 605 case ANAME: 606 case ASIGNAME: 607 break; 608 } 609 } 610 } 611 pc = val; 612 613 /* 614 * fix up branches 615 */ 616 if(debug['R']) 617 if(bany(&addrs)) 618 print("addrs: %B\n", addrs); 619 620 r1 = 0; /* set */ 621 for(r = firstr; r != R; r = r->link) { 622 p = r->prog; 623 if(p->to.type == D_BRANCH) 624 p->to.offset = r->s2->pc; 625 r1 = r; 626 } 627 628 /* 629 * last pass 630 * eliminate nops 631 * free aux structures 632 */ 633 for(p = firstr->prog; p != P; p = p->link){ 634 while(p->link && p->link->as == ANOP) 635 p->link = p->link->link; 636 } 637 if(r1 != R) { 638 r1->link = freer; 639 freer = firstr; 640 } 641 } 642 643 /* 644 * add mov b,rn 645 * just after r 646 */ 647 void 648 addmove(Reg *r, int bn, int rn, int f) 649 { 650 Prog *p, *p1; 651 Adr *a; 652 Var *v; 653 654 p1 = alloc(sizeof(*p1)); 655 *p1 = zprog; 656 p = r->prog; 657 658 p1->link = p->link; 659 p->link = p1; 660 p1->lineno = p->lineno; 661 662 v = var + bn; 663 664 a = &p1->to; 665 a->sym = v->sym; 666 a->offset = v->offset; 667 a->etype = v->etype; 668 a->type = v->name; 669 670 p1->as = AMOVL; 671 if(v->etype == TCHAR || v->etype == TUCHAR) 672 p1->as = AMOVB; 673 if(v->etype == TSHORT || v->etype == TUSHORT) 674 p1->as = AMOVW; 675 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND) 676 p1->as = AMOVQ; 677 if(v->etype == TFLOAT) 678 p1->as = AMOVSS; 679 if(v->etype == TDOUBLE) 680 p1->as = AMOVSD; 681 682 p1->from.type = rn; 683 if(!f) { 684 p1->from = *a; 685 *a = zprog.from; 686 a->type = rn; 687 if(v->etype == TUCHAR) 688 p1->as = AMOVB; 689 if(v->etype == TUSHORT) 690 p1->as = AMOVW; 691 } 692 if(debug['R']) 693 print("%P\t.a%P\n", p, p1); 694 } 695 696 ulong 697 doregbits(int r) 698 { 699 ulong b; 700 701 b = 0; 702 if(r >= D_INDIR) 703 r -= D_INDIR; 704 if(r >= D_AX && r <= D_R15) 705 b |= RtoB(r); 706 else 707 if(r >= D_AL && r <= D_R15B) 708 b |= RtoB(r-D_AL+D_AX); 709 else 710 if(r >= D_AH && r <= D_BH) 711 b |= RtoB(r-D_AH+D_AX); 712 else 713 if(r >= D_X0 && r <= D_X0+15) 714 b |= FtoB(r); 715 return b; 716 } 717 718 Bits 719 mkvar(Reg *r, Adr *a) 720 { 721 Var *v; 722 int i, t, n, et, z; 723 long o; 724 Bits bit; 725 Sym *s; 726 727 /* 728 * mark registers used 729 */ 730 t = a->type; 731 r->regu |= doregbits(t); 732 r->regu |= doregbits(a->index); 733 734 switch(t) { 735 default: 736 goto none; 737 case D_ADDR: 738 a->type = a->index; 739 bit = mkvar(r, a); 740 for(z=0; z<BITS; z++) 741 addrs.b[z] |= bit.b[z]; 742 a->type = t; 743 goto none; 744 case D_EXTERN: 745 case D_STATIC: 746 case D_PARAM: 747 case D_AUTO: 748 n = t; 749 break; 750 } 751 s = a->sym; 752 if(s == S) 753 goto none; 754 if(s->name[0] == '.') 755 goto none; 756 et = a->etype; 757 o = a->offset; 758 v = var; 759 for(i=0; i<nvar; i++) { 760 if(s == v->sym) 761 if(n == v->name) 762 if(o == v->offset) 763 goto out; 764 v++; 765 } 766 if(nvar >= NVAR) { 767 if(debug['w'] > 1 && s) 768 warn(Z, "variable not optimized: %s", s->name); 769 goto none; 770 } 771 i = nvar; 772 nvar++; 773 v = &var[i]; 774 v->sym = s; 775 v->offset = o; 776 v->name = n; 777 v->etype = et; 778 if(debug['R']) 779 print("bit=%2d et=%2d %D\n", i, et, a); 780 781 out: 782 bit = blsh(i); 783 if(n == D_EXTERN || n == D_STATIC) 784 for(z=0; z<BITS; z++) 785 externs.b[z] |= bit.b[z]; 786 if(n == D_PARAM) 787 for(z=0; z<BITS; z++) 788 params.b[z] |= bit.b[z]; 789 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */ 790 for(z=0; z<BITS; z++) 791 addrs.b[z] |= bit.b[z]; 792 return bit; 793 794 none: 795 return zbits; 796 } 797 798 void 799 prop(Reg *r, Bits ref, Bits cal) 800 { 801 Reg *r1, *r2; 802 int z; 803 804 for(r1 = r; r1 != R; r1 = r1->p1) { 805 for(z=0; z<BITS; z++) { 806 ref.b[z] |= r1->refahead.b[z]; 807 if(ref.b[z] != r1->refahead.b[z]) { 808 r1->refahead.b[z] = ref.b[z]; 809 change++; 810 } 811 cal.b[z] |= r1->calahead.b[z]; 812 if(cal.b[z] != r1->calahead.b[z]) { 813 r1->calahead.b[z] = cal.b[z]; 814 change++; 815 } 816 } 817 switch(r1->prog->as) { 818 case ACALL: 819 for(z=0; z<BITS; z++) { 820 cal.b[z] |= ref.b[z] | externs.b[z]; 821 ref.b[z] = 0; 822 } 823 break; 824 825 case ATEXT: 826 for(z=0; z<BITS; z++) { 827 cal.b[z] = 0; 828 ref.b[z] = 0; 829 } 830 break; 831 832 case ARET: 833 for(z=0; z<BITS; z++) { 834 cal.b[z] = externs.b[z]; 835 ref.b[z] = 0; 836 } 837 } 838 for(z=0; z<BITS; z++) { 839 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 840 r1->use1.b[z] | r1->use2.b[z]; 841 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 842 r1->refbehind.b[z] = ref.b[z]; 843 r1->calbehind.b[z] = cal.b[z]; 844 } 845 if(r1->active) 846 break; 847 r1->active = 1; 848 } 849 for(; r != r1; r = r->p1) 850 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 851 prop(r2, r->refbehind, r->calbehind); 852 } 853 854 /* 855 * find looping structure 856 * 857 * 1) find reverse postordering 858 * 2) find approximate dominators, 859 * the actual dominators if the flow graph is reducible 860 * otherwise, dominators plus some other non-dominators. 861 * See Matthew S. Hecht and Jeffrey D. Ullman, 862 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 863 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 864 * Oct. 1-3, 1973, pp. 207-217. 865 * 3) find all nodes with a predecessor dominated by the current node. 866 * such a node is a loop head. 867 * recursively, all preds with a greater rpo number are in the loop 868 */ 869 long 870 postorder(Reg *r, Reg **rpo2r, long n) 871 { 872 Reg *r1; 873 874 r->rpo = 1; 875 r1 = r->s1; 876 if(r1 && !r1->rpo) 877 n = postorder(r1, rpo2r, n); 878 r1 = r->s2; 879 if(r1 && !r1->rpo) 880 n = postorder(r1, rpo2r, n); 881 rpo2r[n] = r; 882 n++; 883 return n; 884 } 885 886 long 887 rpolca(long *idom, long rpo1, long rpo2) 888 { 889 long t; 890 891 if(rpo1 == -1) 892 return rpo2; 893 while(rpo1 != rpo2){ 894 if(rpo1 > rpo2){ 895 t = rpo2; 896 rpo2 = rpo1; 897 rpo1 = t; 898 } 899 while(rpo1 < rpo2){ 900 t = idom[rpo2]; 901 if(t >= rpo2) 902 fatal(Z, "bad idom"); 903 rpo2 = t; 904 } 905 } 906 return rpo1; 907 } 908 909 int 910 doms(long *idom, long r, long s) 911 { 912 while(s > r) 913 s = idom[s]; 914 return s == r; 915 } 916 917 int 918 loophead(long *idom, Reg *r) 919 { 920 long src; 921 922 src = r->rpo; 923 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 924 return 1; 925 for(r = r->p2; r != R; r = r->p2link) 926 if(doms(idom, src, r->rpo)) 927 return 1; 928 return 0; 929 } 930 931 void 932 loopmark(Reg **rpo2r, long head, Reg *r) 933 { 934 if(r->rpo < head || r->active == head) 935 return; 936 r->active = head; 937 r->loop += LOOP; 938 if(r->p1 != R) 939 loopmark(rpo2r, head, r->p1); 940 for(r = r->p2; r != R; r = r->p2link) 941 loopmark(rpo2r, head, r); 942 } 943 944 void 945 loopit(Reg *r, long nr) 946 { 947 Reg *r1; 948 long i, d, me; 949 950 if(nr > maxnr) { 951 rpo2r = alloc(nr * sizeof(Reg*)); 952 idom = alloc(nr * sizeof(long)); 953 maxnr = nr; 954 } 955 956 d = postorder(r, rpo2r, 0); 957 if(d > nr) 958 fatal(Z, "too many reg nodes"); 959 nr = d; 960 for(i = 0; i < nr / 2; i++){ 961 r1 = rpo2r[i]; 962 rpo2r[i] = rpo2r[nr - 1 - i]; 963 rpo2r[nr - 1 - i] = r1; 964 } 965 for(i = 0; i < nr; i++) 966 rpo2r[i]->rpo = i; 967 968 idom[0] = 0; 969 for(i = 0; i < nr; i++){ 970 r1 = rpo2r[i]; 971 me = r1->rpo; 972 d = -1; 973 if(r1->p1 != R && r1->p1->rpo < me) 974 d = r1->p1->rpo; 975 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 976 if(r1->rpo < me) 977 d = rpolca(idom, d, r1->rpo); 978 idom[i] = d; 979 } 980 981 for(i = 0; i < nr; i++){ 982 r1 = rpo2r[i]; 983 r1->loop++; 984 if(r1->p2 != R && loophead(idom, r1)) 985 loopmark(rpo2r, i, r1); 986 } 987 } 988 989 void 990 synch(Reg *r, Bits dif) 991 { 992 Reg *r1; 993 int z; 994 995 for(r1 = r; r1 != R; r1 = r1->s1) { 996 for(z=0; z<BITS; z++) { 997 dif.b[z] = (dif.b[z] & 998 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 999 r1->set.b[z] | r1->regdiff.b[z]; 1000 if(dif.b[z] != r1->regdiff.b[z]) { 1001 r1->regdiff.b[z] = dif.b[z]; 1002 change++; 1003 } 1004 } 1005 if(r1->active) 1006 break; 1007 r1->active = 1; 1008 for(z=0; z<BITS; z++) 1009 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 1010 if(r1->s2 != R) 1011 synch(r1->s2, dif); 1012 } 1013 } 1014 1015 ulong 1016 allreg(ulong b, Rgn *r) 1017 { 1018 Var *v; 1019 int i; 1020 1021 v = var + r->varno; 1022 r->regno = 0; 1023 switch(v->etype) { 1024 1025 default: 1026 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 1027 break; 1028 1029 case TCHAR: 1030 case TUCHAR: 1031 case TSHORT: 1032 case TUSHORT: 1033 case TINT: 1034 case TUINT: 1035 case TLONG: 1036 case TULONG: 1037 case TVLONG: 1038 case TUVLONG: 1039 case TIND: 1040 case TARRAY: 1041 i = BtoR(~b); 1042 if(i && r->cost > 0) { 1043 r->regno = i; 1044 return RtoB(i); 1045 } 1046 break; 1047 1048 case TDOUBLE: 1049 case TFLOAT: 1050 i = BtoF(~b); 1051 if(i && r->cost > 0) { 1052 r->regno = i; 1053 return FtoB(i); 1054 } 1055 break; 1056 } 1057 return 0; 1058 } 1059 1060 void 1061 paint1(Reg *r, int bn) 1062 { 1063 Reg *r1; 1064 Prog *p; 1065 int z; 1066 ulong bb; 1067 1068 z = bn/32; 1069 bb = 1L<<(bn%32); 1070 if(r->act.b[z] & bb) 1071 return; 1072 for(;;) { 1073 if(!(r->refbehind.b[z] & bb)) 1074 break; 1075 r1 = r->p1; 1076 if(r1 == R) 1077 break; 1078 if(!(r1->refahead.b[z] & bb)) 1079 break; 1080 if(r1->act.b[z] & bb) 1081 break; 1082 r = r1; 1083 } 1084 1085 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1086 change -= CLOAD * r->loop; 1087 if(debug['R'] && debug['v']) 1088 print("%ld%P\tld %B $%d\n", r->loop, 1089 r->prog, blsh(bn), change); 1090 } 1091 for(;;) { 1092 r->act.b[z] |= bb; 1093 p = r->prog; 1094 1095 if(r->use1.b[z] & bb) { 1096 change += CREF * r->loop; 1097 if(debug['R'] && debug['v']) 1098 print("%ld%P\tu1 %B $%d\n", r->loop, 1099 p, blsh(bn), change); 1100 } 1101 1102 if((r->use2.b[z]|r->set.b[z]) & bb) { 1103 change += CREF * r->loop; 1104 if(debug['R'] && debug['v']) 1105 print("%ld%P\tu2 %B $%d\n", r->loop, 1106 p, blsh(bn), change); 1107 } 1108 1109 if(STORE(r) & r->regdiff.b[z] & bb) { 1110 change -= CLOAD * r->loop; 1111 if(debug['R'] && debug['v']) 1112 print("%ld%P\tst %B $%d\n", r->loop, 1113 p, blsh(bn), change); 1114 } 1115 1116 if(r->refbehind.b[z] & bb) 1117 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1118 if(r1->refahead.b[z] & bb) 1119 paint1(r1, bn); 1120 1121 if(!(r->refahead.b[z] & bb)) 1122 break; 1123 r1 = r->s2; 1124 if(r1 != R) 1125 if(r1->refbehind.b[z] & bb) 1126 paint1(r1, bn); 1127 r = r->s1; 1128 if(r == R) 1129 break; 1130 if(r->act.b[z] & bb) 1131 break; 1132 if(!(r->refbehind.b[z] & bb)) 1133 break; 1134 } 1135 } 1136 1137 ulong 1138 regset(Reg *r, ulong bb) 1139 { 1140 ulong b, set; 1141 Adr v; 1142 int c; 1143 1144 set = 0; 1145 v = zprog.from; 1146 while(b = bb & ~(bb-1)) { 1147 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1148 if(v.type == 0) 1149 diag(Z, "zero v.type for %#lux", b); 1150 c = copyu(r->prog, &v, A); 1151 if(c == 3) 1152 set |= b; 1153 bb &= ~b; 1154 } 1155 return set; 1156 } 1157 1158 ulong 1159 reguse(Reg *r, ulong bb) 1160 { 1161 ulong b, set; 1162 Adr v; 1163 int c; 1164 1165 set = 0; 1166 v = zprog.from; 1167 while(b = bb & ~(bb-1)) { 1168 v.type = b & 0xFFFF? BtoR(b): BtoF(b); 1169 c = copyu(r->prog, &v, A); 1170 if(c == 1 || c == 2 || c == 4) 1171 set |= b; 1172 bb &= ~b; 1173 } 1174 return set; 1175 } 1176 1177 ulong 1178 paint2(Reg *r, int bn) 1179 { 1180 Reg *r1; 1181 int z; 1182 ulong bb, vreg, x; 1183 1184 z = bn/32; 1185 bb = 1L << (bn%32); 1186 vreg = regbits; 1187 if(!(r->act.b[z] & bb)) 1188 return vreg; 1189 for(;;) { 1190 if(!(r->refbehind.b[z] & bb)) 1191 break; 1192 r1 = r->p1; 1193 if(r1 == R) 1194 break; 1195 if(!(r1->refahead.b[z] & bb)) 1196 break; 1197 if(!(r1->act.b[z] & bb)) 1198 break; 1199 r = r1; 1200 } 1201 for(;;) { 1202 r->act.b[z] &= ~bb; 1203 1204 vreg |= r->regu; 1205 1206 if(r->refbehind.b[z] & bb) 1207 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1208 if(r1->refahead.b[z] & bb) 1209 vreg |= paint2(r1, bn); 1210 1211 if(!(r->refahead.b[z] & bb)) 1212 break; 1213 r1 = r->s2; 1214 if(r1 != R) 1215 if(r1->refbehind.b[z] & bb) 1216 vreg |= paint2(r1, bn); 1217 r = r->s1; 1218 if(r == R) 1219 break; 1220 if(!(r->act.b[z] & bb)) 1221 break; 1222 if(!(r->refbehind.b[z] & bb)) 1223 break; 1224 } 1225 1226 bb = vreg; 1227 for(; r; r=r->s1) { 1228 x = r->regu & ~bb; 1229 if(x) { 1230 vreg |= reguse(r, x); 1231 bb |= regset(r, x); 1232 } 1233 } 1234 return vreg; 1235 } 1236 1237 void 1238 paint3(Reg *r, int bn, long rb, int rn) 1239 { 1240 Reg *r1; 1241 Prog *p; 1242 int z; 1243 ulong bb; 1244 1245 z = bn/32; 1246 bb = 1L << (bn%32); 1247 if(r->act.b[z] & bb) 1248 return; 1249 for(;;) { 1250 if(!(r->refbehind.b[z] & bb)) 1251 break; 1252 r1 = r->p1; 1253 if(r1 == R) 1254 break; 1255 if(!(r1->refahead.b[z] & bb)) 1256 break; 1257 if(r1->act.b[z] & bb) 1258 break; 1259 r = r1; 1260 } 1261 1262 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1263 addmove(r, bn, rn, 0); 1264 for(;;) { 1265 r->act.b[z] |= bb; 1266 p = r->prog; 1267 1268 if(r->use1.b[z] & bb) { 1269 if(debug['R']) 1270 print("%P", p); 1271 addreg(&p->from, rn); 1272 if(debug['R']) 1273 print("\t.c%P\n", p); 1274 } 1275 if((r->use2.b[z]|r->set.b[z]) & bb) { 1276 if(debug['R']) 1277 print("%P", p); 1278 addreg(&p->to, rn); 1279 if(debug['R']) 1280 print("\t.c%P\n", p); 1281 } 1282 1283 if(STORE(r) & r->regdiff.b[z] & bb) 1284 addmove(r, bn, rn, 1); 1285 r->regu |= rb; 1286 1287 if(r->refbehind.b[z] & bb) 1288 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1289 if(r1->refahead.b[z] & bb) 1290 paint3(r1, bn, rb, rn); 1291 1292 if(!(r->refahead.b[z] & bb)) 1293 break; 1294 r1 = r->s2; 1295 if(r1 != R) 1296 if(r1->refbehind.b[z] & bb) 1297 paint3(r1, bn, rb, rn); 1298 r = r->s1; 1299 if(r == R) 1300 break; 1301 if(r->act.b[z] & bb) 1302 break; 1303 if(!(r->refbehind.b[z] & bb)) 1304 break; 1305 } 1306 } 1307 1308 void 1309 addreg(Adr *a, int rn) 1310 { 1311 1312 a->sym = 0; 1313 a->offset = 0; 1314 a->type = rn; 1315 } 1316 1317 long 1318 RtoB(int r) 1319 { 1320 1321 if(r < D_AX || r > D_R15) 1322 return 0; 1323 return 1L << (r-D_AX); 1324 } 1325 1326 int 1327 BtoR(long b) 1328 { 1329 1330 b &= 0xffffL; 1331 if(b == 0) 1332 return 0; 1333 return bitno(b) + D_AX; 1334 } 1335 1336 /* 1337 * bit reg 1338 * 16 X5 1339 * 17 X6 1340 * 18 X7 1341 */ 1342 long 1343 FtoB(int f) 1344 { 1345 if(f < FREGMIN || f > FREGEXT) 1346 return 0; 1347 return 1L << (f - FREGMIN + 16); 1348 } 1349 1350 int 1351 BtoF(long b) 1352 { 1353 1354 b &= 0x70000L; 1355 if(b == 0) 1356 return 0; 1357 return bitno(b) - 16 + FREGMIN; 1358 } 1359