1 #include "gc.h" 2 3 Reg* 4 rega(void) 5 { 6 Reg *r; 7 8 r = freer; 9 if(r == R) { 10 r = alloc(sizeof(*r)); 11 } else 12 freer = r->link; 13 14 *r = zreg; 15 return r; 16 } 17 18 int 19 rcmp(void *a1, void *a2) 20 { 21 Rgn *p1, *p2; 22 int c1, c2; 23 24 p1 = (Rgn*)a1; 25 p2 = (Rgn*)a2; 26 c1 = p2->cost; 27 c2 = p1->cost; 28 if(c1 -= c2) 29 return c1; 30 return p2->varno - p1->varno; 31 } 32 33 void 34 regopt(Prog *p) 35 { 36 Reg *r, *r1, *r2; 37 Prog *p1; 38 int i, z; 39 long initpc, val, npc; 40 ulong vreg; 41 Bits bit; 42 struct 43 { 44 long m; 45 long c; 46 Reg* p; 47 } log5[6], *lp; 48 49 firstr = R; 50 lastr = R; 51 nvar = 0; 52 regbits = RtoB(D_SP) | RtoB(D_AX); 53 for(z=0; z<BITS; z++) { 54 externs.b[z] = 0; 55 params.b[z] = 0; 56 consts.b[z] = 0; 57 addrs.b[z] = 0; 58 } 59 60 /* 61 * pass 1 62 * build aux data structure 63 * allocate pcs 64 * find use and set of variables 65 */ 66 val = 5L * 5L * 5L * 5L * 5L; 67 lp = log5; 68 for(i=0; i<5; i++) { 69 lp->m = val; 70 lp->c = 0; 71 lp->p = R; 72 val /= 5L; 73 lp++; 74 } 75 val = 0; 76 for(; p != P; p = p->link) { 77 switch(p->as) { 78 case ADATA: 79 case AGLOBL: 80 case ANAME: 81 case ASIGNAME: 82 continue; 83 } 84 r = rega(); 85 if(firstr == R) { 86 firstr = r; 87 lastr = r; 88 } else { 89 lastr->link = r; 90 r->p1 = lastr; 91 lastr->s1 = r; 92 lastr = r; 93 } 94 r->prog = p; 95 r->pc = val; 96 val++; 97 98 lp = log5; 99 for(i=0; i<5; i++) { 100 lp->c--; 101 if(lp->c <= 0) { 102 lp->c = lp->m; 103 if(lp->p != R) 104 lp->p->log5 = r; 105 lp->p = r; 106 (lp+1)->c = 0; 107 break; 108 } 109 lp++; 110 } 111 112 r1 = r->p1; 113 if(r1 != R) 114 switch(r1->prog->as) { 115 case ARET: 116 case AJMP: 117 case AIRETL: 118 r->p1 = R; 119 r1->s1 = R; 120 } 121 122 bit = mkvar(r, &p->from, p->as==AMOVL); 123 if(bany(&bit)) 124 switch(p->as) { 125 /* 126 * funny 127 */ 128 case ALEAL: 129 for(z=0; z<BITS; z++) 130 addrs.b[z] |= bit.b[z]; 131 break; 132 133 /* 134 * left side read 135 */ 136 default: 137 for(z=0; z<BITS; z++) 138 r->use1.b[z] |= bit.b[z]; 139 break; 140 } 141 142 bit = mkvar(r, &p->to, 0); 143 if(bany(&bit)) 144 switch(p->as) { 145 default: 146 diag(Z, "reg: unknown op: %A", p->as); 147 break; 148 149 /* 150 * right side read 151 */ 152 case ACMPB: 153 case ACMPL: 154 case ACMPW: 155 for(z=0; z<BITS; z++) 156 r->use2.b[z] |= bit.b[z]; 157 break; 158 159 /* 160 * right side write 161 */ 162 case ANOP: 163 case AMOVL: 164 case AMOVB: 165 case AMOVW: 166 case AMOVBLSX: 167 case AMOVBLZX: 168 case AMOVWLSX: 169 case AMOVWLZX: 170 for(z=0; z<BITS; z++) 171 r->set.b[z] |= bit.b[z]; 172 break; 173 174 /* 175 * right side read+write 176 */ 177 case AADDB: 178 case AADDL: 179 case AADDW: 180 case AANDB: 181 case AANDL: 182 case AANDW: 183 case ASUBB: 184 case ASUBL: 185 case ASUBW: 186 case AORB: 187 case AORL: 188 case AORW: 189 case AXORB: 190 case AXORL: 191 case AXORW: 192 case ASALB: 193 case ASALL: 194 case ASALW: 195 case ASARB: 196 case ASARL: 197 case ASARW: 198 case AROLB: 199 case AROLL: 200 case AROLW: 201 case ARORB: 202 case ARORL: 203 case ARORW: 204 case ASHLB: 205 case ASHLL: 206 case ASHLW: 207 case ASHRB: 208 case ASHRL: 209 case ASHRW: 210 case AIMULL: 211 case AIMULW: 212 case ANEGL: 213 case ANOTL: 214 case AADCL: 215 case ASBBL: 216 for(z=0; z<BITS; z++) { 217 r->set.b[z] |= bit.b[z]; 218 r->use2.b[z] |= bit.b[z]; 219 } 220 break; 221 222 /* 223 * funny 224 */ 225 case AFMOVDP: 226 case AFMOVFP: 227 case AFMOVLP: 228 case AFMOVVP: 229 case AFMOVWP: 230 case ACALL: 231 for(z=0; z<BITS; z++) 232 addrs.b[z] |= bit.b[z]; 233 break; 234 } 235 236 switch(p->as) { 237 case AIMULL: 238 case AIMULW: 239 if(p->to.type != D_NONE) 240 break; 241 242 case AIDIVB: 243 case AIDIVL: 244 case AIDIVW: 245 case AIMULB: 246 case ADIVB: 247 case ADIVL: 248 case ADIVW: 249 case AMULB: 250 case AMULL: 251 case AMULW: 252 253 case ACWD: 254 case ACDQ: 255 r->regu |= RtoB(D_AX) | RtoB(D_DX); 256 break; 257 258 case AREP: 259 case AREPN: 260 case ALOOP: 261 case ALOOPEQ: 262 case ALOOPNE: 263 r->regu |= RtoB(D_CX); 264 break; 265 266 case AMOVSB: 267 case AMOVSL: 268 case AMOVSW: 269 case ACMPSB: 270 case ACMPSL: 271 case ACMPSW: 272 r->regu |= RtoB(D_SI) | RtoB(D_DI); 273 break; 274 275 case ASTOSB: 276 case ASTOSL: 277 case ASTOSW: 278 case ASCASB: 279 case ASCASL: 280 case ASCASW: 281 r->regu |= RtoB(D_AX) | RtoB(D_DI); 282 break; 283 284 case AINSB: 285 case AINSL: 286 case AINSW: 287 case AOUTSB: 288 case AOUTSL: 289 case AOUTSW: 290 r->regu |= RtoB(D_DI) | RtoB(D_DX); 291 break; 292 293 case AFSTSW: 294 case ASAHF: 295 r->regu |= RtoB(D_AX); 296 break; 297 } 298 } 299 if(firstr == R) 300 return; 301 initpc = pc - val; 302 npc = val; 303 304 /* 305 * pass 2 306 * turn branch references to pointers 307 * build back pointers 308 */ 309 for(r = firstr; r != R; r = r->link) { 310 p = r->prog; 311 if(p->to.type == D_BRANCH) { 312 val = p->to.offset - initpc; 313 r1 = firstr; 314 while(r1 != R) { 315 r2 = r1->log5; 316 if(r2 != R && val >= r2->pc) { 317 r1 = r2; 318 continue; 319 } 320 if(r1->pc == val) 321 break; 322 r1 = r1->link; 323 } 324 if(r1 == R) { 325 nearln = p->lineno; 326 diag(Z, "ref not found\n%P", p); 327 continue; 328 } 329 if(r1 == r) { 330 nearln = p->lineno; 331 diag(Z, "ref to self\n%P", p); 332 continue; 333 } 334 r->s2 = r1; 335 r->p2link = r1->p2; 336 r1->p2 = r; 337 } 338 } 339 if(debug['R']) { 340 p = firstr->prog; 341 print("\n%L %D\n", p->lineno, &p->from); 342 } 343 344 /* 345 * pass 2.5 346 * find looping structure 347 */ 348 for(r = firstr; r != R; r = r->link) 349 r->active = 0; 350 change = 0; 351 loopit(firstr, npc); 352 if(debug['R'] && debug['v']) { 353 print("\nlooping structure:\n"); 354 for(r = firstr; r != R; r = r->link) { 355 print("%ld:%P", r->loop, r->prog); 356 for(z=0; z<BITS; z++) 357 bit.b[z] = r->use1.b[z] | 358 r->use2.b[z] | 359 r->set.b[z]; 360 if(bany(&bit)) { 361 print("\t"); 362 if(bany(&r->use1)) 363 print(" u1=%B", r->use1); 364 if(bany(&r->use2)) 365 print(" u2=%B", r->use2); 366 if(bany(&r->set)) 367 print(" st=%B", r->set); 368 } 369 print("\n"); 370 } 371 } 372 373 /* 374 * pass 3 375 * iterate propagating usage 376 * back until flow graph is complete 377 */ 378 loop1: 379 change = 0; 380 for(r = firstr; r != R; r = r->link) 381 r->active = 0; 382 for(r = firstr; r != R; r = r->link) 383 if(r->prog->as == ARET) 384 prop(r, zbits, zbits); 385 loop11: 386 /* pick up unreachable code */ 387 i = 0; 388 for(r = firstr; r != R; r = r1) { 389 r1 = r->link; 390 if(r1 && r1->active && !r->active) { 391 prop(r, zbits, zbits); 392 i = 1; 393 } 394 } 395 if(i) 396 goto loop11; 397 if(change) 398 goto loop1; 399 400 401 /* 402 * pass 4 403 * iterate propagating register/variable synchrony 404 * forward until graph is complete 405 */ 406 loop2: 407 change = 0; 408 for(r = firstr; r != R; r = r->link) 409 r->active = 0; 410 synch(firstr, zbits); 411 if(change) 412 goto loop2; 413 414 415 /* 416 * pass 5 417 * isolate regions 418 * calculate costs (paint1) 419 */ 420 r = firstr; 421 if(r) { 422 for(z=0; z<BITS; z++) 423 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & 424 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); 425 if(bany(&bit)) { 426 nearln = r->prog->lineno; 427 warn(Z, "used and not set: %B", bit); 428 if(debug['R'] && !debug['w']) 429 print("used and not set: %B\n", bit); 430 } 431 } 432 if(debug['R'] && debug['v']) 433 print("\nprop structure:\n"); 434 for(r = firstr; r != R; r = r->link) 435 r->act = zbits; 436 rgp = region; 437 nregion = 0; 438 for(r = firstr; r != R; r = r->link) { 439 if(debug['R'] && debug['v']) { 440 print("%P\t", r->prog); 441 if(bany(&r->set)) 442 print("s:%B ", r->set); 443 if(bany(&r->refahead)) 444 print("ra:%B ", r->refahead); 445 if(bany(&r->calahead)) 446 print("ca:%B ", r->calahead); 447 print("\n"); 448 } 449 for(z=0; z<BITS; z++) 450 bit.b[z] = r->set.b[z] & 451 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); 452 if(bany(&bit)) { 453 nearln = r->prog->lineno; 454 warn(Z, "set and not used: %B", bit); 455 if(debug['R']) 456 print("set and not used: %B\n", bit); 457 excise(r); 458 } 459 for(z=0; z<BITS; z++) 460 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); 461 while(bany(&bit)) { 462 i = bnum(bit); 463 rgp->enter = r; 464 rgp->varno = i; 465 change = 0; 466 if(debug['R'] && debug['v']) 467 print("\n"); 468 paint1(r, i); 469 bit.b[i/32] &= ~(1L<<(i%32)); 470 if(change <= 0) { 471 if(debug['R']) 472 print("%L$%d: %B\n", 473 r->prog->lineno, change, blsh(i)); 474 continue; 475 } 476 rgp->cost = change; 477 nregion++; 478 if(nregion >= NRGN) { 479 warn(Z, "too many regions"); 480 goto brk; 481 } 482 rgp++; 483 } 484 } 485 brk: 486 qsort(region, nregion, sizeof(region[0]), rcmp); 487 488 /* 489 * pass 6 490 * determine used registers (paint2) 491 * replace code (paint3) 492 */ 493 rgp = region; 494 for(i=0; i<nregion; i++) { 495 bit = blsh(rgp->varno); 496 vreg = paint2(rgp->enter, rgp->varno); 497 vreg = allreg(vreg, rgp); 498 if(debug['R']) { 499 print("%L$%d %R: %B\n", 500 rgp->enter->prog->lineno, 501 rgp->cost, 502 rgp->regno, 503 bit); 504 } 505 if(rgp->regno != 0) 506 paint3(rgp->enter, rgp->varno, vreg, rgp->regno); 507 rgp++; 508 } 509 /* 510 * pass 7 511 * peep-hole on basic block 512 */ 513 if(!debug['R'] || debug['P']) 514 peep(); 515 516 /* 517 * pass 8 518 * recalculate pc 519 */ 520 val = initpc; 521 for(r = firstr; r != R; r = r1) { 522 r->pc = val; 523 p = r->prog; 524 p1 = P; 525 r1 = r->link; 526 if(r1 != R) 527 p1 = r1->prog; 528 for(; p != p1; p = p->link) { 529 switch(p->as) { 530 default: 531 val++; 532 break; 533 534 case ANOP: 535 case ADATA: 536 case AGLOBL: 537 case ANAME: 538 case ASIGNAME: 539 break; 540 } 541 } 542 } 543 pc = val; 544 545 /* 546 * fix up branches 547 */ 548 if(debug['R']) 549 if(bany(&addrs)) 550 print("addrs: %B\n", addrs); 551 552 r1 = 0; /* set */ 553 for(r = firstr; r != R; r = r->link) { 554 p = r->prog; 555 if(p->to.type == D_BRANCH) 556 p->to.offset = r->s2->pc; 557 r1 = r; 558 } 559 560 /* 561 * last pass 562 * eliminate nops 563 * free aux structures 564 */ 565 for(p = firstr->prog; p != P; p = p->link){ 566 while(p->link && p->link->as == ANOP) 567 p->link = p->link->link; 568 } 569 if(r1 != R) { 570 r1->link = freer; 571 freer = firstr; 572 } 573 } 574 575 /* 576 * add mov b,rn 577 * just after r 578 */ 579 void 580 addmove(Reg *r, int bn, int rn, int f) 581 { 582 Prog *p, *p1; 583 Adr *a; 584 Var *v; 585 586 p1 = alloc(sizeof(*p1)); 587 *p1 = zprog; 588 p = r->prog; 589 590 p1->link = p->link; 591 p->link = p1; 592 p1->lineno = p->lineno; 593 594 v = var + bn; 595 596 a = &p1->to; 597 a->sym = v->sym; 598 a->offset = v->offset; 599 a->etype = v->etype; 600 a->type = v->name; 601 602 p1->as = AMOVL; 603 if(v->etype == TCHAR || v->etype == TUCHAR) 604 p1->as = AMOVB; 605 if(v->etype == TSHORT || v->etype == TUSHORT) 606 p1->as = AMOVW; 607 608 p1->from.type = rn; 609 if(!f) { 610 p1->from = *a; 611 *a = zprog.from; 612 a->type = rn; 613 if(v->etype == TUCHAR) 614 p1->as = AMOVB; 615 if(v->etype == TUSHORT) 616 p1->as = AMOVW; 617 } 618 if(debug['R']) 619 print("%P\t.a%P\n", p, p1); 620 } 621 622 ulong 623 doregbits(int r) 624 { 625 ulong b; 626 627 b = 0; 628 if(r >= D_INDIR) 629 r -= D_INDIR; 630 if(r >= D_AX && r <= D_DI) 631 b |= RtoB(r); 632 else 633 if(r >= D_AL && r <= D_BL) 634 b |= RtoB(r-D_AL+D_AX); 635 else 636 if(r >= D_AH && r <= D_BH) 637 b |= RtoB(r-D_AH+D_AX); 638 return b; 639 } 640 641 Bits 642 mkvar(Reg *r, Adr *a, int isro) 643 { 644 Var *v; 645 int i, t, n, et, z; 646 long o; 647 Bits bit; 648 Sym *s; 649 650 /* 651 * mark registers used 652 */ 653 t = a->type; 654 r->regu |= doregbits(t); 655 r->regu |= doregbits(a->index); 656 et = a->etype; 657 658 switch(t) { 659 default: 660 goto none; 661 case D_INDIR+D_GS: 662 if(!isro || 1) 663 goto none; 664 n = t; 665 {static Sym er; a->sym = &er;} 666 a->sym->name = "$extreg"; 667 break; 668 case D_ADDR: 669 a->type = a->index; 670 bit = mkvar(r, a, 0); 671 for(z=0; z<BITS; z++) 672 addrs.b[z] |= bit.b[z]; 673 a->type = t; 674 goto none; 675 case D_EXTERN: 676 case D_STATIC: 677 case D_PARAM: 678 case D_AUTO: 679 n = t; 680 break; 681 } 682 s = a->sym; 683 if(s == S) 684 goto none; 685 if(s->name[0] == '.') 686 goto none; 687 o = a->offset; 688 v = var; 689 for(i=0; i<nvar; i++) { 690 if(s == v->sym) 691 if(n == v->name) 692 if(o == v->offset) 693 goto out; 694 v++; 695 } 696 if(nvar >= NVAR) { 697 if(debug['w'] > 1 && s) 698 warn(Z, "variable not optimized: %s", s->name); 699 goto none; 700 } 701 i = nvar; 702 nvar++; 703 v = &var[i]; 704 v->sym = s; 705 v->offset = o; 706 v->name = n; 707 v->etype = et; 708 if(debug['R']) 709 print("bit=%2d et=%2d %D\n", i, et, a); 710 711 out: 712 bit = blsh(i); 713 if(n == D_EXTERN || n == D_STATIC) 714 for(z=0; z<BITS; z++) 715 externs.b[z] |= bit.b[z]; 716 if(n == D_PARAM) 717 for(z=0; z<BITS; z++) 718 params.b[z] |= bit.b[z]; 719 if(v->etype != et || !typechlpfd[et]) /* funny punning */ 720 for(z=0; z<BITS; z++) 721 addrs.b[z] |= bit.b[z]; 722 return bit; 723 724 none: 725 return zbits; 726 } 727 728 void 729 prop(Reg *r, Bits ref, Bits cal) 730 { 731 Reg *r1, *r2; 732 int z; 733 734 for(r1 = r; r1 != R; r1 = r1->p1) { 735 for(z=0; z<BITS; z++) { 736 ref.b[z] |= r1->refahead.b[z]; 737 if(ref.b[z] != r1->refahead.b[z]) { 738 r1->refahead.b[z] = ref.b[z]; 739 change++; 740 } 741 cal.b[z] |= r1->calahead.b[z]; 742 if(cal.b[z] != r1->calahead.b[z]) { 743 r1->calahead.b[z] = cal.b[z]; 744 change++; 745 } 746 } 747 switch(r1->prog->as) { 748 case ACALL: 749 for(z=0; z<BITS; z++) { 750 cal.b[z] |= ref.b[z] | externs.b[z]; 751 ref.b[z] = 0; 752 } 753 break; 754 755 case ATEXT: 756 for(z=0; z<BITS; z++) { 757 cal.b[z] = 0; 758 ref.b[z] = 0; 759 } 760 break; 761 762 case ARET: 763 for(z=0; z<BITS; z++) { 764 cal.b[z] = externs.b[z]; 765 ref.b[z] = 0; 766 } 767 } 768 for(z=0; z<BITS; z++) { 769 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | 770 r1->use1.b[z] | r1->use2.b[z]; 771 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); 772 r1->refbehind.b[z] = ref.b[z]; 773 r1->calbehind.b[z] = cal.b[z]; 774 } 775 if(r1->active) 776 break; 777 r1->active = 1; 778 } 779 for(; r != r1; r = r->p1) 780 for(r2 = r->p2; r2 != R; r2 = r2->p2link) 781 prop(r2, r->refbehind, r->calbehind); 782 } 783 784 /* 785 * find looping structure 786 * 787 * 1) find reverse postordering 788 * 2) find approximate dominators, 789 * the actual dominators if the flow graph is reducible 790 * otherwise, dominators plus some other non-dominators. 791 * See Matthew S. Hecht and Jeffrey D. Ullman, 792 * "Analysis of a Simple Algorithm for Global Data Flow Problems", 793 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, 794 * Oct. 1-3, 1973, pp. 207-217. 795 * 3) find all nodes with a predecessor dominated by the current node. 796 * such a node is a loop head. 797 * recursively, all preds with a greater rpo number are in the loop 798 */ 799 long 800 postorder(Reg *r, Reg **rpo2r, long n) 801 { 802 Reg *r1; 803 804 r->rpo = 1; 805 r1 = r->s1; 806 if(r1 && !r1->rpo) 807 n = postorder(r1, rpo2r, n); 808 r1 = r->s2; 809 if(r1 && !r1->rpo) 810 n = postorder(r1, rpo2r, n); 811 rpo2r[n] = r; 812 n++; 813 return n; 814 } 815 816 long 817 rpolca(long *idom, long rpo1, long rpo2) 818 { 819 long t; 820 821 if(rpo1 == -1) 822 return rpo2; 823 while(rpo1 != rpo2){ 824 if(rpo1 > rpo2){ 825 t = rpo2; 826 rpo2 = rpo1; 827 rpo1 = t; 828 } 829 while(rpo1 < rpo2){ 830 t = idom[rpo2]; 831 if(t >= rpo2) 832 fatal(Z, "bad idom"); 833 rpo2 = t; 834 } 835 } 836 return rpo1; 837 } 838 839 int 840 doms(long *idom, long r, long s) 841 { 842 while(s > r) 843 s = idom[s]; 844 return s == r; 845 } 846 847 int 848 loophead(long *idom, Reg *r) 849 { 850 long src; 851 852 src = r->rpo; 853 if(r->p1 != R && doms(idom, src, r->p1->rpo)) 854 return 1; 855 for(r = r->p2; r != R; r = r->p2link) 856 if(doms(idom, src, r->rpo)) 857 return 1; 858 return 0; 859 } 860 861 void 862 loopmark(Reg **rpo2r, long head, Reg *r) 863 { 864 if(r->rpo < head || r->active == head) 865 return; 866 r->active = head; 867 r->loop += LOOP; 868 if(r->p1 != R) 869 loopmark(rpo2r, head, r->p1); 870 for(r = r->p2; r != R; r = r->p2link) 871 loopmark(rpo2r, head, r); 872 } 873 874 void 875 loopit(Reg *r, long nr) 876 { 877 Reg *r1; 878 long i, d, me; 879 880 if(nr > maxnr) { 881 rpo2r = alloc(nr * sizeof(Reg*)); 882 idom = alloc(nr * sizeof(long)); 883 maxnr = nr; 884 } 885 886 d = postorder(r, rpo2r, 0); 887 if(d > nr) 888 fatal(Z, "too many reg nodes"); 889 nr = d; 890 for(i = 0; i < nr / 2; i++){ 891 r1 = rpo2r[i]; 892 rpo2r[i] = rpo2r[nr - 1 - i]; 893 rpo2r[nr - 1 - i] = r1; 894 } 895 for(i = 0; i < nr; i++) 896 rpo2r[i]->rpo = i; 897 898 idom[0] = 0; 899 for(i = 0; i < nr; i++){ 900 r1 = rpo2r[i]; 901 me = r1->rpo; 902 d = -1; 903 if(r1->p1 != R && r1->p1->rpo < me) 904 d = r1->p1->rpo; 905 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) 906 if(r1->rpo < me) 907 d = rpolca(idom, d, r1->rpo); 908 idom[i] = d; 909 } 910 911 for(i = 0; i < nr; i++){ 912 r1 = rpo2r[i]; 913 r1->loop++; 914 if(r1->p2 != R && loophead(idom, r1)) 915 loopmark(rpo2r, i, r1); 916 } 917 } 918 919 void 920 synch(Reg *r, Bits dif) 921 { 922 Reg *r1; 923 int z; 924 925 for(r1 = r; r1 != R; r1 = r1->s1) { 926 for(z=0; z<BITS; z++) { 927 dif.b[z] = (dif.b[z] & 928 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | 929 r1->set.b[z] | r1->regdiff.b[z]; 930 if(dif.b[z] != r1->regdiff.b[z]) { 931 r1->regdiff.b[z] = dif.b[z]; 932 change++; 933 } 934 } 935 if(r1->active) 936 break; 937 r1->active = 1; 938 for(z=0; z<BITS; z++) 939 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); 940 if(r1->s2 != R) 941 synch(r1->s2, dif); 942 } 943 } 944 945 ulong 946 allreg(ulong b, Rgn *r) 947 { 948 Var *v; 949 int i; 950 951 v = var + r->varno; 952 r->regno = 0; 953 switch(v->etype) { 954 955 default: 956 diag(Z, "unknown etype %d/%d", bitno(b), v->etype); 957 break; 958 959 case TCHAR: 960 case TUCHAR: 961 case TSHORT: 962 case TUSHORT: 963 case TINT: 964 case TUINT: 965 case TLONG: 966 case TULONG: 967 case TIND: 968 case TARRAY: 969 i = BtoR(~b); 970 if(i && r->cost > 0) { 971 r->regno = i; 972 return RtoB(i); 973 } 974 break; 975 976 case TDOUBLE: 977 case TFLOAT: 978 break; 979 } 980 return 0; 981 } 982 983 void 984 paint1(Reg *r, int bn) 985 { 986 Reg *r1; 987 Prog *p; 988 int z; 989 ulong bb; 990 991 z = bn/32; 992 bb = 1L<<(bn%32); 993 if(r->act.b[z] & bb) 994 return; 995 for(;;) { 996 if(!(r->refbehind.b[z] & bb)) 997 break; 998 r1 = r->p1; 999 if(r1 == R) 1000 break; 1001 if(!(r1->refahead.b[z] & bb)) 1002 break; 1003 if(r1->act.b[z] & bb) 1004 break; 1005 r = r1; 1006 } 1007 1008 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { 1009 change -= CLOAD * r->loop; 1010 if(debug['R'] && debug['v']) 1011 print("%ld%P\tld %B $%d\n", r->loop, 1012 r->prog, blsh(bn), change); 1013 } 1014 for(;;) { 1015 r->act.b[z] |= bb; 1016 p = r->prog; 1017 1018 if(r->use1.b[z] & bb) { 1019 change += CREF * r->loop; 1020 if(p->as == AFMOVL || p->as == AFMOVW) 1021 if(BtoR(bb) != D_F0) 1022 change = -CINF; 1023 if(debug['R'] && debug['v']) 1024 print("%ld%P\tu1 %B $%d\n", r->loop, 1025 p, blsh(bn), change); 1026 } 1027 1028 if((r->use2.b[z]|r->set.b[z]) & bb) { 1029 change += CREF * r->loop; 1030 if(p->as == AFMOVL || p->as == AFMOVW) 1031 if(BtoR(bb) != D_F0) 1032 change = -CINF; 1033 if(debug['R'] && debug['v']) 1034 print("%ld%P\tu2 %B $%d\n", r->loop, 1035 p, blsh(bn), change); 1036 } 1037 1038 if(STORE(r) & r->regdiff.b[z] & bb) { 1039 change -= CLOAD * r->loop; 1040 if(p->as == AFMOVL || p->as == AFMOVW) 1041 if(BtoR(bb) != D_F0) 1042 change = -CINF; 1043 if(debug['R'] && debug['v']) 1044 print("%ld%P\tst %B $%d\n", r->loop, 1045 p, blsh(bn), change); 1046 } 1047 1048 if(r->refbehind.b[z] & bb) 1049 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1050 if(r1->refahead.b[z] & bb) 1051 paint1(r1, bn); 1052 1053 if(!(r->refahead.b[z] & bb)) 1054 break; 1055 r1 = r->s2; 1056 if(r1 != R) 1057 if(r1->refbehind.b[z] & bb) 1058 paint1(r1, bn); 1059 r = r->s1; 1060 if(r == R) 1061 break; 1062 if(r->act.b[z] & bb) 1063 break; 1064 if(!(r->refbehind.b[z] & bb)) 1065 break; 1066 } 1067 } 1068 1069 ulong 1070 regset(Reg *r, ulong bb) 1071 { 1072 ulong b, set; 1073 Adr v; 1074 int c; 1075 1076 set = 0; 1077 v = zprog.from; 1078 while(b = bb & ~(bb-1)) { 1079 v.type = BtoR(b); 1080 c = copyu(r->prog, &v, A); 1081 if(c == 3) 1082 set |= b; 1083 bb &= ~b; 1084 } 1085 return set; 1086 } 1087 1088 ulong 1089 reguse(Reg *r, ulong bb) 1090 { 1091 ulong b, set; 1092 Adr v; 1093 int c; 1094 1095 set = 0; 1096 v = zprog.from; 1097 while(b = bb & ~(bb-1)) { 1098 v.type = BtoR(b); 1099 c = copyu(r->prog, &v, A); 1100 if(c == 1 || c == 2 || c == 4) 1101 set |= b; 1102 bb &= ~b; 1103 } 1104 return set; 1105 } 1106 1107 ulong 1108 paint2(Reg *r, int bn) 1109 { 1110 Reg *r1; 1111 int z; 1112 ulong bb, vreg, x; 1113 1114 z = bn/32; 1115 bb = 1L << (bn%32); 1116 vreg = regbits; 1117 if(!(r->act.b[z] & bb)) 1118 return vreg; 1119 for(;;) { 1120 if(!(r->refbehind.b[z] & bb)) 1121 break; 1122 r1 = r->p1; 1123 if(r1 == R) 1124 break; 1125 if(!(r1->refahead.b[z] & bb)) 1126 break; 1127 if(!(r1->act.b[z] & bb)) 1128 break; 1129 r = r1; 1130 } 1131 for(;;) { 1132 r->act.b[z] &= ~bb; 1133 1134 vreg |= r->regu; 1135 1136 if(r->refbehind.b[z] & bb) 1137 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1138 if(r1->refahead.b[z] & bb) 1139 vreg |= paint2(r1, bn); 1140 1141 if(!(r->refahead.b[z] & bb)) 1142 break; 1143 r1 = r->s2; 1144 if(r1 != R) 1145 if(r1->refbehind.b[z] & bb) 1146 vreg |= paint2(r1, bn); 1147 r = r->s1; 1148 if(r == R) 1149 break; 1150 if(!(r->act.b[z] & bb)) 1151 break; 1152 if(!(r->refbehind.b[z] & bb)) 1153 break; 1154 } 1155 1156 bb = vreg; 1157 for(; r; r=r->s1) { 1158 x = r->regu & ~bb; 1159 if(x) { 1160 vreg |= reguse(r, x); 1161 bb |= regset(r, x); 1162 } 1163 } 1164 return vreg; 1165 } 1166 1167 void 1168 paint3(Reg *r, int bn, long rb, int rn) 1169 { 1170 Reg *r1; 1171 Prog *p; 1172 int z; 1173 ulong bb; 1174 1175 z = bn/32; 1176 bb = 1L << (bn%32); 1177 if(r->act.b[z] & bb) 1178 return; 1179 for(;;) { 1180 if(!(r->refbehind.b[z] & bb)) 1181 break; 1182 r1 = r->p1; 1183 if(r1 == R) 1184 break; 1185 if(!(r1->refahead.b[z] & bb)) 1186 break; 1187 if(r1->act.b[z] & bb) 1188 break; 1189 r = r1; 1190 } 1191 1192 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) 1193 addmove(r, bn, rn, 0); 1194 for(;;) { 1195 r->act.b[z] |= bb; 1196 p = r->prog; 1197 1198 if(r->use1.b[z] & bb) { 1199 if(debug['R']) 1200 print("%P", p); 1201 addreg(&p->from, rn); 1202 if(debug['R']) 1203 print("\t.c%P\n", p); 1204 } 1205 if((r->use2.b[z]|r->set.b[z]) & bb) { 1206 if(debug['R']) 1207 print("%P", p); 1208 addreg(&p->to, rn); 1209 if(debug['R']) 1210 print("\t.c%P\n", p); 1211 } 1212 1213 if(STORE(r) & r->regdiff.b[z] & bb) 1214 addmove(r, bn, rn, 1); 1215 r->regu |= rb; 1216 1217 if(r->refbehind.b[z] & bb) 1218 for(r1 = r->p2; r1 != R; r1 = r1->p2link) 1219 if(r1->refahead.b[z] & bb) 1220 paint3(r1, bn, rb, rn); 1221 1222 if(!(r->refahead.b[z] & bb)) 1223 break; 1224 r1 = r->s2; 1225 if(r1 != R) 1226 if(r1->refbehind.b[z] & bb) 1227 paint3(r1, bn, rb, rn); 1228 r = r->s1; 1229 if(r == R) 1230 break; 1231 if(r->act.b[z] & bb) 1232 break; 1233 if(!(r->refbehind.b[z] & bb)) 1234 break; 1235 } 1236 } 1237 1238 void 1239 addreg(Adr *a, int rn) 1240 { 1241 1242 a->sym = 0; 1243 a->offset = 0; 1244 a->type = rn; 1245 } 1246 1247 long 1248 RtoB(int r) 1249 { 1250 1251 if(r < D_AX || r > D_DI) 1252 return 0; 1253 return 1L << (r-D_AX); 1254 } 1255 1256 int 1257 BtoR(long b) 1258 { 1259 1260 b &= 0xffL; 1261 if(b == 0) 1262 return 0; 1263 return bitno(b) + D_AX; 1264 } 1265