1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(const void * a1,const void * a2)19 rcmp(const void *a1, const void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = (Rgn*)a1;
25 p2 = (Rgn*)a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = RtoB(D_SP) | RtoB(D_AX);
53 for(z=0; z<BITS; z++) {
54 externs.b[z] = 0;
55 params.b[z] = 0;
56 consts.b[z] = 0;
57 addrs.b[z] = 0;
58 }
59
60 /*
61 * pass 1
62 * build aux data structure
63 * allocate pcs
64 * find use and set of variables
65 */
66 val = 5L * 5L * 5L * 5L * 5L;
67 lp = log5;
68 for(i=0; i<5; i++) {
69 lp->m = val;
70 lp->c = 0;
71 lp->p = R;
72 val /= 5L;
73 lp++;
74 }
75 val = 0;
76 for(; p != P; p = p->link) {
77 switch(p->as) {
78 case ADATA:
79 case AGLOBL:
80 case ANAME:
81 case ASIGNAME:
82 continue;
83 }
84 r = rega();
85 if(firstr == R) {
86 firstr = r;
87 lastr = r;
88 } else {
89 lastr->link = r;
90 r->p1 = lastr;
91 lastr->s1 = r;
92 lastr = r;
93 }
94 r->prog = p;
95 r->pc = val;
96 val++;
97
98 lp = log5;
99 for(i=0; i<5; i++) {
100 lp->c--;
101 if(lp->c <= 0) {
102 lp->c = lp->m;
103 if(lp->p != R)
104 lp->p->log5 = r;
105 lp->p = r;
106 (lp+1)->c = 0;
107 break;
108 }
109 lp++;
110 }
111
112 r1 = r->p1;
113 if(r1 != R)
114 switch(r1->prog->as) {
115 case ARET:
116 case AJMP:
117 case AIRETL:
118 r->p1 = R;
119 r1->s1 = R;
120 }
121
122 bit = mkvar(r, &p->from, p->as==AMOVL);
123 if(bany(&bit))
124 switch(p->as) {
125 /*
126 * funny
127 */
128 case ALEAL:
129 for(z=0; z<BITS; z++)
130 addrs.b[z] |= bit.b[z];
131 break;
132
133 /*
134 * left side read
135 */
136 default:
137 for(z=0; z<BITS; z++)
138 r->use1.b[z] |= bit.b[z];
139 break;
140 }
141
142 bit = mkvar(r, &p->to, 0);
143 if(bany(&bit))
144 switch(p->as) {
145 default:
146 diag(Z, "reg: unknown op: %A", p->as);
147 break;
148
149 /*
150 * right side read
151 */
152 case ACMPB:
153 case ACMPL:
154 case ACMPW:
155 for(z=0; z<BITS; z++)
156 r->use2.b[z] |= bit.b[z];
157 break;
158
159 /*
160 * right side write
161 */
162 case ANOP:
163 case AMOVL:
164 case AMOVB:
165 case AMOVW:
166 case AMOVBLSX:
167 case AMOVBLZX:
168 case AMOVWLSX:
169 case AMOVWLZX:
170 for(z=0; z<BITS; z++)
171 r->set.b[z] |= bit.b[z];
172 break;
173
174 /*
175 * right side read+write
176 */
177 case AADDB:
178 case AADDL:
179 case AADDW:
180 case AANDB:
181 case AANDL:
182 case AANDW:
183 case ASUBB:
184 case ASUBL:
185 case ASUBW:
186 case AORB:
187 case AORL:
188 case AORW:
189 case AXORB:
190 case AXORL:
191 case AXORW:
192 case ASALB:
193 case ASALL:
194 case ASALW:
195 case ASARB:
196 case ASARL:
197 case ASARW:
198 case AROLB:
199 case AROLL:
200 case AROLW:
201 case ARORB:
202 case ARORL:
203 case ARORW:
204 case ASHLB:
205 case ASHLL:
206 case ASHLW:
207 case ASHRB:
208 case ASHRL:
209 case ASHRW:
210 case AIMULL:
211 case AIMULW:
212 case ANEGL:
213 case ANOTL:
214 case AADCL:
215 case ASBBL:
216 for(z=0; z<BITS; z++) {
217 r->set.b[z] |= bit.b[z];
218 r->use2.b[z] |= bit.b[z];
219 }
220 break;
221
222 /*
223 * funny
224 */
225 case AFMOVDP:
226 case AFMOVFP:
227 case AFMOVLP:
228 case AFMOVVP:
229 case AFMOVWP:
230 case ACALL:
231 for(z=0; z<BITS; z++)
232 addrs.b[z] |= bit.b[z];
233 break;
234 }
235
236 switch(p->as) {
237 case AIMULL:
238 case AIMULW:
239 if(p->to.type != D_NONE)
240 break;
241
242 case AIDIVB:
243 case AIDIVL:
244 case AIDIVW:
245 case AIMULB:
246 case ADIVB:
247 case ADIVL:
248 case ADIVW:
249 case AMULB:
250 case AMULL:
251 case AMULW:
252
253 case ACWD:
254 case ACDQ:
255 r->regu |= RtoB(D_AX) | RtoB(D_DX);
256 break;
257
258 case AREP:
259 case AREPN:
260 case ALOOP:
261 case ALOOPEQ:
262 case ALOOPNE:
263 r->regu |= RtoB(D_CX);
264 break;
265
266 case AMOVSB:
267 case AMOVSL:
268 case AMOVSW:
269 case ACMPSB:
270 case ACMPSL:
271 case ACMPSW:
272 r->regu |= RtoB(D_SI) | RtoB(D_DI);
273 break;
274
275 case ASTOSB:
276 case ASTOSL:
277 case ASTOSW:
278 case ASCASB:
279 case ASCASL:
280 case ASCASW:
281 r->regu |= RtoB(D_AX) | RtoB(D_DI);
282 break;
283
284 case AINSB:
285 case AINSL:
286 case AINSW:
287 case AOUTSB:
288 case AOUTSL:
289 case AOUTSW:
290 r->regu |= RtoB(D_DI) | RtoB(D_DX);
291 break;
292
293 case AFSTSW:
294 case ASAHF:
295 r->regu |= RtoB(D_AX);
296 break;
297 }
298 }
299 if(firstr == R)
300 return;
301 initpc = pc - val;
302 npc = val;
303
304 /*
305 * pass 2
306 * turn branch references to pointers
307 * build back pointers
308 */
309 for(r = firstr; r != R; r = r->link) {
310 p = r->prog;
311 if(p->to.type == D_BRANCH) {
312 val = p->to.offset - initpc;
313 r1 = firstr;
314 while(r1 != R) {
315 r2 = r1->log5;
316 if(r2 != R && val >= r2->pc) {
317 r1 = r2;
318 continue;
319 }
320 if(r1->pc == val)
321 break;
322 r1 = r1->link;
323 }
324 if(r1 == R) {
325 nearln = p->lineno;
326 diag(Z, "ref not found\n%P", p);
327 continue;
328 }
329 if(r1 == r) {
330 nearln = p->lineno;
331 diag(Z, "ref to self\n%P", p);
332 continue;
333 }
334 r->s2 = r1;
335 r->p2link = r1->p2;
336 r1->p2 = r;
337 }
338 }
339 if(debug['R']) {
340 p = firstr->prog;
341 print("\n%L %D\n", p->lineno, &p->from);
342 }
343
344 /*
345 * pass 2.5
346 * find looping structure
347 */
348 for(r = firstr; r != R; r = r->link)
349 r->active = 0;
350 change = 0;
351 loopit(firstr, npc);
352 if(debug['R'] && debug['v']) {
353 print("\nlooping structure:\n");
354 for(r = firstr; r != R; r = r->link) {
355 print("%ld:%P", r->loop, r->prog);
356 for(z=0; z<BITS; z++)
357 bit.b[z] = r->use1.b[z] |
358 r->use2.b[z] |
359 r->set.b[z];
360 if(bany(&bit)) {
361 print("\t");
362 if(bany(&r->use1))
363 print(" u1=%B", r->use1);
364 if(bany(&r->use2))
365 print(" u2=%B", r->use2);
366 if(bany(&r->set))
367 print(" st=%B", r->set);
368 }
369 print("\n");
370 }
371 }
372
373 /*
374 * pass 3
375 * iterate propagating usage
376 * back until flow graph is complete
377 */
378 loop1:
379 change = 0;
380 for(r = firstr; r != R; r = r->link)
381 r->active = 0;
382 for(r = firstr; r != R; r = r->link)
383 if(r->prog->as == ARET)
384 prop(r, zbits, zbits);
385 loop11:
386 /* pick up unreachable code */
387 i = 0;
388 for(r = firstr; r != R; r = r1) {
389 r1 = r->link;
390 if(r1 && r1->active && !r->active) {
391 prop(r, zbits, zbits);
392 i = 1;
393 }
394 }
395 if(i)
396 goto loop11;
397 if(change)
398 goto loop1;
399
400
401 /*
402 * pass 4
403 * iterate propagating register/variable synchrony
404 * forward until graph is complete
405 */
406 loop2:
407 change = 0;
408 for(r = firstr; r != R; r = r->link)
409 r->active = 0;
410 synch(firstr, zbits);
411 if(change)
412 goto loop2;
413
414
415 /*
416 * pass 5
417 * isolate regions
418 * calculate costs (paint1)
419 */
420 r = firstr;
421 if(r) {
422 for(z=0; z<BITS; z++)
423 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
424 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
425 if(bany(&bit)) {
426 nearln = r->prog->lineno;
427 warn(Z, "used and not set: %B", bit);
428 if(debug['R'] && !debug['w'])
429 print("used and not set: %B\n", bit);
430 }
431 }
432 if(debug['R'] && debug['v'])
433 print("\nprop structure:\n");
434 for(r = firstr; r != R; r = r->link)
435 r->act = zbits;
436 rgp = region;
437 nregion = 0;
438 for(r = firstr; r != R; r = r->link) {
439 if(debug['R'] && debug['v']) {
440 print("%P\t", r->prog);
441 if(bany(&r->set))
442 print("s:%B ", r->set);
443 if(bany(&r->refahead))
444 print("ra:%B ", r->refahead);
445 if(bany(&r->calahead))
446 print("ca:%B ", r->calahead);
447 print("\n");
448 }
449 for(z=0; z<BITS; z++)
450 bit.b[z] = r->set.b[z] &
451 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
452 if(bany(&bit)) {
453 nearln = r->prog->lineno;
454 warn(Z, "set and not used: %B", bit);
455 if(debug['R'])
456 print("set and not used: %B\n", bit);
457 excise(r);
458 }
459 for(z=0; z<BITS; z++)
460 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
461 while(bany(&bit)) {
462 i = bnum(bit);
463 rgp->enter = r;
464 rgp->varno = i;
465 change = 0;
466 if(debug['R'] && debug['v'])
467 print("\n");
468 paint1(r, i);
469 bit.b[i/32] &= ~(1L<<(i%32));
470 if(change <= 0) {
471 if(debug['R'])
472 print("%L$%d: %B\n",
473 r->prog->lineno, change, blsh(i));
474 continue;
475 }
476 rgp->cost = change;
477 nregion++;
478 if(nregion >= NRGN) {
479 warn(Z, "too many regions");
480 goto brk;
481 }
482 rgp++;
483 }
484 }
485 brk:
486 qsort(region, nregion, sizeof(region[0]), rcmp);
487
488 /*
489 * pass 6
490 * determine used registers (paint2)
491 * replace code (paint3)
492 */
493 rgp = region;
494 for(i=0; i<nregion; i++) {
495 bit = blsh(rgp->varno);
496 vreg = paint2(rgp->enter, rgp->varno);
497 vreg = allreg(vreg, rgp);
498 if(debug['R']) {
499 print("%L$%d %R: %B\n",
500 rgp->enter->prog->lineno,
501 rgp->cost,
502 rgp->regno,
503 bit);
504 }
505 if(rgp->regno != 0)
506 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
507 rgp++;
508 }
509 /*
510 * pass 7
511 * peep-hole on basic block
512 */
513 if(!debug['R'] || debug['P'])
514 peep();
515
516 /*
517 * pass 8
518 * recalculate pc
519 */
520 val = initpc;
521 for(r = firstr; r != R; r = r1) {
522 r->pc = val;
523 p = r->prog;
524 p1 = P;
525 r1 = r->link;
526 if(r1 != R)
527 p1 = r1->prog;
528 for(; p != p1; p = p->link) {
529 switch(p->as) {
530 default:
531 val++;
532 break;
533
534 case ANOP:
535 case ADATA:
536 case AGLOBL:
537 case ANAME:
538 case ASIGNAME:
539 break;
540 }
541 }
542 }
543 pc = val;
544
545 /*
546 * fix up branches
547 */
548 if(debug['R'])
549 if(bany(&addrs))
550 print("addrs: %B\n", addrs);
551
552 r1 = 0; /* set */
553 for(r = firstr; r != R; r = r->link) {
554 p = r->prog;
555 if(p->to.type == D_BRANCH)
556 p->to.offset = r->s2->pc;
557 r1 = r;
558 }
559
560 /*
561 * last pass
562 * eliminate nops
563 * free aux structures
564 */
565 for(p = firstr->prog; p != P; p = p->link){
566 while(p->link && p->link->as == ANOP)
567 p->link = p->link->link;
568 }
569 if(r1 != R) {
570 r1->link = freer;
571 freer = firstr;
572 }
573 }
574
575 /*
576 * add mov b,rn
577 * just after r
578 */
579 void
addmove(Reg * r,int bn,int rn,int f)580 addmove(Reg *r, int bn, int rn, int f)
581 {
582 Prog *p, *p1;
583 Adr *a;
584 Var *v;
585
586 p1 = alloc(sizeof(*p1));
587 *p1 = zprog;
588 p = r->prog;
589
590 p1->link = p->link;
591 p->link = p1;
592 p1->lineno = p->lineno;
593
594 v = var + bn;
595
596 a = &p1->to;
597 a->sym = v->sym;
598 a->offset = v->offset;
599 a->etype = v->etype;
600 a->type = v->name;
601
602 p1->as = AMOVL;
603 if(v->etype == TCHAR || v->etype == TUCHAR)
604 p1->as = AMOVB;
605 if(v->etype == TSHORT || v->etype == TUSHORT)
606 p1->as = AMOVW;
607
608 p1->from.type = rn;
609 if(!f) {
610 p1->from = *a;
611 *a = zprog.from;
612 a->type = rn;
613 if(v->etype == TUCHAR)
614 p1->as = AMOVB;
615 if(v->etype == TUSHORT)
616 p1->as = AMOVW;
617 }
618 if(debug['R'])
619 print("%P\t.a%P\n", p, p1);
620 }
621
622 ulong
doregbits(int r)623 doregbits(int r)
624 {
625 ulong b;
626
627 b = 0;
628 if(r >= D_INDIR)
629 r -= D_INDIR;
630 if(r >= D_AX && r <= D_DI)
631 b |= RtoB(r);
632 else
633 if(r >= D_AL && r <= D_BL)
634 b |= RtoB(r-D_AL+D_AX);
635 else
636 if(r >= D_AH && r <= D_BH)
637 b |= RtoB(r-D_AH+D_AX);
638 return b;
639 }
640
641 Bits
mkvar(Reg * r,Adr * a,int isro)642 mkvar(Reg *r, Adr *a, int isro)
643 {
644 Var *v;
645 int i, t, n, et, z;
646 long o;
647 Bits bit;
648 Sym *s;
649
650 /*
651 * mark registers used
652 */
653 t = a->type;
654 r->regu |= doregbits(t);
655 r->regu |= doregbits(a->index);
656 et = a->etype;
657
658 switch(t) {
659 default:
660 goto none;
661 case D_INDIR+D_GS:
662 if(!isro || 1)
663 goto none;
664 n = t;
665 {static Sym er; a->sym = &er;}
666 a->sym->name = "$extreg";
667 break;
668 case D_ADDR:
669 a->type = a->index;
670 bit = mkvar(r, a, 0);
671 for(z=0; z<BITS; z++)
672 addrs.b[z] |= bit.b[z];
673 a->type = t;
674 goto none;
675 case D_EXTERN:
676 case D_STATIC:
677 case D_PARAM:
678 case D_AUTO:
679 n = t;
680 break;
681 }
682 s = a->sym;
683 if(s == S)
684 goto none;
685 if(s->name[0] == '.')
686 goto none;
687 o = a->offset;
688 v = var;
689 for(i=0; i<nvar; i++) {
690 if(s == v->sym)
691 if(n == v->name)
692 if(o == v->offset)
693 goto out;
694 v++;
695 }
696 if(nvar >= NVAR) {
697 if(debug['w'] > 1 && s)
698 warn(Z, "variable not optimized: %s", s->name);
699 goto none;
700 }
701 i = nvar;
702 nvar++;
703 v = &var[i];
704 v->sym = s;
705 v->offset = o;
706 v->name = n;
707 v->etype = et;
708 if(debug['R'])
709 print("bit=%2d et=%2d %D\n", i, et, a);
710
711 out:
712 bit = blsh(i);
713 if(n == D_EXTERN || n == D_STATIC)
714 for(z=0; z<BITS; z++)
715 externs.b[z] |= bit.b[z];
716 if(n == D_PARAM)
717 for(z=0; z<BITS; z++)
718 params.b[z] |= bit.b[z];
719 if(v->etype != et || !typechlpfd[et]) /* funny punning */
720 for(z=0; z<BITS; z++)
721 addrs.b[z] |= bit.b[z];
722 return bit;
723
724 none:
725 return zbits;
726 }
727
728 void
prop(Reg * r,Bits ref,Bits cal)729 prop(Reg *r, Bits ref, Bits cal)
730 {
731 Reg *r1, *r2;
732 int z;
733
734 for(r1 = r; r1 != R; r1 = r1->p1) {
735 for(z=0; z<BITS; z++) {
736 ref.b[z] |= r1->refahead.b[z];
737 if(ref.b[z] != r1->refahead.b[z]) {
738 r1->refahead.b[z] = ref.b[z];
739 change++;
740 }
741 cal.b[z] |= r1->calahead.b[z];
742 if(cal.b[z] != r1->calahead.b[z]) {
743 r1->calahead.b[z] = cal.b[z];
744 change++;
745 }
746 }
747 switch(r1->prog->as) {
748 case ACALL:
749 for(z=0; z<BITS; z++) {
750 cal.b[z] |= ref.b[z] | externs.b[z];
751 ref.b[z] = 0;
752 }
753 break;
754
755 case ATEXT:
756 for(z=0; z<BITS; z++) {
757 cal.b[z] = 0;
758 ref.b[z] = 0;
759 }
760 break;
761
762 case ARET:
763 for(z=0; z<BITS; z++) {
764 cal.b[z] = externs.b[z];
765 ref.b[z] = 0;
766 }
767 }
768 for(z=0; z<BITS; z++) {
769 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
770 r1->use1.b[z] | r1->use2.b[z];
771 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
772 r1->refbehind.b[z] = ref.b[z];
773 r1->calbehind.b[z] = cal.b[z];
774 }
775 if(r1->active)
776 break;
777 r1->active = 1;
778 }
779 for(; r != r1; r = r->p1)
780 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
781 prop(r2, r->refbehind, r->calbehind);
782 }
783
784 /*
785 * find looping structure
786 *
787 * 1) find reverse postordering
788 * 2) find approximate dominators,
789 * the actual dominators if the flow graph is reducible
790 * otherwise, dominators plus some other non-dominators.
791 * See Matthew S. Hecht and Jeffrey D. Ullman,
792 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
793 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
794 * Oct. 1-3, 1973, pp. 207-217.
795 * 3) find all nodes with a predecessor dominated by the current node.
796 * such a node is a loop head.
797 * recursively, all preds with a greater rpo number are in the loop
798 */
799 long
postorder(Reg * r,Reg ** rpo2r,long n)800 postorder(Reg *r, Reg **rpo2r, long n)
801 {
802 Reg *r1;
803
804 r->rpo = 1;
805 r1 = r->s1;
806 if(r1 && !r1->rpo)
807 n = postorder(r1, rpo2r, n);
808 r1 = r->s2;
809 if(r1 && !r1->rpo)
810 n = postorder(r1, rpo2r, n);
811 rpo2r[n] = r;
812 n++;
813 return n;
814 }
815
816 long
rpolca(long * idom,long rpo1,long rpo2)817 rpolca(long *idom, long rpo1, long rpo2)
818 {
819 long t;
820
821 if(rpo1 == -1)
822 return rpo2;
823 while(rpo1 != rpo2){
824 if(rpo1 > rpo2){
825 t = rpo2;
826 rpo2 = rpo1;
827 rpo1 = t;
828 }
829 while(rpo1 < rpo2){
830 t = idom[rpo2];
831 if(t >= rpo2)
832 fatal(Z, "bad idom");
833 rpo2 = t;
834 }
835 }
836 return rpo1;
837 }
838
839 int
doms(long * idom,long r,long s)840 doms(long *idom, long r, long s)
841 {
842 while(s > r)
843 s = idom[s];
844 return s == r;
845 }
846
847 int
loophead(long * idom,Reg * r)848 loophead(long *idom, Reg *r)
849 {
850 long src;
851
852 src = r->rpo;
853 if(r->p1 != R && doms(idom, src, r->p1->rpo))
854 return 1;
855 for(r = r->p2; r != R; r = r->p2link)
856 if(doms(idom, src, r->rpo))
857 return 1;
858 return 0;
859 }
860
861 void
loopmark(Reg ** rpo2r,long head,Reg * r)862 loopmark(Reg **rpo2r, long head, Reg *r)
863 {
864 if(r->rpo < head || r->active == head)
865 return;
866 r->active = head;
867 r->loop += LOOP;
868 if(r->p1 != R)
869 loopmark(rpo2r, head, r->p1);
870 for(r = r->p2; r != R; r = r->p2link)
871 loopmark(rpo2r, head, r);
872 }
873
874 void
loopit(Reg * r,long nr)875 loopit(Reg *r, long nr)
876 {
877 Reg *r1;
878 long i, d, me;
879
880 if(nr > maxnr) {
881 rpo2r = alloc(nr * sizeof(Reg*));
882 idom = alloc(nr * sizeof(long));
883 maxnr = nr;
884 }
885
886 d = postorder(r, rpo2r, 0);
887 if(d > nr)
888 fatal(Z, "too many reg nodes");
889 nr = d;
890 for(i = 0; i < nr / 2; i++){
891 r1 = rpo2r[i];
892 rpo2r[i] = rpo2r[nr - 1 - i];
893 rpo2r[nr - 1 - i] = r1;
894 }
895 for(i = 0; i < nr; i++)
896 rpo2r[i]->rpo = i;
897
898 idom[0] = 0;
899 for(i = 0; i < nr; i++){
900 r1 = rpo2r[i];
901 me = r1->rpo;
902 d = -1;
903 if(r1->p1 != R && r1->p1->rpo < me)
904 d = r1->p1->rpo;
905 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
906 if(r1->rpo < me)
907 d = rpolca(idom, d, r1->rpo);
908 idom[i] = d;
909 }
910
911 for(i = 0; i < nr; i++){
912 r1 = rpo2r[i];
913 r1->loop++;
914 if(r1->p2 != R && loophead(idom, r1))
915 loopmark(rpo2r, i, r1);
916 }
917 }
918
919 void
synch(Reg * r,Bits dif)920 synch(Reg *r, Bits dif)
921 {
922 Reg *r1;
923 int z;
924
925 for(r1 = r; r1 != R; r1 = r1->s1) {
926 for(z=0; z<BITS; z++) {
927 dif.b[z] = (dif.b[z] &
928 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
929 r1->set.b[z] | r1->regdiff.b[z];
930 if(dif.b[z] != r1->regdiff.b[z]) {
931 r1->regdiff.b[z] = dif.b[z];
932 change++;
933 }
934 }
935 if(r1->active)
936 break;
937 r1->active = 1;
938 for(z=0; z<BITS; z++)
939 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
940 if(r1->s2 != R)
941 synch(r1->s2, dif);
942 }
943 }
944
945 ulong
allreg(ulong b,Rgn * r)946 allreg(ulong b, Rgn *r)
947 {
948 Var *v;
949 int i;
950
951 v = var + r->varno;
952 r->regno = 0;
953 switch(v->etype) {
954
955 default:
956 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
957 break;
958
959 case TCHAR:
960 case TUCHAR:
961 case TSHORT:
962 case TUSHORT:
963 case TINT:
964 case TUINT:
965 case TLONG:
966 case TULONG:
967 case TIND:
968 case TARRAY:
969 i = BtoR(~b);
970 if(i && r->cost > 0) {
971 r->regno = i;
972 return RtoB(i);
973 }
974 break;
975
976 case TDOUBLE:
977 case TFLOAT:
978 break;
979 }
980 return 0;
981 }
982
983 void
paint1(Reg * r,int bn)984 paint1(Reg *r, int bn)
985 {
986 Reg *r1;
987 Prog *p;
988 int z;
989 ulong bb;
990
991 z = bn/32;
992 bb = 1L<<(bn%32);
993 if(r->act.b[z] & bb)
994 return;
995 for(;;) {
996 if(!(r->refbehind.b[z] & bb))
997 break;
998 r1 = r->p1;
999 if(r1 == R)
1000 break;
1001 if(!(r1->refahead.b[z] & bb))
1002 break;
1003 if(r1->act.b[z] & bb)
1004 break;
1005 r = r1;
1006 }
1007
1008 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1009 change -= CLOAD * r->loop;
1010 if(debug['R'] && debug['v'])
1011 print("%ld%P\tld %B $%d\n", r->loop,
1012 r->prog, blsh(bn), change);
1013 }
1014 for(;;) {
1015 r->act.b[z] |= bb;
1016 p = r->prog;
1017
1018 if(r->use1.b[z] & bb) {
1019 change += CREF * r->loop;
1020 if(p->as == AFMOVL || p->as == AFMOVW)
1021 if(BtoR(bb) != D_F0)
1022 change = -CINF;
1023 if(debug['R'] && debug['v'])
1024 print("%ld%P\tu1 %B $%d\n", r->loop,
1025 p, blsh(bn), change);
1026 }
1027
1028 if((r->use2.b[z]|r->set.b[z]) & bb) {
1029 change += CREF * r->loop;
1030 if(p->as == AFMOVL || p->as == AFMOVW)
1031 if(BtoR(bb) != D_F0)
1032 change = -CINF;
1033 if(debug['R'] && debug['v'])
1034 print("%ld%P\tu2 %B $%d\n", r->loop,
1035 p, blsh(bn), change);
1036 }
1037
1038 if(STORE(r) & r->regdiff.b[z] & bb) {
1039 change -= CLOAD * r->loop;
1040 if(p->as == AFMOVL || p->as == AFMOVW)
1041 if(BtoR(bb) != D_F0)
1042 change = -CINF;
1043 if(debug['R'] && debug['v'])
1044 print("%ld%P\tst %B $%d\n", r->loop,
1045 p, blsh(bn), change);
1046 }
1047
1048 if(r->refbehind.b[z] & bb)
1049 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1050 if(r1->refahead.b[z] & bb)
1051 paint1(r1, bn);
1052
1053 if(!(r->refahead.b[z] & bb))
1054 break;
1055 r1 = r->s2;
1056 if(r1 != R)
1057 if(r1->refbehind.b[z] & bb)
1058 paint1(r1, bn);
1059 r = r->s1;
1060 if(r == R)
1061 break;
1062 if(r->act.b[z] & bb)
1063 break;
1064 if(!(r->refbehind.b[z] & bb))
1065 break;
1066 }
1067 }
1068
1069 ulong
regset(Reg * r,ulong bb)1070 regset(Reg *r, ulong bb)
1071 {
1072 ulong b, set;
1073 Adr v;
1074 int c;
1075
1076 set = 0;
1077 v = zprog.from;
1078 while(b = bb & ~(bb-1)) {
1079 v.type = BtoR(b);
1080 c = copyu(r->prog, &v, A);
1081 if(c == 3)
1082 set |= b;
1083 bb &= ~b;
1084 }
1085 return set;
1086 }
1087
1088 ulong
reguse(Reg * r,ulong bb)1089 reguse(Reg *r, ulong bb)
1090 {
1091 ulong b, set;
1092 Adr v;
1093 int c;
1094
1095 set = 0;
1096 v = zprog.from;
1097 while(b = bb & ~(bb-1)) {
1098 v.type = BtoR(b);
1099 c = copyu(r->prog, &v, A);
1100 if(c == 1 || c == 2 || c == 4)
1101 set |= b;
1102 bb &= ~b;
1103 }
1104 return set;
1105 }
1106
1107 ulong
paint2(Reg * r,int bn)1108 paint2(Reg *r, int bn)
1109 {
1110 Reg *r1;
1111 int z;
1112 ulong bb, vreg, x;
1113
1114 z = bn/32;
1115 bb = 1L << (bn%32);
1116 vreg = regbits;
1117 if(!(r->act.b[z] & bb))
1118 return vreg;
1119 for(;;) {
1120 if(!(r->refbehind.b[z] & bb))
1121 break;
1122 r1 = r->p1;
1123 if(r1 == R)
1124 break;
1125 if(!(r1->refahead.b[z] & bb))
1126 break;
1127 if(!(r1->act.b[z] & bb))
1128 break;
1129 r = r1;
1130 }
1131 for(;;) {
1132 r->act.b[z] &= ~bb;
1133
1134 vreg |= r->regu;
1135
1136 if(r->refbehind.b[z] & bb)
1137 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1138 if(r1->refahead.b[z] & bb)
1139 vreg |= paint2(r1, bn);
1140
1141 if(!(r->refahead.b[z] & bb))
1142 break;
1143 r1 = r->s2;
1144 if(r1 != R)
1145 if(r1->refbehind.b[z] & bb)
1146 vreg |= paint2(r1, bn);
1147 r = r->s1;
1148 if(r == R)
1149 break;
1150 if(!(r->act.b[z] & bb))
1151 break;
1152 if(!(r->refbehind.b[z] & bb))
1153 break;
1154 }
1155
1156 bb = vreg;
1157 for(; r; r=r->s1) {
1158 x = r->regu & ~bb;
1159 if(x) {
1160 vreg |= reguse(r, x);
1161 bb |= regset(r, x);
1162 }
1163 }
1164 return vreg;
1165 }
1166
1167 void
paint3(Reg * r,int bn,long rb,int rn)1168 paint3(Reg *r, int bn, long rb, int rn)
1169 {
1170 Reg *r1;
1171 Prog *p;
1172 int z;
1173 ulong bb;
1174
1175 z = bn/32;
1176 bb = 1L << (bn%32);
1177 if(r->act.b[z] & bb)
1178 return;
1179 for(;;) {
1180 if(!(r->refbehind.b[z] & bb))
1181 break;
1182 r1 = r->p1;
1183 if(r1 == R)
1184 break;
1185 if(!(r1->refahead.b[z] & bb))
1186 break;
1187 if(r1->act.b[z] & bb)
1188 break;
1189 r = r1;
1190 }
1191
1192 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1193 addmove(r, bn, rn, 0);
1194 for(;;) {
1195 r->act.b[z] |= bb;
1196 p = r->prog;
1197
1198 if(r->use1.b[z] & bb) {
1199 if(debug['R'])
1200 print("%P", p);
1201 addreg(&p->from, rn);
1202 if(debug['R'])
1203 print("\t.c%P\n", p);
1204 }
1205 if((r->use2.b[z]|r->set.b[z]) & bb) {
1206 if(debug['R'])
1207 print("%P", p);
1208 addreg(&p->to, rn);
1209 if(debug['R'])
1210 print("\t.c%P\n", p);
1211 }
1212
1213 if(STORE(r) & r->regdiff.b[z] & bb)
1214 addmove(r, bn, rn, 1);
1215 r->regu |= rb;
1216
1217 if(r->refbehind.b[z] & bb)
1218 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1219 if(r1->refahead.b[z] & bb)
1220 paint3(r1, bn, rb, rn);
1221
1222 if(!(r->refahead.b[z] & bb))
1223 break;
1224 r1 = r->s2;
1225 if(r1 != R)
1226 if(r1->refbehind.b[z] & bb)
1227 paint3(r1, bn, rb, rn);
1228 r = r->s1;
1229 if(r == R)
1230 break;
1231 if(r->act.b[z] & bb)
1232 break;
1233 if(!(r->refbehind.b[z] & bb))
1234 break;
1235 }
1236 }
1237
1238 void
addreg(Adr * a,int rn)1239 addreg(Adr *a, int rn)
1240 {
1241
1242 a->sym = 0;
1243 a->offset = 0;
1244 a->type = rn;
1245 }
1246
1247 long
RtoB(int r)1248 RtoB(int r)
1249 {
1250
1251 if(r < D_AX || r > D_DI)
1252 return 0;
1253 return 1L << (r-D_AX);
1254 }
1255
1256 int
BtoR(long b)1257 BtoR(long b)
1258 {
1259
1260 b &= 0xffL;
1261 if(b == 0)
1262 return 0;
1263 return bitno(b) + D_AX;
1264 }
1265