1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(const void * a1,const void * a2)19 rcmp(const void *a1, const void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = (Rgn*)a1;
25 p2 = (Rgn*)a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
53 if(REGEXT)
54 regbits |= RtoB(REGEXT) | RtoB(REGEXT-1);
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 case AIRETL:
120 case AIRETQ:
121 r->p1 = R;
122 r1->s1 = R;
123 }
124
125 bit = mkvar(r, &p->from);
126 if(bany(&bit))
127 switch(p->as) {
128 /*
129 * funny
130 */
131 case ALEAL:
132 case ALEAQ:
133 for(z=0; z<BITS; z++)
134 addrs.b[z] |= bit.b[z];
135 break;
136
137 /*
138 * left side read
139 */
140 default:
141 for(z=0; z<BITS; z++)
142 r->use1.b[z] |= bit.b[z];
143 break;
144 }
145
146 bit = mkvar(r, &p->to);
147 if(bany(&bit))
148 switch(p->as) {
149 default:
150 diag(Z, "reg: unknown op: %A", p->as);
151 break;
152
153 /*
154 * right side read
155 */
156 case ACMPB:
157 case ACMPL:
158 case ACMPQ:
159 case ACMPW:
160 case ACOMISS:
161 case ACOMISD:
162 case AUCOMISS:
163 case AUCOMISD:
164 for(z=0; z<BITS; z++)
165 r->use2.b[z] |= bit.b[z];
166 break;
167
168 /*
169 * right side write
170 */
171 case ANOP:
172 case AMOVL:
173 case AMOVQ:
174 case AMOVB:
175 case AMOVW:
176 case AMOVBLSX:
177 case AMOVBLZX:
178 case AMOVBQSX:
179 case AMOVBQZX:
180 case AMOVLQSX:
181 case AMOVLQZX:
182 case AMOVWLSX:
183 case AMOVWLZX:
184 case AMOVWQSX:
185 case AMOVWQZX:
186 case AMOVQL:
187
188 case AMOVSS:
189 case AMOVSD:
190 case ACVTSD2SL:
191 case ACVTSD2SQ:
192 case ACVTSD2SS:
193 case ACVTSL2SD:
194 case ACVTSL2SS:
195 case ACVTSQ2SD:
196 case ACVTSQ2SS:
197 case ACVTSS2SD:
198 case ACVTSS2SL:
199 case ACVTSS2SQ:
200 case ACVTTSD2SL:
201 case ACVTTSD2SQ:
202 case ACVTTSS2SL:
203 case ACVTTSS2SQ:
204 for(z=0; z<BITS; z++)
205 r->set.b[z] |= bit.b[z];
206 break;
207
208 /*
209 * right side read+write
210 */
211 case AADDB:
212 case AADDL:
213 case AADDQ:
214 case AADDW:
215 case AANDB:
216 case AANDL:
217 case AANDQ:
218 case AANDW:
219 case ASUBB:
220 case ASUBL:
221 case ASUBQ:
222 case ASUBW:
223 case AORB:
224 case AORL:
225 case AORQ:
226 case AORW:
227 case AXORB:
228 case AXORL:
229 case AXORQ:
230 case AXORW:
231 case ASALB:
232 case ASALL:
233 case ASALQ:
234 case ASALW:
235 case ASARB:
236 case ASARL:
237 case ASARQ:
238 case ASARW:
239 case AROLB:
240 case AROLL:
241 case AROLQ:
242 case AROLW:
243 case ARORB:
244 case ARORL:
245 case ARORQ:
246 case ARORW:
247 case ASHLB:
248 case ASHLL:
249 case ASHLQ:
250 case ASHLW:
251 case ASHRB:
252 case ASHRL:
253 case ASHRQ:
254 case ASHRW:
255 case AIMULL:
256 case AIMULQ:
257 case AIMULW:
258 case ANEGL:
259 case ANEGQ:
260 case ANOTL:
261 case ANOTQ:
262 case AADCL:
263 case AADCQ:
264 case ASBBL:
265 case ASBBQ:
266
267 case AADDSD:
268 case AADDSS:
269 case ACMPSD:
270 case ACMPSS:
271 case ADIVSD:
272 case ADIVSS:
273 case AMAXSD:
274 case AMAXSS:
275 case AMINSD:
276 case AMINSS:
277 case AMULSD:
278 case AMULSS:
279 case ARCPSS:
280 case ARSQRTSS:
281 case ASQRTSD:
282 case ASQRTSS:
283 case ASUBSD:
284 case ASUBSS:
285 case AXORPD:
286 for(z=0; z<BITS; z++) {
287 r->set.b[z] |= bit.b[z];
288 r->use2.b[z] |= bit.b[z];
289 }
290 break;
291
292 /*
293 * funny
294 */
295 case ACALL:
296 for(z=0; z<BITS; z++)
297 addrs.b[z] |= bit.b[z];
298 break;
299 }
300
301 switch(p->as) {
302 case AIMULL:
303 case AIMULQ:
304 case AIMULW:
305 if(p->to.type != D_NONE)
306 break;
307
308 case AIDIVB:
309 case AIDIVL:
310 case AIDIVQ:
311 case AIDIVW:
312 case AIMULB:
313 case ADIVB:
314 case ADIVL:
315 case ADIVQ:
316 case ADIVW:
317 case AMULB:
318 case AMULL:
319 case AMULQ:
320 case AMULW:
321
322 case ACWD:
323 case ACDQ:
324 case ACQO:
325 r->regu |= RtoB(D_AX) | RtoB(D_DX);
326 break;
327
328 case AREP:
329 case AREPN:
330 case ALOOP:
331 case ALOOPEQ:
332 case ALOOPNE:
333 r->regu |= RtoB(D_CX);
334 break;
335
336 case AMOVSB:
337 case AMOVSL:
338 case AMOVSQ:
339 case AMOVSW:
340 case ACMPSB:
341 case ACMPSL:
342 case ACMPSQ:
343 case ACMPSW:
344 r->regu |= RtoB(D_SI) | RtoB(D_DI);
345 break;
346
347 case ASTOSB:
348 case ASTOSL:
349 case ASTOSQ:
350 case ASTOSW:
351 case ASCASB:
352 case ASCASL:
353 case ASCASQ:
354 case ASCASW:
355 r->regu |= RtoB(D_AX) | RtoB(D_DI);
356 break;
357
358 case AINSB:
359 case AINSL:
360 case AINSW:
361 case AOUTSB:
362 case AOUTSL:
363 case AOUTSW:
364 r->regu |= RtoB(D_DI) | RtoB(D_DX);
365 break;
366 }
367 }
368 if(firstr == R)
369 return;
370 initpc = pc - val;
371 npc = val;
372
373 /*
374 * pass 2
375 * turn branch references to pointers
376 * build back pointers
377 */
378 for(r = firstr; r != R; r = r->link) {
379 p = r->prog;
380 if(p->to.type == D_BRANCH) {
381 val = p->to.offset - initpc;
382 r1 = firstr;
383 while(r1 != R) {
384 r2 = r1->log5;
385 if(r2 != R && val >= r2->pc) {
386 r1 = r2;
387 continue;
388 }
389 if(r1->pc == val)
390 break;
391 r1 = r1->link;
392 }
393 if(r1 == R) {
394 nearln = p->lineno;
395 diag(Z, "ref not found\n%P", p);
396 continue;
397 }
398 if(r1 == r) {
399 nearln = p->lineno;
400 diag(Z, "ref to self\n%P", p);
401 continue;
402 }
403 r->s2 = r1;
404 r->p2link = r1->p2;
405 r1->p2 = r;
406 }
407 }
408 if(debug['R']) {
409 p = firstr->prog;
410 print("\n%L %D\n", p->lineno, &p->from);
411 }
412
413 /*
414 * pass 2.5
415 * find looping structure
416 */
417 for(r = firstr; r != R; r = r->link)
418 r->active = 0;
419 change = 0;
420 loopit(firstr, npc);
421 if(debug['R'] && debug['v']) {
422 print("\nlooping structure:\n");
423 for(r = firstr; r != R; r = r->link) {
424 print("%ld:%P", r->loop, r->prog);
425 for(z=0; z<BITS; z++)
426 bit.b[z] = r->use1.b[z] |
427 r->use2.b[z] |
428 r->set.b[z];
429 if(bany(&bit)) {
430 print("\t");
431 if(bany(&r->use1))
432 print(" u1=%B", r->use1);
433 if(bany(&r->use2))
434 print(" u2=%B", r->use2);
435 if(bany(&r->set))
436 print(" st=%B", r->set);
437 }
438 print("\n");
439 }
440 }
441
442 /*
443 * pass 3
444 * iterate propagating usage
445 * back until flow graph is complete
446 */
447 loop1:
448 change = 0;
449 for(r = firstr; r != R; r = r->link)
450 r->active = 0;
451 for(r = firstr; r != R; r = r->link)
452 if(r->prog->as == ARET)
453 prop(r, zbits, zbits);
454 loop11:
455 /* pick up unreachable code */
456 i = 0;
457 for(r = firstr; r != R; r = r1) {
458 r1 = r->link;
459 if(r1 && r1->active && !r->active) {
460 prop(r, zbits, zbits);
461 i = 1;
462 }
463 }
464 if(i)
465 goto loop11;
466 if(change)
467 goto loop1;
468
469
470 /*
471 * pass 4
472 * iterate propagating register/variable synchrony
473 * forward until graph is complete
474 */
475 loop2:
476 change = 0;
477 for(r = firstr; r != R; r = r->link)
478 r->active = 0;
479 synch(firstr, zbits);
480 if(change)
481 goto loop2;
482
483
484 /*
485 * pass 5
486 * isolate regions
487 * calculate costs (paint1)
488 */
489 r = firstr;
490 if(r) {
491 for(z=0; z<BITS; z++)
492 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
493 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
494 if(bany(&bit)) {
495 nearln = r->prog->lineno;
496 warn(Z, "used and not set: %B", bit);
497 if(debug['R'] && !debug['w'])
498 print("used and not set: %B\n", bit);
499 }
500 }
501 if(debug['R'] && debug['v'])
502 print("\nprop structure:\n");
503 for(r = firstr; r != R; r = r->link)
504 r->act = zbits;
505 rgp = region;
506 nregion = 0;
507 for(r = firstr; r != R; r = r->link) {
508 if(debug['R'] && debug['v']) {
509 print("%P\t", r->prog);
510 if(bany(&r->set))
511 print("s:%B ", r->set);
512 if(bany(&r->refahead))
513 print("ra:%B ", r->refahead);
514 if(bany(&r->calahead))
515 print("ca:%B ", r->calahead);
516 print("\n");
517 }
518 for(z=0; z<BITS; z++)
519 bit.b[z] = r->set.b[z] &
520 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
521 if(bany(&bit)) {
522 nearln = r->prog->lineno;
523 warn(Z, "set and not used: %B", bit);
524 if(debug['R'])
525 print("set and not used: %B\n", bit);
526 excise(r);
527 }
528 for(z=0; z<BITS; z++)
529 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
530 while(bany(&bit)) {
531 i = bnum(bit);
532 rgp->enter = r;
533 rgp->varno = i;
534 change = 0;
535 if(debug['R'] && debug['v'])
536 print("\n");
537 paint1(r, i);
538 bit.b[i/32] &= ~(1L<<(i%32));
539 if(change <= 0) {
540 if(debug['R'])
541 print("%L$%d: %B\n",
542 r->prog->lineno, change, blsh(i));
543 continue;
544 }
545 rgp->cost = change;
546 nregion++;
547 if(nregion >= NRGN) {
548 warn(Z, "too many regions");
549 goto brk;
550 }
551 rgp++;
552 }
553 }
554 brk:
555 qsort(region, nregion, sizeof(region[0]), rcmp);
556
557 /*
558 * pass 6
559 * determine used registers (paint2)
560 * replace code (paint3)
561 */
562 rgp = region;
563 for(i=0; i<nregion; i++) {
564 bit = blsh(rgp->varno);
565 vreg = paint2(rgp->enter, rgp->varno);
566 vreg = allreg(vreg, rgp);
567 if(debug['R']) {
568 print("%L$%d %R: %B\n",
569 rgp->enter->prog->lineno,
570 rgp->cost,
571 rgp->regno,
572 bit);
573 }
574 if(rgp->regno != 0)
575 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
576 rgp++;
577 }
578 /*
579 * pass 7
580 * peep-hole on basic block
581 */
582 if(!debug['R'] || debug['P'])
583 peep();
584
585 /*
586 * pass 8
587 * recalculate pc
588 */
589 val = initpc;
590 for(r = firstr; r != R; r = r1) {
591 r->pc = val;
592 p = r->prog;
593 p1 = P;
594 r1 = r->link;
595 if(r1 != R)
596 p1 = r1->prog;
597 for(; p != p1; p = p->link) {
598 switch(p->as) {
599 default:
600 val++;
601 break;
602
603 case ANOP:
604 case ADATA:
605 case AGLOBL:
606 case ANAME:
607 case ASIGNAME:
608 break;
609 }
610 }
611 }
612 pc = val;
613
614 /*
615 * fix up branches
616 */
617 if(debug['R'])
618 if(bany(&addrs))
619 print("addrs: %B\n", addrs);
620
621 r1 = 0; /* set */
622 for(r = firstr; r != R; r = r->link) {
623 p = r->prog;
624 if(p->to.type == D_BRANCH)
625 p->to.offset = r->s2->pc;
626 r1 = r;
627 }
628
629 /*
630 * last pass
631 * eliminate nops
632 * free aux structures
633 */
634 for(p = firstr->prog; p != P; p = p->link){
635 while(p->link && p->link->as == ANOP)
636 p->link = p->link->link;
637 }
638 if(r1 != R) {
639 r1->link = freer;
640 freer = firstr;
641 }
642 }
643
644 /*
645 * add mov b,rn
646 * just after r
647 */
648 void
addmove(Reg * r,int bn,int rn,int f)649 addmove(Reg *r, int bn, int rn, int f)
650 {
651 Prog *p, *p1;
652 Adr *a;
653 Var *v;
654
655 p1 = alloc(sizeof(*p1));
656 *p1 = zprog;
657 p = r->prog;
658
659 p1->link = p->link;
660 p->link = p1;
661 p1->lineno = p->lineno;
662
663 v = var + bn;
664
665 a = &p1->to;
666 a->sym = v->sym;
667 a->offset = v->offset;
668 a->etype = v->etype;
669 a->type = v->name;
670
671 p1->as = AMOVL;
672 if(v->etype == TCHAR || v->etype == TUCHAR)
673 p1->as = AMOVB;
674 if(v->etype == TSHORT || v->etype == TUSHORT)
675 p1->as = AMOVW;
676 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
677 p1->as = AMOVQ;
678 if(v->etype == TFLOAT)
679 p1->as = AMOVSS;
680 if(v->etype == TDOUBLE)
681 p1->as = AMOVSD;
682
683 p1->from.type = rn;
684 if(!f) {
685 p1->from = *a;
686 *a = zprog.from;
687 a->type = rn;
688 if(v->etype == TUCHAR)
689 p1->as = AMOVB;
690 if(v->etype == TUSHORT)
691 p1->as = AMOVW;
692 }
693 if(debug['R'])
694 print("%P\t.a%P\n", p, p1);
695 }
696
697 ulong
doregbits(int r)698 doregbits(int r)
699 {
700 ulong b;
701
702 b = 0;
703 if(r >= D_INDIR)
704 r -= D_INDIR;
705 if(r >= D_AX && r <= D_R15)
706 b |= RtoB(r);
707 else
708 if(r >= D_AL && r <= D_R15B)
709 b |= RtoB(r-D_AL+D_AX);
710 else
711 if(r >= D_AH && r <= D_BH)
712 b |= RtoB(r-D_AH+D_AX);
713 else
714 if(r >= D_X0 && r <= D_X0+15)
715 b |= FtoB(r);
716 return b;
717 }
718
719 Bits
mkvar(Reg * r,Adr * a)720 mkvar(Reg *r, Adr *a)
721 {
722 Var *v;
723 int i, t, n, et, z;
724 long o;
725 Bits bit;
726 Sym *s;
727
728 /*
729 * mark registers used
730 */
731 t = a->type;
732 r->regu |= doregbits(t);
733 r->regu |= doregbits(a->index);
734
735 s = a->sym;
736 switch(t) {
737 default:
738 goto none;
739 case D_ADDR:
740 a->type = a->index;
741 bit = mkvar(r, a);
742 for(z=0; z<BITS; z++)
743 addrs.b[z] |= bit.b[z];
744 a->type = t;
745 goto none;
746 case D_EXTERN:
747 case D_STATIC:
748 if(s->type != nil && s->type->garb & GVOLATILE)
749 goto none;
750 case D_PARAM:
751 case D_AUTO:
752 n = t;
753 break;
754 }
755 if(s == S)
756 goto none;
757 if(s->name[0] == '.')
758 goto none;
759 et = a->etype;
760 o = a->offset;
761 v = var;
762 for(i=0; i<nvar; i++) {
763 if(s == v->sym)
764 if(n == v->name)
765 if(o == v->offset)
766 goto out;
767 v++;
768 }
769 if(nvar >= NVAR) {
770 if(debug['w'] > 1 && s)
771 warn(Z, "variable not optimized: %s", s->name);
772 goto none;
773 }
774 i = nvar;
775 nvar++;
776 v = &var[i];
777 v->sym = s;
778 v->offset = o;
779 v->name = n;
780 v->etype = et;
781 if(debug['R'])
782 print("bit=%2d et=%2d %D\n", i, et, a);
783
784 out:
785 bit = blsh(i);
786 if(n == D_EXTERN || n == D_STATIC)
787 for(z=0; z<BITS; z++)
788 externs.b[z] |= bit.b[z];
789 if(n == D_PARAM)
790 for(z=0; z<BITS; z++)
791 params.b[z] |= bit.b[z];
792 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */
793 for(z=0; z<BITS; z++)
794 addrs.b[z] |= bit.b[z];
795 return bit;
796
797 none:
798 return zbits;
799 }
800
801 void
prop(Reg * r,Bits ref,Bits cal)802 prop(Reg *r, Bits ref, Bits cal)
803 {
804 Reg *r1, *r2;
805 int z;
806
807 for(r1 = r; r1 != R; r1 = r1->p1) {
808 for(z=0; z<BITS; z++) {
809 ref.b[z] |= r1->refahead.b[z];
810 if(ref.b[z] != r1->refahead.b[z]) {
811 r1->refahead.b[z] = ref.b[z];
812 change++;
813 }
814 cal.b[z] |= r1->calahead.b[z];
815 if(cal.b[z] != r1->calahead.b[z]) {
816 r1->calahead.b[z] = cal.b[z];
817 change++;
818 }
819 }
820 switch(r1->prog->as) {
821 case ACALL:
822 for(z=0; z<BITS; z++) {
823 cal.b[z] |= ref.b[z] | externs.b[z];
824 ref.b[z] = 0;
825 }
826 break;
827
828 case ATEXT:
829 for(z=0; z<BITS; z++) {
830 cal.b[z] = 0;
831 ref.b[z] = 0;
832 }
833 break;
834
835 case ARET:
836 for(z=0; z<BITS; z++) {
837 cal.b[z] = externs.b[z];
838 ref.b[z] = 0;
839 }
840 }
841 for(z=0; z<BITS; z++) {
842 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
843 r1->use1.b[z] | r1->use2.b[z];
844 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
845 r1->refbehind.b[z] = ref.b[z];
846 r1->calbehind.b[z] = cal.b[z];
847 }
848 if(r1->active)
849 break;
850 r1->active = 1;
851 }
852 for(; r != r1; r = r->p1)
853 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
854 prop(r2, r->refbehind, r->calbehind);
855 }
856
857 /*
858 * find looping structure
859 *
860 * 1) find reverse postordering
861 * 2) find approximate dominators,
862 * the actual dominators if the flow graph is reducible
863 * otherwise, dominators plus some other non-dominators.
864 * See Matthew S. Hecht and Jeffrey D. Ullman,
865 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
866 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
867 * Oct. 1-3, 1973, pp. 207-217.
868 * 3) find all nodes with a predecessor dominated by the current node.
869 * such a node is a loop head.
870 * recursively, all preds with a greater rpo number are in the loop
871 */
872 long
postorder(Reg * r,Reg ** rpo2r,long n)873 postorder(Reg *r, Reg **rpo2r, long n)
874 {
875 Reg *r1;
876
877 r->rpo = 1;
878 r1 = r->s1;
879 if(r1 && !r1->rpo)
880 n = postorder(r1, rpo2r, n);
881 r1 = r->s2;
882 if(r1 && !r1->rpo)
883 n = postorder(r1, rpo2r, n);
884 rpo2r[n] = r;
885 n++;
886 return n;
887 }
888
889 long
rpolca(long * idom,long rpo1,long rpo2)890 rpolca(long *idom, long rpo1, long rpo2)
891 {
892 long t;
893
894 if(rpo1 == -1)
895 return rpo2;
896 while(rpo1 != rpo2){
897 if(rpo1 > rpo2){
898 t = rpo2;
899 rpo2 = rpo1;
900 rpo1 = t;
901 }
902 while(rpo1 < rpo2){
903 t = idom[rpo2];
904 if(t >= rpo2)
905 fatal(Z, "bad idom");
906 rpo2 = t;
907 }
908 }
909 return rpo1;
910 }
911
912 int
doms(long * idom,long r,long s)913 doms(long *idom, long r, long s)
914 {
915 while(s > r)
916 s = idom[s];
917 return s == r;
918 }
919
920 int
loophead(long * idom,Reg * r)921 loophead(long *idom, Reg *r)
922 {
923 long src;
924
925 src = r->rpo;
926 if(r->p1 != R && doms(idom, src, r->p1->rpo))
927 return 1;
928 for(r = r->p2; r != R; r = r->p2link)
929 if(doms(idom, src, r->rpo))
930 return 1;
931 return 0;
932 }
933
934 void
loopmark(Reg ** rpo2r,long head,Reg * r)935 loopmark(Reg **rpo2r, long head, Reg *r)
936 {
937 if(r->rpo < head || r->active == head)
938 return;
939 r->active = head;
940 r->loop += LOOP;
941 if(r->p1 != R)
942 loopmark(rpo2r, head, r->p1);
943 for(r = r->p2; r != R; r = r->p2link)
944 loopmark(rpo2r, head, r);
945 }
946
947 void
loopit(Reg * r,long nr)948 loopit(Reg *r, long nr)
949 {
950 Reg *r1;
951 long i, d, me;
952
953 if(nr > maxnr) {
954 rpo2r = alloc(nr * sizeof(Reg*));
955 idom = alloc(nr * sizeof(long));
956 maxnr = nr;
957 }
958
959 d = postorder(r, rpo2r, 0);
960 if(d > nr)
961 fatal(Z, "too many reg nodes");
962 nr = d;
963 for(i = 0; i < nr / 2; i++){
964 r1 = rpo2r[i];
965 rpo2r[i] = rpo2r[nr - 1 - i];
966 rpo2r[nr - 1 - i] = r1;
967 }
968 for(i = 0; i < nr; i++)
969 rpo2r[i]->rpo = i;
970
971 idom[0] = 0;
972 for(i = 0; i < nr; i++){
973 r1 = rpo2r[i];
974 me = r1->rpo;
975 d = -1;
976 if(r1->p1 != R && r1->p1->rpo < me)
977 d = r1->p1->rpo;
978 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
979 if(r1->rpo < me)
980 d = rpolca(idom, d, r1->rpo);
981 idom[i] = d;
982 }
983
984 for(i = 0; i < nr; i++){
985 r1 = rpo2r[i];
986 r1->loop++;
987 if(r1->p2 != R && loophead(idom, r1))
988 loopmark(rpo2r, i, r1);
989 }
990 }
991
992 void
synch(Reg * r,Bits dif)993 synch(Reg *r, Bits dif)
994 {
995 Reg *r1;
996 int z;
997
998 for(r1 = r; r1 != R; r1 = r1->s1) {
999 for(z=0; z<BITS; z++) {
1000 dif.b[z] = (dif.b[z] &
1001 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
1002 r1->set.b[z] | r1->regdiff.b[z];
1003 if(dif.b[z] != r1->regdiff.b[z]) {
1004 r1->regdiff.b[z] = dif.b[z];
1005 change++;
1006 }
1007 }
1008 if(r1->active)
1009 break;
1010 r1->active = 1;
1011 for(z=0; z<BITS; z++)
1012 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1013 if(r1->s2 != R)
1014 synch(r1->s2, dif);
1015 }
1016 }
1017
1018 ulong
allreg(ulong b,Rgn * r)1019 allreg(ulong b, Rgn *r)
1020 {
1021 Var *v;
1022 int i;
1023
1024 v = var + r->varno;
1025 r->regno = 0;
1026 switch(v->etype) {
1027
1028 default:
1029 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
1030 break;
1031
1032 case TCHAR:
1033 case TUCHAR:
1034 case TSHORT:
1035 case TUSHORT:
1036 case TINT:
1037 case TUINT:
1038 case TLONG:
1039 case TULONG:
1040 case TVLONG:
1041 case TUVLONG:
1042 case TIND:
1043 case TARRAY:
1044 i = BtoR(~b);
1045 if(i && r->cost > 0) {
1046 r->regno = i;
1047 return RtoB(i);
1048 }
1049 break;
1050
1051 case TDOUBLE:
1052 case TFLOAT:
1053 i = BtoF(~b);
1054 if(i && r->cost > 0) {
1055 r->regno = i;
1056 return FtoB(i);
1057 }
1058 break;
1059 }
1060 return 0;
1061 }
1062
1063 void
paint1(Reg * r,int bn)1064 paint1(Reg *r, int bn)
1065 {
1066 Reg *r1;
1067 Prog *p;
1068 int z;
1069 ulong bb;
1070
1071 z = bn/32;
1072 bb = 1L<<(bn%32);
1073 if(r->act.b[z] & bb)
1074 return;
1075 for(;;) {
1076 if(!(r->refbehind.b[z] & bb))
1077 break;
1078 r1 = r->p1;
1079 if(r1 == R)
1080 break;
1081 if(!(r1->refahead.b[z] & bb))
1082 break;
1083 if(r1->act.b[z] & bb)
1084 break;
1085 r = r1;
1086 }
1087
1088 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1089 change -= CLOAD * r->loop;
1090 if(debug['R'] && debug['v'])
1091 print("%ld%P\tld %B $%d\n", r->loop,
1092 r->prog, blsh(bn), change);
1093 }
1094 for(;;) {
1095 r->act.b[z] |= bb;
1096 p = r->prog;
1097
1098 if(r->use1.b[z] & bb) {
1099 change += CREF * r->loop;
1100 if(debug['R'] && debug['v'])
1101 print("%ld%P\tu1 %B $%d\n", r->loop,
1102 p, blsh(bn), change);
1103 }
1104
1105 if((r->use2.b[z]|r->set.b[z]) & bb) {
1106 change += CREF * r->loop;
1107 if(debug['R'] && debug['v'])
1108 print("%ld%P\tu2 %B $%d\n", r->loop,
1109 p, blsh(bn), change);
1110 }
1111
1112 if(STORE(r) & r->regdiff.b[z] & bb) {
1113 change -= CLOAD * r->loop;
1114 if(debug['R'] && debug['v'])
1115 print("%ld%P\tst %B $%d\n", r->loop,
1116 p, blsh(bn), change);
1117 }
1118
1119 if(r->refbehind.b[z] & bb)
1120 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1121 if(r1->refahead.b[z] & bb)
1122 paint1(r1, bn);
1123
1124 if(!(r->refahead.b[z] & bb))
1125 break;
1126 r1 = r->s2;
1127 if(r1 != R)
1128 if(r1->refbehind.b[z] & bb)
1129 paint1(r1, bn);
1130 r = r->s1;
1131 if(r == R)
1132 break;
1133 if(r->act.b[z] & bb)
1134 break;
1135 if(!(r->refbehind.b[z] & bb))
1136 break;
1137 }
1138 }
1139
1140 ulong
regset(Reg * r,ulong bb)1141 regset(Reg *r, ulong bb)
1142 {
1143 ulong b, set;
1144 Adr v;
1145 int c;
1146
1147 set = 0;
1148 v = zprog.from;
1149 while(b = bb & ~(bb-1)) {
1150 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1151 if(v.type == 0)
1152 diag(Z, "zero v.type for %#lux", b);
1153 c = copyu(r->prog, &v, A);
1154 if(c == 3)
1155 set |= b;
1156 bb &= ~b;
1157 }
1158 return set;
1159 }
1160
1161 ulong
reguse(Reg * r,ulong bb)1162 reguse(Reg *r, ulong bb)
1163 {
1164 ulong b, set;
1165 Adr v;
1166 int c;
1167
1168 set = 0;
1169 v = zprog.from;
1170 while(b = bb & ~(bb-1)) {
1171 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1172 c = copyu(r->prog, &v, A);
1173 if(c == 1 || c == 2 || c == 4)
1174 set |= b;
1175 bb &= ~b;
1176 }
1177 return set;
1178 }
1179
1180 ulong
paint2(Reg * r,int bn)1181 paint2(Reg *r, int bn)
1182 {
1183 Reg *r1;
1184 int z;
1185 ulong bb, vreg, x;
1186
1187 z = bn/32;
1188 bb = 1L << (bn%32);
1189 vreg = regbits;
1190 if(!(r->act.b[z] & bb))
1191 return vreg;
1192 for(;;) {
1193 if(!(r->refbehind.b[z] & bb))
1194 break;
1195 r1 = r->p1;
1196 if(r1 == R)
1197 break;
1198 if(!(r1->refahead.b[z] & bb))
1199 break;
1200 if(!(r1->act.b[z] & bb))
1201 break;
1202 r = r1;
1203 }
1204 for(;;) {
1205 r->act.b[z] &= ~bb;
1206
1207 vreg |= r->regu;
1208
1209 if(r->refbehind.b[z] & bb)
1210 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1211 if(r1->refahead.b[z] & bb)
1212 vreg |= paint2(r1, bn);
1213
1214 if(!(r->refahead.b[z] & bb))
1215 break;
1216 r1 = r->s2;
1217 if(r1 != R)
1218 if(r1->refbehind.b[z] & bb)
1219 vreg |= paint2(r1, bn);
1220 r = r->s1;
1221 if(r == R)
1222 break;
1223 if(!(r->act.b[z] & bb))
1224 break;
1225 if(!(r->refbehind.b[z] & bb))
1226 break;
1227 }
1228
1229 bb = vreg;
1230 for(; r; r=r->s1) {
1231 x = r->regu & ~bb;
1232 if(x) {
1233 vreg |= reguse(r, x);
1234 bb |= regset(r, x);
1235 }
1236 }
1237 return vreg;
1238 }
1239
1240 void
paint3(Reg * r,int bn,long rb,int rn)1241 paint3(Reg *r, int bn, long rb, int rn)
1242 {
1243 Reg *r1;
1244 Prog *p;
1245 int z;
1246 ulong bb;
1247
1248 z = bn/32;
1249 bb = 1L << (bn%32);
1250 if(r->act.b[z] & bb)
1251 return;
1252 for(;;) {
1253 if(!(r->refbehind.b[z] & bb))
1254 break;
1255 r1 = r->p1;
1256 if(r1 == R)
1257 break;
1258 if(!(r1->refahead.b[z] & bb))
1259 break;
1260 if(r1->act.b[z] & bb)
1261 break;
1262 r = r1;
1263 }
1264
1265 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1266 addmove(r, bn, rn, 0);
1267 for(;;) {
1268 r->act.b[z] |= bb;
1269 p = r->prog;
1270
1271 if(r->use1.b[z] & bb) {
1272 if(debug['R'])
1273 print("%P", p);
1274 addreg(&p->from, rn);
1275 if(debug['R'])
1276 print("\t.c%P\n", p);
1277 }
1278 if((r->use2.b[z]|r->set.b[z]) & bb) {
1279 if(debug['R'])
1280 print("%P", p);
1281 addreg(&p->to, rn);
1282 if(debug['R'])
1283 print("\t.c%P\n", p);
1284 }
1285
1286 if(STORE(r) & r->regdiff.b[z] & bb)
1287 addmove(r, bn, rn, 1);
1288 r->regu |= rb;
1289
1290 if(r->refbehind.b[z] & bb)
1291 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1292 if(r1->refahead.b[z] & bb)
1293 paint3(r1, bn, rb, rn);
1294
1295 if(!(r->refahead.b[z] & bb))
1296 break;
1297 r1 = r->s2;
1298 if(r1 != R)
1299 if(r1->refbehind.b[z] & bb)
1300 paint3(r1, bn, rb, rn);
1301 r = r->s1;
1302 if(r == R)
1303 break;
1304 if(r->act.b[z] & bb)
1305 break;
1306 if(!(r->refbehind.b[z] & bb))
1307 break;
1308 }
1309 }
1310
1311 void
addreg(Adr * a,int rn)1312 addreg(Adr *a, int rn)
1313 {
1314
1315 a->sym = 0;
1316 a->offset = 0;
1317 a->type = rn;
1318 }
1319
1320 long
RtoB(int r)1321 RtoB(int r)
1322 {
1323
1324 if(r < D_AX || r > D_R15)
1325 return 0;
1326 return 1L << (r-D_AX);
1327 }
1328
1329 int
BtoR(long b)1330 BtoR(long b)
1331 {
1332
1333 b &= 0xffffL;
1334 if(b == 0)
1335 return 0;
1336 return bitno(b) + D_AX;
1337 }
1338
1339 /*
1340 * bit reg
1341 * 16 X5
1342 * 17 X6
1343 * 18 X7
1344 */
1345 long
FtoB(int f)1346 FtoB(int f)
1347 {
1348 if(f < FREGMIN || f > FREGEXT)
1349 return 0;
1350 return 1L << (f - FREGMIN + 16);
1351 }
1352
1353 int
BtoF(long b)1354 BtoF(long b)
1355 {
1356
1357 b &= 0x70000L;
1358 if(b == 0)
1359 return 0;
1360 return bitno(b) - 16 + FREGMIN;
1361 }
1362