1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(const void * a1,const void * a2)19 rcmp(const void *a1, const void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = (Rgn*)a1;
25 p2 = (Rgn*)a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = RtoB(D_SP) | RtoB(D_AX) | RtoB(D_X0);
53 if(REGEXT)
54 regbits |= RtoB(REGEXT) | RtoB(REGEXT-1);
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 case AIRETL:
120 case AIRETQ:
121 r->p1 = R;
122 r1->s1 = R;
123 }
124
125 bit = mkvar(r, &p->from);
126 if(bany(&bit))
127 switch(p->as) {
128 /*
129 * funny
130 */
131 case ALEAL:
132 case ALEAQ:
133 for(z=0; z<BITS; z++)
134 addrs.b[z] |= bit.b[z];
135 break;
136
137 /*
138 * left side read
139 */
140 default:
141 for(z=0; z<BITS; z++)
142 r->use1.b[z] |= bit.b[z];
143 break;
144 }
145
146 bit = mkvar(r, &p->to);
147 if(bany(&bit))
148 switch(p->as) {
149 default:
150 diag(Z, "reg: unknown op: %A", p->as);
151 break;
152
153 /*
154 * right side read
155 */
156 case ACMPB:
157 case ACMPL:
158 case ACMPQ:
159 case ACMPW:
160 case ACOMISS:
161 case ACOMISD:
162 case AUCOMISS:
163 case AUCOMISD:
164 for(z=0; z<BITS; z++)
165 r->use2.b[z] |= bit.b[z];
166 break;
167
168 /*
169 * right side write
170 */
171 case ANOP:
172 case AMOVL:
173 case AMOVQ:
174 case AMOVB:
175 case AMOVW:
176 case AMOVBLSX:
177 case AMOVBLZX:
178 case AMOVBQSX:
179 case AMOVBQZX:
180 case AMOVLQSX:
181 case AMOVLQZX:
182 case AMOVWLSX:
183 case AMOVWLZX:
184 case AMOVWQSX:
185 case AMOVWQZX:
186
187 case AMOVSS:
188 case AMOVSD:
189 case ACVTSD2SL:
190 case ACVTSD2SQ:
191 case ACVTSD2SS:
192 case ACVTSL2SD:
193 case ACVTSL2SS:
194 case ACVTSQ2SD:
195 case ACVTSQ2SS:
196 case ACVTSS2SD:
197 case ACVTSS2SL:
198 case ACVTSS2SQ:
199 case ACVTTSD2SL:
200 case ACVTTSD2SQ:
201 case ACVTTSS2SL:
202 case ACVTTSS2SQ:
203 for(z=0; z<BITS; z++)
204 r->set.b[z] |= bit.b[z];
205 break;
206
207 /*
208 * right side read+write
209 */
210 case AADDB:
211 case AADDL:
212 case AADDQ:
213 case AADDW:
214 case AANDB:
215 case AANDL:
216 case AANDQ:
217 case AANDW:
218 case ASUBB:
219 case ASUBL:
220 case ASUBQ:
221 case ASUBW:
222 case AORB:
223 case AORL:
224 case AORQ:
225 case AORW:
226 case AXORB:
227 case AXORL:
228 case AXORQ:
229 case AXORW:
230 case ASALB:
231 case ASALL:
232 case ASALQ:
233 case ASALW:
234 case ASARB:
235 case ASARL:
236 case ASARQ:
237 case ASARW:
238 case AROLB:
239 case AROLL:
240 case AROLQ:
241 case AROLW:
242 case ARORB:
243 case ARORL:
244 case ARORQ:
245 case ARORW:
246 case ASHLB:
247 case ASHLL:
248 case ASHLQ:
249 case ASHLW:
250 case ASHRB:
251 case ASHRL:
252 case ASHRQ:
253 case ASHRW:
254 case AIMULL:
255 case AIMULQ:
256 case AIMULW:
257 case ANEGL:
258 case ANEGQ:
259 case ANOTL:
260 case ANOTQ:
261 case AADCL:
262 case AADCQ:
263 case ASBBL:
264 case ASBBQ:
265
266 case AADDSD:
267 case AADDSS:
268 case ACMPSD:
269 case ACMPSS:
270 case ADIVSD:
271 case ADIVSS:
272 case AMAXSD:
273 case AMAXSS:
274 case AMINSD:
275 case AMINSS:
276 case AMULSD:
277 case AMULSS:
278 case ARCPSS:
279 case ARSQRTSS:
280 case ASQRTSD:
281 case ASQRTSS:
282 case ASUBSD:
283 case ASUBSS:
284 case AXORPD:
285 for(z=0; z<BITS; z++) {
286 r->set.b[z] |= bit.b[z];
287 r->use2.b[z] |= bit.b[z];
288 }
289 break;
290
291 /*
292 * funny
293 */
294 case ACALL:
295 for(z=0; z<BITS; z++)
296 addrs.b[z] |= bit.b[z];
297 break;
298 }
299
300 switch(p->as) {
301 case AIMULL:
302 case AIMULQ:
303 case AIMULW:
304 if(p->to.type != D_NONE)
305 break;
306
307 case AIDIVB:
308 case AIDIVL:
309 case AIDIVQ:
310 case AIDIVW:
311 case AIMULB:
312 case ADIVB:
313 case ADIVL:
314 case ADIVQ:
315 case ADIVW:
316 case AMULB:
317 case AMULL:
318 case AMULQ:
319 case AMULW:
320
321 case ACWD:
322 case ACDQ:
323 case ACQO:
324 r->regu |= RtoB(D_AX) | RtoB(D_DX);
325 break;
326
327 case AREP:
328 case AREPN:
329 case ALOOP:
330 case ALOOPEQ:
331 case ALOOPNE:
332 r->regu |= RtoB(D_CX);
333 break;
334
335 case AMOVSB:
336 case AMOVSL:
337 case AMOVSQ:
338 case AMOVSW:
339 case ACMPSB:
340 case ACMPSL:
341 case ACMPSQ:
342 case ACMPSW:
343 r->regu |= RtoB(D_SI) | RtoB(D_DI);
344 break;
345
346 case ASTOSB:
347 case ASTOSL:
348 case ASTOSQ:
349 case ASTOSW:
350 case ASCASB:
351 case ASCASL:
352 case ASCASQ:
353 case ASCASW:
354 r->regu |= RtoB(D_AX) | RtoB(D_DI);
355 break;
356
357 case AINSB:
358 case AINSL:
359 case AINSW:
360 case AOUTSB:
361 case AOUTSL:
362 case AOUTSW:
363 r->regu |= RtoB(D_DI) | RtoB(D_DX);
364 break;
365 }
366 }
367 if(firstr == R)
368 return;
369 initpc = pc - val;
370 npc = val;
371
372 /*
373 * pass 2
374 * turn branch references to pointers
375 * build back pointers
376 */
377 for(r = firstr; r != R; r = r->link) {
378 p = r->prog;
379 if(p->to.type == D_BRANCH) {
380 val = p->to.offset - initpc;
381 r1 = firstr;
382 while(r1 != R) {
383 r2 = r1->log5;
384 if(r2 != R && val >= r2->pc) {
385 r1 = r2;
386 continue;
387 }
388 if(r1->pc == val)
389 break;
390 r1 = r1->link;
391 }
392 if(r1 == R) {
393 nearln = p->lineno;
394 diag(Z, "ref not found\n%P", p);
395 continue;
396 }
397 if(r1 == r) {
398 nearln = p->lineno;
399 diag(Z, "ref to self\n%P", p);
400 continue;
401 }
402 r->s2 = r1;
403 r->p2link = r1->p2;
404 r1->p2 = r;
405 }
406 }
407 if(debug['R']) {
408 p = firstr->prog;
409 print("\n%L %D\n", p->lineno, &p->from);
410 }
411
412 /*
413 * pass 2.5
414 * find looping structure
415 */
416 for(r = firstr; r != R; r = r->link)
417 r->active = 0;
418 change = 0;
419 loopit(firstr, npc);
420 if(debug['R'] && debug['v']) {
421 print("\nlooping structure:\n");
422 for(r = firstr; r != R; r = r->link) {
423 print("%ld:%P", r->loop, r->prog);
424 for(z=0; z<BITS; z++)
425 bit.b[z] = r->use1.b[z] |
426 r->use2.b[z] |
427 r->set.b[z];
428 if(bany(&bit)) {
429 print("\t");
430 if(bany(&r->use1))
431 print(" u1=%B", r->use1);
432 if(bany(&r->use2))
433 print(" u2=%B", r->use2);
434 if(bany(&r->set))
435 print(" st=%B", r->set);
436 }
437 print("\n");
438 }
439 }
440
441 /*
442 * pass 3
443 * iterate propagating usage
444 * back until flow graph is complete
445 */
446 loop1:
447 change = 0;
448 for(r = firstr; r != R; r = r->link)
449 r->active = 0;
450 for(r = firstr; r != R; r = r->link)
451 if(r->prog->as == ARET)
452 prop(r, zbits, zbits);
453 loop11:
454 /* pick up unreachable code */
455 i = 0;
456 for(r = firstr; r != R; r = r1) {
457 r1 = r->link;
458 if(r1 && r1->active && !r->active) {
459 prop(r, zbits, zbits);
460 i = 1;
461 }
462 }
463 if(i)
464 goto loop11;
465 if(change)
466 goto loop1;
467
468
469 /*
470 * pass 4
471 * iterate propagating register/variable synchrony
472 * forward until graph is complete
473 */
474 loop2:
475 change = 0;
476 for(r = firstr; r != R; r = r->link)
477 r->active = 0;
478 synch(firstr, zbits);
479 if(change)
480 goto loop2;
481
482
483 /*
484 * pass 5
485 * isolate regions
486 * calculate costs (paint1)
487 */
488 r = firstr;
489 if(r) {
490 for(z=0; z<BITS; z++)
491 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
492 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
493 if(bany(&bit)) {
494 nearln = r->prog->lineno;
495 warn(Z, "used and not set: %B", bit);
496 if(debug['R'] && !debug['w'])
497 print("used and not set: %B\n", bit);
498 }
499 }
500 if(debug['R'] && debug['v'])
501 print("\nprop structure:\n");
502 for(r = firstr; r != R; r = r->link)
503 r->act = zbits;
504 rgp = region;
505 nregion = 0;
506 for(r = firstr; r != R; r = r->link) {
507 if(debug['R'] && debug['v']) {
508 print("%P\t", r->prog);
509 if(bany(&r->set))
510 print("s:%B ", r->set);
511 if(bany(&r->refahead))
512 print("ra:%B ", r->refahead);
513 if(bany(&r->calahead))
514 print("ca:%B ", r->calahead);
515 print("\n");
516 }
517 for(z=0; z<BITS; z++)
518 bit.b[z] = r->set.b[z] &
519 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
520 if(bany(&bit)) {
521 nearln = r->prog->lineno;
522 warn(Z, "set and not used: %B", bit);
523 if(debug['R'])
524 print("set and not used: %B\n", bit);
525 excise(r);
526 }
527 for(z=0; z<BITS; z++)
528 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
529 while(bany(&bit)) {
530 i = bnum(bit);
531 rgp->enter = r;
532 rgp->varno = i;
533 change = 0;
534 if(debug['R'] && debug['v'])
535 print("\n");
536 paint1(r, i);
537 bit.b[i/32] &= ~(1L<<(i%32));
538 if(change <= 0) {
539 if(debug['R'])
540 print("%L$%d: %B\n",
541 r->prog->lineno, change, blsh(i));
542 continue;
543 }
544 rgp->cost = change;
545 nregion++;
546 if(nregion >= NRGN) {
547 warn(Z, "too many regions");
548 goto brk;
549 }
550 rgp++;
551 }
552 }
553 brk:
554 qsort(region, nregion, sizeof(region[0]), rcmp);
555
556 /*
557 * pass 6
558 * determine used registers (paint2)
559 * replace code (paint3)
560 */
561 rgp = region;
562 for(i=0; i<nregion; i++) {
563 bit = blsh(rgp->varno);
564 vreg = paint2(rgp->enter, rgp->varno);
565 vreg = allreg(vreg, rgp);
566 if(debug['R']) {
567 print("%L$%d %R: %B\n",
568 rgp->enter->prog->lineno,
569 rgp->cost,
570 rgp->regno,
571 bit);
572 }
573 if(rgp->regno != 0)
574 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
575 rgp++;
576 }
577 /*
578 * pass 7
579 * peep-hole on basic block
580 */
581 if(!debug['R'] || debug['P'])
582 peep();
583
584 /*
585 * pass 8
586 * recalculate pc
587 */
588 val = initpc;
589 for(r = firstr; r != R; r = r1) {
590 r->pc = val;
591 p = r->prog;
592 p1 = P;
593 r1 = r->link;
594 if(r1 != R)
595 p1 = r1->prog;
596 for(; p != p1; p = p->link) {
597 switch(p->as) {
598 default:
599 val++;
600 break;
601
602 case ANOP:
603 case ADATA:
604 case AGLOBL:
605 case ANAME:
606 case ASIGNAME:
607 break;
608 }
609 }
610 }
611 pc = val;
612
613 /*
614 * fix up branches
615 */
616 if(debug['R'])
617 if(bany(&addrs))
618 print("addrs: %B\n", addrs);
619
620 r1 = 0; /* set */
621 for(r = firstr; r != R; r = r->link) {
622 p = r->prog;
623 if(p->to.type == D_BRANCH)
624 p->to.offset = r->s2->pc;
625 r1 = r;
626 }
627
628 /*
629 * last pass
630 * eliminate nops
631 * free aux structures
632 */
633 for(p = firstr->prog; p != P; p = p->link){
634 while(p->link && p->link->as == ANOP)
635 p->link = p->link->link;
636 }
637 if(r1 != R) {
638 r1->link = freer;
639 freer = firstr;
640 }
641 }
642
643 /*
644 * add mov b,rn
645 * just after r
646 */
647 void
addmove(Reg * r,int bn,int rn,int f)648 addmove(Reg *r, int bn, int rn, int f)
649 {
650 Prog *p, *p1;
651 Adr *a;
652 Var *v;
653
654 p1 = alloc(sizeof(*p1));
655 *p1 = zprog;
656 p = r->prog;
657
658 p1->link = p->link;
659 p->link = p1;
660 p1->lineno = p->lineno;
661
662 v = var + bn;
663
664 a = &p1->to;
665 a->sym = v->sym;
666 a->offset = v->offset;
667 a->etype = v->etype;
668 a->type = v->name;
669
670 p1->as = AMOVL;
671 if(v->etype == TCHAR || v->etype == TUCHAR)
672 p1->as = AMOVB;
673 if(v->etype == TSHORT || v->etype == TUSHORT)
674 p1->as = AMOVW;
675 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
676 p1->as = AMOVQ;
677 if(v->etype == TFLOAT)
678 p1->as = AMOVSS;
679 if(v->etype == TDOUBLE)
680 p1->as = AMOVSD;
681
682 p1->from.type = rn;
683 if(!f) {
684 p1->from = *a;
685 *a = zprog.from;
686 a->type = rn;
687 if(v->etype == TUCHAR)
688 p1->as = AMOVB;
689 if(v->etype == TUSHORT)
690 p1->as = AMOVW;
691 }
692 if(debug['R'])
693 print("%P\t.a%P\n", p, p1);
694 }
695
696 ulong
doregbits(int r)697 doregbits(int r)
698 {
699 ulong b;
700
701 b = 0;
702 if(r >= D_INDIR)
703 r -= D_INDIR;
704 if(r >= D_AX && r <= D_R15)
705 b |= RtoB(r);
706 else
707 if(r >= D_AL && r <= D_R15B)
708 b |= RtoB(r-D_AL+D_AX);
709 else
710 if(r >= D_AH && r <= D_BH)
711 b |= RtoB(r-D_AH+D_AX);
712 else
713 if(r >= D_X0 && r <= D_X0+15)
714 b |= FtoB(r);
715 return b;
716 }
717
718 Bits
mkvar(Reg * r,Adr * a)719 mkvar(Reg *r, Adr *a)
720 {
721 Var *v;
722 int i, t, n, et, z;
723 long o;
724 Bits bit;
725 Sym *s;
726
727 /*
728 * mark registers used
729 */
730 t = a->type;
731 r->regu |= doregbits(t);
732 r->regu |= doregbits(a->index);
733
734 switch(t) {
735 default:
736 goto none;
737 case D_ADDR:
738 a->type = a->index;
739 bit = mkvar(r, a);
740 for(z=0; z<BITS; z++)
741 addrs.b[z] |= bit.b[z];
742 a->type = t;
743 goto none;
744 case D_EXTERN:
745 case D_STATIC:
746 case D_PARAM:
747 case D_AUTO:
748 n = t;
749 break;
750 }
751 s = a->sym;
752 if(s == S)
753 goto none;
754 if(s->name[0] == '.')
755 goto none;
756 et = a->etype;
757 o = a->offset;
758 v = var;
759 for(i=0; i<nvar; i++) {
760 if(s == v->sym)
761 if(n == v->name)
762 if(o == v->offset)
763 goto out;
764 v++;
765 }
766 if(nvar >= NVAR) {
767 if(debug['w'] > 1 && s)
768 warn(Z, "variable not optimized: %s", s->name);
769 goto none;
770 }
771 i = nvar;
772 nvar++;
773 v = &var[i];
774 v->sym = s;
775 v->offset = o;
776 v->name = n;
777 v->etype = et;
778 if(debug['R'])
779 print("bit=%2d et=%2d %D\n", i, et, a);
780
781 out:
782 bit = blsh(i);
783 if(n == D_EXTERN || n == D_STATIC)
784 for(z=0; z<BITS; z++)
785 externs.b[z] |= bit.b[z];
786 if(n == D_PARAM)
787 for(z=0; z<BITS; z++)
788 params.b[z] |= bit.b[z];
789 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */
790 for(z=0; z<BITS; z++)
791 addrs.b[z] |= bit.b[z];
792 return bit;
793
794 none:
795 return zbits;
796 }
797
798 void
prop(Reg * r,Bits ref,Bits cal)799 prop(Reg *r, Bits ref, Bits cal)
800 {
801 Reg *r1, *r2;
802 int z;
803
804 for(r1 = r; r1 != R; r1 = r1->p1) {
805 for(z=0; z<BITS; z++) {
806 ref.b[z] |= r1->refahead.b[z];
807 if(ref.b[z] != r1->refahead.b[z]) {
808 r1->refahead.b[z] = ref.b[z];
809 change++;
810 }
811 cal.b[z] |= r1->calahead.b[z];
812 if(cal.b[z] != r1->calahead.b[z]) {
813 r1->calahead.b[z] = cal.b[z];
814 change++;
815 }
816 }
817 switch(r1->prog->as) {
818 case ACALL:
819 for(z=0; z<BITS; z++) {
820 cal.b[z] |= ref.b[z] | externs.b[z];
821 ref.b[z] = 0;
822 }
823 break;
824
825 case ATEXT:
826 for(z=0; z<BITS; z++) {
827 cal.b[z] = 0;
828 ref.b[z] = 0;
829 }
830 break;
831
832 case ARET:
833 for(z=0; z<BITS; z++) {
834 cal.b[z] = externs.b[z];
835 ref.b[z] = 0;
836 }
837 }
838 for(z=0; z<BITS; z++) {
839 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
840 r1->use1.b[z] | r1->use2.b[z];
841 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
842 r1->refbehind.b[z] = ref.b[z];
843 r1->calbehind.b[z] = cal.b[z];
844 }
845 if(r1->active)
846 break;
847 r1->active = 1;
848 }
849 for(; r != r1; r = r->p1)
850 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
851 prop(r2, r->refbehind, r->calbehind);
852 }
853
854 /*
855 * find looping structure
856 *
857 * 1) find reverse postordering
858 * 2) find approximate dominators,
859 * the actual dominators if the flow graph is reducible
860 * otherwise, dominators plus some other non-dominators.
861 * See Matthew S. Hecht and Jeffrey D. Ullman,
862 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
863 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
864 * Oct. 1-3, 1973, pp. 207-217.
865 * 3) find all nodes with a predecessor dominated by the current node.
866 * such a node is a loop head.
867 * recursively, all preds with a greater rpo number are in the loop
868 */
869 long
postorder(Reg * r,Reg ** rpo2r,long n)870 postorder(Reg *r, Reg **rpo2r, long n)
871 {
872 Reg *r1;
873
874 r->rpo = 1;
875 r1 = r->s1;
876 if(r1 && !r1->rpo)
877 n = postorder(r1, rpo2r, n);
878 r1 = r->s2;
879 if(r1 && !r1->rpo)
880 n = postorder(r1, rpo2r, n);
881 rpo2r[n] = r;
882 n++;
883 return n;
884 }
885
886 long
rpolca(long * idom,long rpo1,long rpo2)887 rpolca(long *idom, long rpo1, long rpo2)
888 {
889 long t;
890
891 if(rpo1 == -1)
892 return rpo2;
893 while(rpo1 != rpo2){
894 if(rpo1 > rpo2){
895 t = rpo2;
896 rpo2 = rpo1;
897 rpo1 = t;
898 }
899 while(rpo1 < rpo2){
900 t = idom[rpo2];
901 if(t >= rpo2)
902 fatal(Z, "bad idom");
903 rpo2 = t;
904 }
905 }
906 return rpo1;
907 }
908
909 int
doms(long * idom,long r,long s)910 doms(long *idom, long r, long s)
911 {
912 while(s > r)
913 s = idom[s];
914 return s == r;
915 }
916
917 int
loophead(long * idom,Reg * r)918 loophead(long *idom, Reg *r)
919 {
920 long src;
921
922 src = r->rpo;
923 if(r->p1 != R && doms(idom, src, r->p1->rpo))
924 return 1;
925 for(r = r->p2; r != R; r = r->p2link)
926 if(doms(idom, src, r->rpo))
927 return 1;
928 return 0;
929 }
930
931 void
loopmark(Reg ** rpo2r,long head,Reg * r)932 loopmark(Reg **rpo2r, long head, Reg *r)
933 {
934 if(r->rpo < head || r->active == head)
935 return;
936 r->active = head;
937 r->loop += LOOP;
938 if(r->p1 != R)
939 loopmark(rpo2r, head, r->p1);
940 for(r = r->p2; r != R; r = r->p2link)
941 loopmark(rpo2r, head, r);
942 }
943
944 void
loopit(Reg * r,long nr)945 loopit(Reg *r, long nr)
946 {
947 Reg *r1;
948 long i, d, me;
949
950 if(nr > maxnr) {
951 rpo2r = alloc(nr * sizeof(Reg*));
952 idom = alloc(nr * sizeof(long));
953 maxnr = nr;
954 }
955
956 d = postorder(r, rpo2r, 0);
957 if(d > nr)
958 fatal(Z, "too many reg nodes");
959 nr = d;
960 for(i = 0; i < nr / 2; i++){
961 r1 = rpo2r[i];
962 rpo2r[i] = rpo2r[nr - 1 - i];
963 rpo2r[nr - 1 - i] = r1;
964 }
965 for(i = 0; i < nr; i++)
966 rpo2r[i]->rpo = i;
967
968 idom[0] = 0;
969 for(i = 0; i < nr; i++){
970 r1 = rpo2r[i];
971 me = r1->rpo;
972 d = -1;
973 if(r1->p1 != R && r1->p1->rpo < me)
974 d = r1->p1->rpo;
975 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
976 if(r1->rpo < me)
977 d = rpolca(idom, d, r1->rpo);
978 idom[i] = d;
979 }
980
981 for(i = 0; i < nr; i++){
982 r1 = rpo2r[i];
983 r1->loop++;
984 if(r1->p2 != R && loophead(idom, r1))
985 loopmark(rpo2r, i, r1);
986 }
987 }
988
989 void
synch(Reg * r,Bits dif)990 synch(Reg *r, Bits dif)
991 {
992 Reg *r1;
993 int z;
994
995 for(r1 = r; r1 != R; r1 = r1->s1) {
996 for(z=0; z<BITS; z++) {
997 dif.b[z] = (dif.b[z] &
998 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
999 r1->set.b[z] | r1->regdiff.b[z];
1000 if(dif.b[z] != r1->regdiff.b[z]) {
1001 r1->regdiff.b[z] = dif.b[z];
1002 change++;
1003 }
1004 }
1005 if(r1->active)
1006 break;
1007 r1->active = 1;
1008 for(z=0; z<BITS; z++)
1009 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1010 if(r1->s2 != R)
1011 synch(r1->s2, dif);
1012 }
1013 }
1014
1015 ulong
allreg(ulong b,Rgn * r)1016 allreg(ulong b, Rgn *r)
1017 {
1018 Var *v;
1019 int i;
1020
1021 v = var + r->varno;
1022 r->regno = 0;
1023 switch(v->etype) {
1024
1025 default:
1026 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
1027 break;
1028
1029 case TCHAR:
1030 case TUCHAR:
1031 case TSHORT:
1032 case TUSHORT:
1033 case TINT:
1034 case TUINT:
1035 case TLONG:
1036 case TULONG:
1037 case TVLONG:
1038 case TUVLONG:
1039 case TIND:
1040 case TARRAY:
1041 i = BtoR(~b);
1042 if(i && r->cost > 0) {
1043 r->regno = i;
1044 return RtoB(i);
1045 }
1046 break;
1047
1048 case TDOUBLE:
1049 case TFLOAT:
1050 i = BtoF(~b);
1051 if(i && r->cost > 0) {
1052 r->regno = i;
1053 return FtoB(i);
1054 }
1055 break;
1056 }
1057 return 0;
1058 }
1059
1060 void
paint1(Reg * r,int bn)1061 paint1(Reg *r, int bn)
1062 {
1063 Reg *r1;
1064 Prog *p;
1065 int z;
1066 ulong bb;
1067
1068 z = bn/32;
1069 bb = 1L<<(bn%32);
1070 if(r->act.b[z] & bb)
1071 return;
1072 for(;;) {
1073 if(!(r->refbehind.b[z] & bb))
1074 break;
1075 r1 = r->p1;
1076 if(r1 == R)
1077 break;
1078 if(!(r1->refahead.b[z] & bb))
1079 break;
1080 if(r1->act.b[z] & bb)
1081 break;
1082 r = r1;
1083 }
1084
1085 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
1086 change -= CLOAD * r->loop;
1087 if(debug['R'] && debug['v'])
1088 print("%ld%P\tld %B $%d\n", r->loop,
1089 r->prog, blsh(bn), change);
1090 }
1091 for(;;) {
1092 r->act.b[z] |= bb;
1093 p = r->prog;
1094
1095 if(r->use1.b[z] & bb) {
1096 change += CREF * r->loop;
1097 if(debug['R'] && debug['v'])
1098 print("%ld%P\tu1 %B $%d\n", r->loop,
1099 p, blsh(bn), change);
1100 }
1101
1102 if((r->use2.b[z]|r->set.b[z]) & bb) {
1103 change += CREF * r->loop;
1104 if(debug['R'] && debug['v'])
1105 print("%ld%P\tu2 %B $%d\n", r->loop,
1106 p, blsh(bn), change);
1107 }
1108
1109 if(STORE(r) & r->regdiff.b[z] & bb) {
1110 change -= CLOAD * r->loop;
1111 if(debug['R'] && debug['v'])
1112 print("%ld%P\tst %B $%d\n", r->loop,
1113 p, blsh(bn), change);
1114 }
1115
1116 if(r->refbehind.b[z] & bb)
1117 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1118 if(r1->refahead.b[z] & bb)
1119 paint1(r1, bn);
1120
1121 if(!(r->refahead.b[z] & bb))
1122 break;
1123 r1 = r->s2;
1124 if(r1 != R)
1125 if(r1->refbehind.b[z] & bb)
1126 paint1(r1, bn);
1127 r = r->s1;
1128 if(r == R)
1129 break;
1130 if(r->act.b[z] & bb)
1131 break;
1132 if(!(r->refbehind.b[z] & bb))
1133 break;
1134 }
1135 }
1136
1137 ulong
regset(Reg * r,ulong bb)1138 regset(Reg *r, ulong bb)
1139 {
1140 ulong b, set;
1141 Adr v;
1142 int c;
1143
1144 set = 0;
1145 v = zprog.from;
1146 while(b = bb & ~(bb-1)) {
1147 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1148 if(v.type == 0)
1149 diag(Z, "zero v.type for %#lux", b);
1150 c = copyu(r->prog, &v, A);
1151 if(c == 3)
1152 set |= b;
1153 bb &= ~b;
1154 }
1155 return set;
1156 }
1157
1158 ulong
reguse(Reg * r,ulong bb)1159 reguse(Reg *r, ulong bb)
1160 {
1161 ulong b, set;
1162 Adr v;
1163 int c;
1164
1165 set = 0;
1166 v = zprog.from;
1167 while(b = bb & ~(bb-1)) {
1168 v.type = b & 0xFFFF? BtoR(b): BtoF(b);
1169 c = copyu(r->prog, &v, A);
1170 if(c == 1 || c == 2 || c == 4)
1171 set |= b;
1172 bb &= ~b;
1173 }
1174 return set;
1175 }
1176
1177 ulong
paint2(Reg * r,int bn)1178 paint2(Reg *r, int bn)
1179 {
1180 Reg *r1;
1181 int z;
1182 ulong bb, vreg, x;
1183
1184 z = bn/32;
1185 bb = 1L << (bn%32);
1186 vreg = regbits;
1187 if(!(r->act.b[z] & bb))
1188 return vreg;
1189 for(;;) {
1190 if(!(r->refbehind.b[z] & bb))
1191 break;
1192 r1 = r->p1;
1193 if(r1 == R)
1194 break;
1195 if(!(r1->refahead.b[z] & bb))
1196 break;
1197 if(!(r1->act.b[z] & bb))
1198 break;
1199 r = r1;
1200 }
1201 for(;;) {
1202 r->act.b[z] &= ~bb;
1203
1204 vreg |= r->regu;
1205
1206 if(r->refbehind.b[z] & bb)
1207 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1208 if(r1->refahead.b[z] & bb)
1209 vreg |= paint2(r1, bn);
1210
1211 if(!(r->refahead.b[z] & bb))
1212 break;
1213 r1 = r->s2;
1214 if(r1 != R)
1215 if(r1->refbehind.b[z] & bb)
1216 vreg |= paint2(r1, bn);
1217 r = r->s1;
1218 if(r == R)
1219 break;
1220 if(!(r->act.b[z] & bb))
1221 break;
1222 if(!(r->refbehind.b[z] & bb))
1223 break;
1224 }
1225
1226 bb = vreg;
1227 for(; r; r=r->s1) {
1228 x = r->regu & ~bb;
1229 if(x) {
1230 vreg |= reguse(r, x);
1231 bb |= regset(r, x);
1232 }
1233 }
1234 return vreg;
1235 }
1236
1237 void
paint3(Reg * r,int bn,long rb,int rn)1238 paint3(Reg *r, int bn, long rb, int rn)
1239 {
1240 Reg *r1;
1241 Prog *p;
1242 int z;
1243 ulong bb;
1244
1245 z = bn/32;
1246 bb = 1L << (bn%32);
1247 if(r->act.b[z] & bb)
1248 return;
1249 for(;;) {
1250 if(!(r->refbehind.b[z] & bb))
1251 break;
1252 r1 = r->p1;
1253 if(r1 == R)
1254 break;
1255 if(!(r1->refahead.b[z] & bb))
1256 break;
1257 if(r1->act.b[z] & bb)
1258 break;
1259 r = r1;
1260 }
1261
1262 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1263 addmove(r, bn, rn, 0);
1264 for(;;) {
1265 r->act.b[z] |= bb;
1266 p = r->prog;
1267
1268 if(r->use1.b[z] & bb) {
1269 if(debug['R'])
1270 print("%P", p);
1271 addreg(&p->from, rn);
1272 if(debug['R'])
1273 print("\t.c%P\n", p);
1274 }
1275 if((r->use2.b[z]|r->set.b[z]) & bb) {
1276 if(debug['R'])
1277 print("%P", p);
1278 addreg(&p->to, rn);
1279 if(debug['R'])
1280 print("\t.c%P\n", p);
1281 }
1282
1283 if(STORE(r) & r->regdiff.b[z] & bb)
1284 addmove(r, bn, rn, 1);
1285 r->regu |= rb;
1286
1287 if(r->refbehind.b[z] & bb)
1288 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1289 if(r1->refahead.b[z] & bb)
1290 paint3(r1, bn, rb, rn);
1291
1292 if(!(r->refahead.b[z] & bb))
1293 break;
1294 r1 = r->s2;
1295 if(r1 != R)
1296 if(r1->refbehind.b[z] & bb)
1297 paint3(r1, bn, rb, rn);
1298 r = r->s1;
1299 if(r == R)
1300 break;
1301 if(r->act.b[z] & bb)
1302 break;
1303 if(!(r->refbehind.b[z] & bb))
1304 break;
1305 }
1306 }
1307
1308 void
addreg(Adr * a,int rn)1309 addreg(Adr *a, int rn)
1310 {
1311
1312 a->sym = 0;
1313 a->offset = 0;
1314 a->type = rn;
1315 }
1316
1317 long
RtoB(int r)1318 RtoB(int r)
1319 {
1320
1321 if(r < D_AX || r > D_R15)
1322 return 0;
1323 return 1L << (r-D_AX);
1324 }
1325
1326 int
BtoR(long b)1327 BtoR(long b)
1328 {
1329
1330 b &= 0xffffL;
1331 if(b == 0)
1332 return 0;
1333 return bitno(b) + D_AX;
1334 }
1335
1336 /*
1337 * bit reg
1338 * 16 X5
1339 * 17 X6
1340 * 18 X7
1341 */
1342 long
FtoB(int f)1343 FtoB(int f)
1344 {
1345 if(f < FREGMIN || f > FREGEXT)
1346 return 0;
1347 return 1L << (f - FREGMIN + 16);
1348 }
1349
1350 int
BtoF(long b)1351 BtoF(long b)
1352 {
1353
1354 b &= 0x70000L;
1355 if(b == 0)
1356 return 0;
1357 return bitno(b) - 16 + FREGMIN;
1358 }
1359