1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(const void * a1,const void * a2)19 rcmp(const void *a1, const void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = (Rgn*)a1;
25 p2 = (Rgn*)a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = 0;
53 for(z=0; z<BITS; z++) {
54 externs.b[z] = 0;
55 params.b[z] = 0;
56 consts.b[z] = 0;
57 addrs.b[z] = 0;
58 }
59
60 /*
61 * pass 1
62 * build aux data structure
63 * allocate pcs
64 * find use and set of variables
65 */
66 val = 5L * 5L * 5L * 5L * 5L;
67 lp = log5;
68 for(i=0; i<5; i++) {
69 lp->m = val;
70 lp->c = 0;
71 lp->p = R;
72 val /= 5L;
73 lp++;
74 }
75 val = 0;
76 for(; p != P; p = p->link) {
77 switch(p->as) {
78 case ADATA:
79 case AGLOBL:
80 case ANAME:
81 case ASIGNAME:
82 continue;
83 }
84 r = rega();
85 if(firstr == R) {
86 firstr = r;
87 lastr = r;
88 } else {
89 lastr->link = r;
90 r->p1 = lastr;
91 lastr->s1 = r;
92 lastr = r;
93 }
94 r->prog = p;
95 r->pc = val;
96 val++;
97
98 lp = log5;
99 for(i=0; i<5; i++) {
100 lp->c--;
101 if(lp->c <= 0) {
102 lp->c = lp->m;
103 if(lp->p != R)
104 lp->p->log5 = r;
105 lp->p = r;
106 (lp+1)->c = 0;
107 break;
108 }
109 lp++;
110 }
111
112 r1 = r->p1;
113 if(r1 != R)
114 switch(r1->prog->as) {
115 case ARETURN:
116 case AJMP:
117 case ARETT:
118 r->p1 = R;
119 r1->s1 = R;
120 }
121
122 /*
123 * left side always read
124 */
125 bit = mkvar(&p->from, p->as==AMOVW);
126 for(z=0; z<BITS; z++)
127 r->use1.b[z] |= bit.b[z];
128
129 /*
130 * right side depends on opcode
131 */
132 bit = mkvar(&p->to, 0);
133 if(bany(&bit))
134 switch(p->as) {
135 default:
136 diag(Z, "reg: unknown asop: %A", p->as);
137 break;
138
139 /*
140 * right side write
141 */
142 case ANOP:
143 case AMOVB:
144 case AMOVBU:
145 case AMOVH:
146 case AMOVHU:
147 case AMOVW:
148 case AFMOVF:
149 case AFMOVD:
150 for(z=0; z<BITS; z++)
151 r->set.b[z] |= bit.b[z];
152 break;
153
154 /*
155 * funny
156 */
157 case AJMPL:
158 for(z=0; z<BITS; z++)
159 addrs.b[z] |= bit.b[z];
160 break;
161 }
162 }
163 if(firstr == R)
164 return;
165 initpc = pc - val;
166 npc = val;
167
168 /*
169 * pass 2
170 * turn branch references to pointers
171 * build back pointers
172 */
173 for(r = firstr; r != R; r = r->link) {
174 p = r->prog;
175 if(p->to.type == D_BRANCH) {
176 val = p->to.offset - initpc;
177 r1 = firstr;
178 while(r1 != R) {
179 r2 = r1->log5;
180 if(r2 != R && val >= r2->pc) {
181 r1 = r2;
182 continue;
183 }
184 if(r1->pc == val)
185 break;
186 r1 = r1->link;
187 }
188 if(r1 == R) {
189 nearln = p->lineno;
190 diag(Z, "ref not found\n%P", p);
191 continue;
192 }
193 if(r1 == r) {
194 nearln = p->lineno;
195 diag(Z, "ref to self\n%P", p);
196 continue;
197 }
198 r->s2 = r1;
199 r->p2link = r1->p2;
200 r1->p2 = r;
201 }
202 }
203 if(debug['R']) {
204 p = firstr->prog;
205 print("\n%L %D\n", p->lineno, &p->from);
206 }
207
208 /*
209 * pass 2.5
210 * find looping structure
211 */
212 for(r = firstr; r != R; r = r->link)
213 r->active = 0;
214 change = 0;
215 loopit(firstr, npc);
216 if(debug['R'] && debug['v']) {
217 print("\nlooping structure:\n");
218 for(r = firstr; r != R; r = r->link) {
219 print("%ld:%P", r->loop, r->prog);
220 for(z=0; z<BITS; z++)
221 bit.b[z] = r->use1.b[z] |
222 r->use2.b[z] | r->set.b[z];
223 if(bany(&bit)) {
224 print("\t");
225 if(bany(&r->use1))
226 print(" u1=%B", r->use1);
227 if(bany(&r->use2))
228 print(" u2=%B", r->use2);
229 if(bany(&r->set))
230 print(" st=%B", r->set);
231 }
232 print("\n");
233 }
234 }
235
236 /*
237 * pass 3
238 * iterate propagating usage
239 * back until flow graph is complete
240 */
241 loop1:
242 change = 0;
243 for(r = firstr; r != R; r = r->link)
244 r->active = 0;
245 for(r = firstr; r != R; r = r->link)
246 if(r->prog->as == ARETURN)
247 prop(r, zbits, zbits);
248 loop11:
249 /* pick up unreachable code */
250 i = 0;
251 for(r = firstr; r != R; r = r1) {
252 r1 = r->link;
253 if(r1 && r1->active && !r->active) {
254 prop(r, zbits, zbits);
255 i = 1;
256 }
257 }
258 if(i)
259 goto loop11;
260 if(change)
261 goto loop1;
262
263
264 /*
265 * pass 4
266 * iterate propagating register/variable synchrony
267 * forward until graph is complete
268 */
269 loop2:
270 change = 0;
271 for(r = firstr; r != R; r = r->link)
272 r->active = 0;
273 synch(firstr, zbits);
274 if(change)
275 goto loop2;
276
277
278 /*
279 * pass 5
280 * isolate regions
281 * calculate costs (paint1)
282 */
283 r = firstr;
284 if(r) {
285 for(z=0; z<BITS; z++)
286 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
287 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
288 if(bany(&bit)) {
289 nearln = r->prog->lineno;
290 warn(Z, "used and not set: %B", bit);
291 if(debug['R'] && !debug['w'])
292 print("used and not set: %B\n", bit);
293 }
294 }
295 if(debug['R'] && debug['v'])
296 print("\nprop structure:\n");
297 for(r = firstr; r != R; r = r->link)
298 r->act = zbits;
299 rgp = region;
300 nregion = 0;
301 for(r = firstr; r != R; r = r->link) {
302 if(debug['R'] && debug['v'])
303 print("%P\n set = %B; rah = %B; cal = %B\n",
304 r->prog, r->set, r->refahead, r->calahead);
305 for(z=0; z<BITS; z++)
306 bit.b[z] = r->set.b[z] &
307 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
308 if(bany(&bit)) {
309 nearln = r->prog->lineno;
310 warn(Z, "set and not used: %B", bit);
311 if(debug['R'])
312 print("set an not used: %B\n", bit);
313 excise(r);
314 }
315 for(z=0; z<BITS; z++)
316 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
317 while(bany(&bit)) {
318 i = bnum(bit);
319 rgp->enter = r;
320 rgp->varno = i;
321 change = 0;
322 if(debug['R'] && debug['v'])
323 print("\n");
324 paint1(r, i);
325 bit.b[i/32] &= ~(1L<<(i%32));
326 if(change <= 0) {
327 if(debug['R'])
328 print("%L$%d: %B\n",
329 r->prog->lineno, change, blsh(i));
330 continue;
331 }
332 rgp->cost = change;
333 nregion++;
334 if(nregion >= NRGN) {
335 warn(Z, "too many regions");
336 goto brk;
337 }
338 rgp++;
339 }
340 }
341 brk:
342 qsort(region, nregion, sizeof(region[0]), rcmp);
343
344 /*
345 * pass 6
346 * determine used registers (paint2)
347 * replace code (paint3)
348 */
349 rgp = region;
350 for(i=0; i<nregion; i++) {
351 bit = blsh(rgp->varno);
352 vreg = paint2(rgp->enter, rgp->varno);
353 vreg = allreg(vreg, rgp);
354 if(debug['R']) {
355 if(rgp->regno >= NREG)
356 print("%L$%d F%d: %B\n",
357 rgp->enter->prog->lineno,
358 rgp->cost,
359 rgp->regno-NREG,
360 bit);
361 else
362 print("%L$%d R%d: %B\n",
363 rgp->enter->prog->lineno,
364 rgp->cost,
365 rgp->regno,
366 bit);
367 }
368 if(rgp->regno != 0)
369 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
370 rgp++;
371 }
372 /*
373 * pass 7
374 * peep-hole on basic block
375 */
376 if(!debug['R'] || debug['P'])
377 peep();
378
379 /*
380 * pass 8
381 * recalculate pc
382 */
383 val = initpc;
384 for(r = firstr; r != R; r = r1) {
385 r->pc = val;
386 p = r->prog;
387 p1 = P;
388 r1 = r->link;
389 if(r1 != R)
390 p1 = r1->prog;
391 for(; p != p1; p = p->link) {
392 switch(p->as) {
393 default:
394 val++;
395 break;
396
397 case ANOP:
398 case ADATA:
399 case AGLOBL:
400 case ANAME:
401 case ASIGNAME:
402 break;
403 }
404 }
405 }
406 pc = val;
407
408 /*
409 * fix up branches
410 */
411 if(debug['R'])
412 if(bany(&addrs))
413 print("addrs: %B\n", addrs);
414
415 r1 = 0; /* set */
416 for(r = firstr; r != R; r = r->link) {
417 p = r->prog;
418 if(p->to.type == D_BRANCH)
419 p->to.offset = r->s2->pc;
420 r1 = r;
421 }
422
423 /*
424 * last pass
425 * eliminate nops
426 * free aux structures
427 */
428 for(p = firstr->prog; p != P; p = p->link){
429 while(p->link && p->link->as == ANOP)
430 p->link = p->link->link;
431 }
432 if(r1 != R) {
433 r1->link = freer;
434 freer = firstr;
435 }
436 }
437
438 /*
439 * add mov b,rn
440 * just after r
441 */
442 void
addmove(Reg * r,int bn,int rn,int f)443 addmove(Reg *r, int bn, int rn, int f)
444 {
445 Prog *p, *p1;
446 Adr *a;
447 Var *v;
448
449 p1 = alloc(sizeof(*p1));
450 *p1 = zprog;
451 p = r->prog;
452
453 p1->link = p->link;
454 p->link = p1;
455 p1->lineno = p->lineno;
456
457 v = var + bn;
458
459 a = &p1->to;
460 a->sym = v->sym;
461 a->name = v->name;
462 a->offset = v->offset;
463 a->etype = v->etype;
464 a->type = D_OREG;
465 if(a->etype == TARRAY || a->sym == S)
466 a->type = D_CONST;
467
468 p1->as = AMOVW;
469 if(v->etype == TCHAR || v->etype == TUCHAR)
470 p1->as = AMOVB;
471 if(v->etype == TSHORT || v->etype == TUSHORT)
472 p1->as = AMOVH;
473 if(v->etype == TFLOAT)
474 p1->as = AFMOVF;
475 if(v->etype == TDOUBLE)
476 p1->as = AFMOVD;
477
478 p1->from.type = D_REG;
479 p1->from.reg = rn;
480 if(rn >= NREG) {
481 p1->from.type = D_FREG;
482 p1->from.reg = rn-NREG;
483 }
484 if(!f) {
485 p1->from = *a;
486 *a = zprog.from;
487 a->type = D_REG;
488 a->reg = rn;
489 if(rn >= NREG) {
490 a->type = D_FREG;
491 a->reg = rn-NREG;
492 }
493 if(v->etype == TUCHAR)
494 p1->as = AMOVBU;
495 if(v->etype == TUSHORT)
496 p1->as = AMOVHU;
497 }
498 if(debug['R'])
499 print("%P\t.a%P\n", p, p1);
500 }
501
502 Bits
mkvar(Adr * a,int docon)503 mkvar(Adr *a, int docon)
504 {
505 Var *v;
506 int i, t, n, et, z;
507 long o;
508 Bits bit;
509 Sym *s;
510
511 t = a->type;
512 if(t == D_REG && a->reg != NREG)
513 regbits |= RtoB(a->reg);
514 if(t == D_FREG && a->reg != NREG)
515 regbits |= FtoB(a->reg);
516 s = a->sym;
517 o = a->offset;
518 et = a->etype;
519 if(s == S) {
520 if(t != D_CONST || !docon || a->reg != NREG)
521 goto none;
522 et = TLONG;
523 }
524 if(t == D_CONST) {
525 if(s == S && sval(o))
526 goto none;
527 }
528 n = a->name;
529 v = var;
530 for(i=0; i<nvar; i++) {
531 if(s == v->sym)
532 if(n == v->name)
533 if(o == v->offset)
534 goto out;
535 v++;
536 }
537 if(s)
538 if(s->name[0] == '.')
539 goto none;
540 if(nvar >= NVAR) {
541 if(debug['w'] > 1 && s)
542 warn(Z, "variable not optimized: %s", s->name);
543 goto none;
544 }
545 i = nvar;
546 nvar++;
547 v = &var[i];
548 v->sym = s;
549 v->offset = o;
550 v->etype = et;
551 v->name = n;
552 if(debug['R'])
553 print("bit=%2d et=%2d %D\n", i, et, a);
554 out:
555 bit = blsh(i);
556 if(n == D_EXTERN || n == D_STATIC)
557 for(z=0; z<BITS; z++)
558 externs.b[z] |= bit.b[z];
559 if(n == D_PARAM)
560 for(z=0; z<BITS; z++)
561 params.b[z] |= bit.b[z];
562 if(v->etype != et || !typechlpfd[et]) /* funny punning */
563 for(z=0; z<BITS; z++)
564 addrs.b[z] |= bit.b[z];
565 if(t == D_CONST) {
566 if(s == S) {
567 for(z=0; z<BITS; z++)
568 consts.b[z] |= bit.b[z];
569 return bit;
570 }
571 if(et != TARRAY)
572 for(z=0; z<BITS; z++)
573 addrs.b[z] |= bit.b[z];
574 for(z=0; z<BITS; z++)
575 params.b[z] |= bit.b[z];
576 return bit;
577 }
578 if(t == D_OREG)
579 return bit;
580
581 none:
582 return zbits;
583 }
584
585 void
prop(Reg * r,Bits ref,Bits cal)586 prop(Reg *r, Bits ref, Bits cal)
587 {
588 Reg *r1, *r2;
589 int z;
590
591 for(r1 = r; r1 != R; r1 = r1->p1) {
592 for(z=0; z<BITS; z++) {
593 ref.b[z] |= r1->refahead.b[z];
594 if(ref.b[z] != r1->refahead.b[z]) {
595 r1->refahead.b[z] = ref.b[z];
596 change++;
597 }
598 cal.b[z] |= r1->calahead.b[z];
599 if(cal.b[z] != r1->calahead.b[z]) {
600 r1->calahead.b[z] = cal.b[z];
601 change++;
602 }
603 }
604 switch(r1->prog->as) {
605 case AJMPL:
606 for(z=0; z<BITS; z++) {
607 cal.b[z] |= ref.b[z] | externs.b[z];
608 ref.b[z] = 0;
609 }
610 break;
611
612 case ATEXT:
613 for(z=0; z<BITS; z++) {
614 cal.b[z] = 0;
615 ref.b[z] = 0;
616 }
617 break;
618
619 case ARETURN:
620 for(z=0; z<BITS; z++) {
621 cal.b[z] = externs.b[z];
622 ref.b[z] = 0;
623 }
624 }
625 for(z=0; z<BITS; z++) {
626 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
627 r1->use1.b[z] | r1->use2.b[z];
628 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
629 r1->refbehind.b[z] = ref.b[z];
630 r1->calbehind.b[z] = cal.b[z];
631 }
632 if(r1->active)
633 break;
634 r1->active = 1;
635 }
636 for(; r != r1; r = r->p1)
637 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
638 prop(r2, r->refbehind, r->calbehind);
639 }
640
641
642 /*
643 * find looping structure
644 *
645 * 1) find reverse postordering
646 * 2) find approximate dominators,
647 * the actual dominators if the flow graph is reducible
648 * otherwise, dominators plus some other non-dominators.
649 * See Matthew S. Hecht and Jeffrey D. Ullman,
650 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
651 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
652 * Oct. 1-3, 1973, pp. 207-217.
653 * 3) find all nodes with a predecessor dominated by the current node.
654 * such a node is a loop head.
655 * recursively, all preds with a greater rpo number are in the loop
656 */
657 long
postorder(Reg * r,Reg ** rpo2r,long n)658 postorder(Reg *r, Reg **rpo2r, long n)
659 {
660 Reg *r1;
661
662 r->rpo = 1;
663 r1 = r->s1;
664 if(r1 && !r1->rpo)
665 n = postorder(r1, rpo2r, n);
666 r1 = r->s2;
667 if(r1 && !r1->rpo)
668 n = postorder(r1, rpo2r, n);
669 rpo2r[n] = r;
670 n++;
671 return n;
672 }
673
674 long
rpolca(long * idom,long rpo1,long rpo2)675 rpolca(long *idom, long rpo1, long rpo2)
676 {
677 long t;
678
679 if(rpo1 == -1)
680 return rpo2;
681 while(rpo1 != rpo2){
682 if(rpo1 > rpo2){
683 t = rpo2;
684 rpo2 = rpo1;
685 rpo1 = t;
686 }
687 while(rpo1 < rpo2){
688 t = idom[rpo2];
689 if(t >= rpo2)
690 fatal(Z, "bad idom");
691 rpo2 = t;
692 }
693 }
694 return rpo1;
695 }
696
697 int
doms(long * idom,long r,long s)698 doms(long *idom, long r, long s)
699 {
700 while(s > r)
701 s = idom[s];
702 return s == r;
703 }
704
705 int
loophead(long * idom,Reg * r)706 loophead(long *idom, Reg *r)
707 {
708 long src;
709
710 src = r->rpo;
711 if(r->p1 != R && doms(idom, src, r->p1->rpo))
712 return 1;
713 for(r = r->p2; r != R; r = r->p2link)
714 if(doms(idom, src, r->rpo))
715 return 1;
716 return 0;
717 }
718
719 void
loopmark(Reg ** rpo2r,long head,Reg * r)720 loopmark(Reg **rpo2r, long head, Reg *r)
721 {
722 if(r->rpo < head || r->active == head)
723 return;
724 r->active = head;
725 r->loop += LOOP;
726 if(r->p1 != R)
727 loopmark(rpo2r, head, r->p1);
728 for(r = r->p2; r != R; r = r->p2link)
729 loopmark(rpo2r, head, r);
730 }
731
732 void
loopit(Reg * r,long nr)733 loopit(Reg *r, long nr)
734 {
735 Reg *r1;
736 long i, d, me;
737
738 if(nr > maxnr) {
739 rpo2r = alloc(nr * sizeof(Reg*));
740 idom = alloc(nr * sizeof(long));
741 maxnr = nr;
742 }
743
744 d = postorder(r, rpo2r, 0);
745 if(d > nr)
746 fatal(Z, "too many reg nodes");
747 nr = d;
748 for(i = 0; i < nr / 2; i++){
749 r1 = rpo2r[i];
750 rpo2r[i] = rpo2r[nr - 1 - i];
751 rpo2r[nr - 1 - i] = r1;
752 }
753 for(i = 0; i < nr; i++)
754 rpo2r[i]->rpo = i;
755
756 idom[0] = 0;
757 for(i = 0; i < nr; i++){
758 r1 = rpo2r[i];
759 me = r1->rpo;
760 d = -1;
761 if(r1->p1 != R && r1->p1->rpo < me)
762 d = r1->p1->rpo;
763 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
764 if(r1->rpo < me)
765 d = rpolca(idom, d, r1->rpo);
766 idom[i] = d;
767 }
768
769 for(i = 0; i < nr; i++){
770 r1 = rpo2r[i];
771 r1->loop++;
772 if(r1->p2 != R && loophead(idom, r1))
773 loopmark(rpo2r, i, r1);
774 }
775 }
776
777 void
synch(Reg * r,Bits dif)778 synch(Reg *r, Bits dif)
779 {
780 Reg *r1;
781 int z;
782
783 for(r1 = r; r1 != R; r1 = r1->s1) {
784 for(z=0; z<BITS; z++) {
785 dif.b[z] = (dif.b[z] &
786 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
787 r1->set.b[z] | r1->regdiff.b[z];
788 if(dif.b[z] != r1->regdiff.b[z]) {
789 r1->regdiff.b[z] = dif.b[z];
790 change++;
791 }
792 }
793 if(r1->active)
794 break;
795 r1->active = 1;
796 for(z=0; z<BITS; z++)
797 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
798 if(r1->s2 != R)
799 synch(r1->s2, dif);
800 }
801 }
802
803 ulong
allreg(ulong b,Rgn * r)804 allreg(ulong b, Rgn *r)
805 {
806 Var *v;
807 int i;
808
809 v = var + r->varno;
810 r->regno = 0;
811 switch(v->etype) {
812
813 default:
814 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
815 break;
816
817 case TCHAR:
818 case TUCHAR:
819 case TSHORT:
820 case TUSHORT:
821 case TINT:
822 case TUINT:
823 case TLONG:
824 case TULONG:
825 case TIND:
826 case TARRAY:
827 i = BtoR(~b);
828 if(i && r->cost > 0) {
829 r->regno = i;
830 return RtoB(i);
831 }
832 break;
833
834 case TDOUBLE:
835 case TFLOAT:
836 i = BtoF(~b);
837 if(i && r->cost > 0) {
838 r->regno = i+NREG;
839 return FtoB(i);
840 }
841 break;
842 }
843 return 0;
844 }
845
846 void
paint1(Reg * r,int bn)847 paint1(Reg *r, int bn)
848 {
849 Reg *r1;
850 Prog *p;
851 int z;
852 ulong bb;
853
854 z = bn/32;
855 bb = 1L<<(bn%32);
856 if(r->act.b[z] & bb)
857 return;
858 for(;;) {
859 if(!(r->refbehind.b[z] & bb))
860 break;
861 r1 = r->p1;
862 if(r1 == R)
863 break;
864 if(!(r1->refahead.b[z] & bb))
865 break;
866 if(r1->act.b[z] & bb)
867 break;
868 r = r1;
869 }
870
871 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
872 change -= CLOAD * r->loop;
873 if(debug['R'] && debug['v'])
874 print("%ld%P\tld %B $%d\n", r->loop,
875 r->prog, blsh(bn), change);
876 }
877 for(;;) {
878 r->act.b[z] |= bb;
879 p = r->prog;
880
881 if(r->use1.b[z] & bb) {
882 change += CREF * r->loop;
883 if(p->to.type == D_FREG && p->as == AMOVW)
884 change = -CINF; /* cant go Rreg to Freg */
885 if(debug['R'] && debug['v'])
886 print("%ld%P\tu1 %B $%d\n", r->loop,
887 p, blsh(bn), change);
888 }
889
890 if((r->use2.b[z]|r->set.b[z]) & bb) {
891 change += CREF * r->loop;
892 if(p->from.type == D_FREG && p->as == AMOVW)
893 change = -CINF; /* cant go Rreg to Freg */
894 if(debug['R'] && debug['v'])
895 print("%ld%P\tu2 %B $%d\n", r->loop,
896 p, blsh(bn), change);
897 }
898
899 if(STORE(r) & r->regdiff.b[z] & bb) {
900 change -= CLOAD * r->loop;
901 if(debug['R'] && debug['v'])
902 print("%ld%P\tst %B $%d\n", r->loop,
903 p, blsh(bn), change);
904 }
905
906 if(r->refbehind.b[z] & bb)
907 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
908 if(r1->refahead.b[z] & bb)
909 paint1(r1, bn);
910
911 if(!(r->refahead.b[z] & bb))
912 break;
913 r1 = r->s2;
914 if(r1 != R)
915 if(r1->refbehind.b[z] & bb)
916 paint1(r1, bn);
917 r = r->s1;
918 if(r == R)
919 break;
920 if(r->act.b[z] & bb)
921 break;
922 if(!(r->refbehind.b[z] & bb))
923 break;
924 }
925 }
926
927 ulong
paint2(Reg * r,int bn)928 paint2(Reg *r, int bn)
929 {
930 Reg *r1;
931 int z;
932 ulong bb, vreg;
933
934 z = bn/32;
935 bb = 1L << (bn%32);
936 vreg = regbits;
937 if(!(r->act.b[z] & bb))
938 return vreg;
939 for(;;) {
940 if(!(r->refbehind.b[z] & bb))
941 break;
942 r1 = r->p1;
943 if(r1 == R)
944 break;
945 if(!(r1->refahead.b[z] & bb))
946 break;
947 if(!(r1->act.b[z] & bb))
948 break;
949 r = r1;
950 }
951 for(;;) {
952 r->act.b[z] &= ~bb;
953
954 vreg |= r->regu;
955
956 if(r->refbehind.b[z] & bb)
957 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
958 if(r1->refahead.b[z] & bb)
959 vreg |= paint2(r1, bn);
960
961 if(!(r->refahead.b[z] & bb))
962 break;
963 r1 = r->s2;
964 if(r1 != R)
965 if(r1->refbehind.b[z] & bb)
966 vreg |= paint2(r1, bn);
967 r = r->s1;
968 if(r == R)
969 break;
970 if(!(r->act.b[z] & bb))
971 break;
972 if(!(r->refbehind.b[z] & bb))
973 break;
974 }
975 return vreg;
976 }
977
978 void
paint3(Reg * r,int bn,long rb,int rn)979 paint3(Reg *r, int bn, long rb, int rn)
980 {
981 Reg *r1;
982 Prog *p;
983 int z;
984 ulong bb;
985
986 z = bn/32;
987 bb = 1L << (bn%32);
988 if(r->act.b[z] & bb)
989 return;
990 for(;;) {
991 if(!(r->refbehind.b[z] & bb))
992 break;
993 r1 = r->p1;
994 if(r1 == R)
995 break;
996 if(!(r1->refahead.b[z] & bb))
997 break;
998 if(r1->act.b[z] & bb)
999 break;
1000 r = r1;
1001 }
1002
1003 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1004 addmove(r, bn, rn, 0);
1005 for(;;) {
1006 r->act.b[z] |= bb;
1007 p = r->prog;
1008
1009 if(r->use1.b[z] & bb) {
1010 if(debug['R'])
1011 print("%P", p);
1012 addreg(&p->from, rn);
1013 if(debug['R'])
1014 print("\t.c%P\n", p);
1015 }
1016 if((r->use2.b[z]|r->set.b[z]) & bb) {
1017 if(debug['R'])
1018 print("%P", p);
1019 addreg(&p->to, rn);
1020 if(debug['R'])
1021 print("\t.c%P\n", p);
1022 }
1023
1024 if(STORE(r) & r->regdiff.b[z] & bb)
1025 addmove(r, bn, rn, 1);
1026 r->regu |= rb;
1027
1028 if(r->refbehind.b[z] & bb)
1029 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1030 if(r1->refahead.b[z] & bb)
1031 paint3(r1, bn, rb, rn);
1032
1033 if(!(r->refahead.b[z] & bb))
1034 break;
1035 r1 = r->s2;
1036 if(r1 != R)
1037 if(r1->refbehind.b[z] & bb)
1038 paint3(r1, bn, rb, rn);
1039 r = r->s1;
1040 if(r == R)
1041 break;
1042 if(r->act.b[z] & bb)
1043 break;
1044 if(!(r->refbehind.b[z] & bb))
1045 break;
1046 }
1047 }
1048
1049 void
addreg(Adr * a,int rn)1050 addreg(Adr *a, int rn)
1051 {
1052
1053 a->sym = 0;
1054 a->name = D_NONE;
1055 a->type = D_REG;
1056 a->reg = rn;
1057 if(rn >= NREG) {
1058 a->type = D_FREG;
1059 a->reg = rn-NREG;
1060 }
1061 }
1062
1063 /*
1064 * bit reg
1065 * 0 R9
1066 * 1 R10
1067 * ... ...
1068 * 4 R13
1069 * 5 R16
1070 * ... ...
1071 * 20 R31
1072 */
1073 long
RtoB(int r)1074 RtoB(int r)
1075 {
1076
1077 if(r >= 9 && r <= 13)
1078 return 1L << (r-9);
1079 if(r >= 16 && r <= 31)
1080 return 1L << (r-11);
1081 return 0;
1082 }
1083
1084 int
BtoR(long b)1085 BtoR(long b)
1086 {
1087 int r;
1088
1089 b &= 0x001fffffL;
1090 if(b == 0)
1091 return 0;
1092 r = bitno(b) + 9;
1093 if(r >= 14)
1094 r += 2;
1095 return r;
1096 }
1097
1098 /*
1099 * bit reg
1100 * 22 F4
1101 * 23 F6
1102 * ... ...
1103 * 31 F22
1104 */
1105 long
FtoB(int f)1106 FtoB(int f)
1107 {
1108
1109 if(f < 4 || f > 22 || (f&1))
1110 return 0;
1111 return 1L << (f/2 + 20);
1112 }
1113
BtoF(long b)1114 BtoF(long b)
1115 {
1116
1117 b &= 0xffc00000L;
1118 if(b == 0)
1119 return 0;
1120 return bitno(b)*2 - 40;
1121 }
1122