1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(void * a1,void * a2)19 rcmp(void *a1, void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = a1;
25 p2 = a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = 0;
53 for(z=0; z<BITS; z++) {
54 externs.b[z] = 0;
55 params.b[z] = 0;
56 consts.b[z] = 0;
57 addrs.b[z] = 0;
58 }
59
60 /*
61 * pass 1
62 * build aux data structure
63 * allocate pcs
64 * find use and set of variables
65 */
66 val = 5L * 5L * 5L * 5L * 5L;
67 lp = log5;
68 for(i=0; i<5; i++) {
69 lp->m = val;
70 lp->c = 0;
71 lp->p = R;
72 val /= 5L;
73 lp++;
74 }
75 val = 0;
76 for(; p != P; p = p->link) {
77 switch(p->as) {
78 case ADATA:
79 case AGLOBL:
80 case ANAME:
81 case ASIGNAME:
82 continue;
83 }
84 r = rega();
85 if(firstr == R) {
86 firstr = r;
87 lastr = r;
88 } else {
89 lastr->link = r;
90 r->p1 = lastr;
91 lastr->s1 = r;
92 lastr = r;
93 }
94 r->prog = p;
95 r->pc = val;
96 val++;
97
98 lp = log5;
99 for(i=0; i<5; i++) {
100 lp->c--;
101 if(lp->c <= 0) {
102 lp->c = lp->m;
103 if(lp->p != R)
104 lp->p->log5 = r;
105 lp->p = r;
106 (lp+1)->c = 0;
107 break;
108 }
109 lp++;
110 }
111
112 r1 = r->p1;
113 if(r1 != R)
114 switch(r1->prog->as) {
115 case ARETURN:
116 case ABR:
117 case ARFI:
118 r->p1 = R;
119 r1->s1 = R;
120 }
121
122 /*
123 * left side always read
124 */
125 bit = mkvar(&p->from, p->as==AMOVW);
126 for(z=0; z<BITS; z++)
127 r->use1.b[z] |= bit.b[z];
128
129 /*
130 * right side depends on opcode
131 */
132 bit = mkvar(&p->to, 0);
133 if(bany(&bit))
134 switch(p->as) {
135 default:
136 diag(Z, "reg: unknown asop: %A", p->as);
137 break;
138
139 /*
140 * right side write
141 */
142 case ANOP:
143 case AMOVB:
144 case AMOVBU:
145 case AMOVBZ:
146 case AMOVBZU:
147 case AMOVH:
148 case AMOVHBR:
149 case AMOVHU:
150 case AMOVHZ:
151 case AMOVHZU:
152 case AMOVW:
153 case AMOVWU:
154 case AFMOVD:
155 case AFMOVDCC:
156 case AFMOVDU:
157 case AFMOVS:
158 case AFMOVSU:
159 case AFRSP:
160 for(z=0; z<BITS; z++)
161 r->set.b[z] |= bit.b[z];
162 break;
163
164 /*
165 * funny
166 */
167 case ABL:
168 for(z=0; z<BITS; z++)
169 addrs.b[z] |= bit.b[z];
170 break;
171 }
172 }
173 if(firstr == R)
174 return;
175 initpc = pc - val;
176 npc = val;
177
178 /*
179 * pass 2
180 * turn branch references to pointers
181 * build back pointers
182 */
183 for(r = firstr; r != R; r = r->link) {
184 p = r->prog;
185 if(p->to.type == D_BRANCH) {
186 val = p->to.offset - initpc;
187 r1 = firstr;
188 while(r1 != R) {
189 r2 = r1->log5;
190 if(r2 != R && val >= r2->pc) {
191 r1 = r2;
192 continue;
193 }
194 if(r1->pc == val)
195 break;
196 r1 = r1->link;
197 }
198 if(r1 == R) {
199 nearln = p->lineno;
200 diag(Z, "ref not found\n%P", p);
201 continue;
202 }
203 if(r1 == r) {
204 nearln = p->lineno;
205 diag(Z, "ref to self\n%P", p);
206 continue;
207 }
208 r->s2 = r1;
209 r->p2link = r1->p2;
210 r1->p2 = r;
211 }
212 }
213 if(debug['R']) {
214 p = firstr->prog;
215 print("\n%L %D\n", p->lineno, &p->from);
216 }
217
218 /*
219 * pass 2.5
220 * find looping structure
221 */
222 for(r = firstr; r != R; r = r->link)
223 r->active = 0;
224 change = 0;
225 loopit(firstr, npc);
226 if(debug['R'] && debug['v']) {
227 print("\nlooping structure:\n");
228 for(r = firstr; r != R; r = r->link) {
229 print("%ld:%P", r->loop, r->prog);
230 for(z=0; z<BITS; z++)
231 bit.b[z] = r->use1.b[z] |
232 r->use2.b[z] | r->set.b[z];
233 if(bany(&bit)) {
234 print("\t");
235 if(bany(&r->use1))
236 print(" u1=%B", r->use1);
237 if(bany(&r->use2))
238 print(" u2=%B", r->use2);
239 if(bany(&r->set))
240 print(" st=%B", r->set);
241 }
242 print("\n");
243 }
244 }
245
246 /*
247 * pass 3
248 * iterate propagating usage
249 * back until flow graph is complete
250 */
251 loop1:
252 change = 0;
253 for(r = firstr; r != R; r = r->link)
254 r->active = 0;
255 for(r = firstr; r != R; r = r->link)
256 if(r->prog->as == ARETURN)
257 prop(r, zbits, zbits);
258 loop11:
259 /* pick up unreachable code */
260 i = 0;
261 for(r = firstr; r != R; r = r1) {
262 r1 = r->link;
263 if(r1 && r1->active && !r->active) {
264 prop(r, zbits, zbits);
265 i = 1;
266 }
267 }
268 if(i)
269 goto loop11;
270 if(change)
271 goto loop1;
272
273
274 /*
275 * pass 4
276 * iterate propagating register/variable synchrony
277 * forward until graph is complete
278 */
279 loop2:
280 change = 0;
281 for(r = firstr; r != R; r = r->link)
282 r->active = 0;
283 synch(firstr, zbits);
284 if(change)
285 goto loop2;
286
287
288 /*
289 * pass 5
290 * isolate regions
291 * calculate costs (paint1)
292 */
293 r = firstr;
294 if(r) {
295 for(z=0; z<BITS; z++)
296 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
297 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
298 if(bany(&bit)) {
299 nearln = r->prog->lineno;
300 warn(Z, "used and not set: %B", bit);
301 if(debug['R'] && !debug['w'])
302 print("used and not set: %B\n", bit);
303 }
304 }
305 if(debug['R'] && debug['v'])
306 print("\nprop structure:\n");
307 for(r = firstr; r != R; r = r->link)
308 r->act = zbits;
309 rgp = region;
310 nregion = 0;
311 for(r = firstr; r != R; r = r->link) {
312 if(debug['R'] && debug['v'])
313 print("%P\n set = %B; rah = %B; cal = %B\n",
314 r->prog, r->set, r->refahead, r->calahead);
315 for(z=0; z<BITS; z++)
316 bit.b[z] = r->set.b[z] &
317 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
318 if(bany(&bit)) {
319 nearln = r->prog->lineno;
320 warn(Z, "set and not used: %B", bit);
321 if(debug['R'])
322 print("set an not used: %B\n", bit);
323 excise(r);
324 }
325 for(z=0; z<BITS; z++)
326 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
327 while(bany(&bit)) {
328 i = bnum(bit);
329 rgp->enter = r;
330 rgp->varno = i;
331 change = 0;
332 if(debug['R'] && debug['v'])
333 print("\n");
334 paint1(r, i);
335 bit.b[i/32] &= ~(1L<<(i%32));
336 if(change <= 0) {
337 if(debug['R'])
338 print("%L$%d: %B\n",
339 r->prog->lineno, change, blsh(i));
340 continue;
341 }
342 rgp->cost = change;
343 nregion++;
344 if(nregion >= NRGN) {
345 warn(Z, "too many regions");
346 goto brk;
347 }
348 rgp++;
349 }
350 }
351 brk:
352 qsort(region, nregion, sizeof(region[0]), rcmp);
353
354 /*
355 * pass 6
356 * determine used registers (paint2)
357 * replace code (paint3)
358 */
359 rgp = region;
360 for(i=0; i<nregion; i++) {
361 bit = blsh(rgp->varno);
362 vreg = paint2(rgp->enter, rgp->varno);
363 vreg = allreg(vreg, rgp);
364 if(debug['R']) {
365 if(rgp->regno >= NREG)
366 print("%L$%d F%d: %B\n",
367 rgp->enter->prog->lineno,
368 rgp->cost,
369 rgp->regno-NREG,
370 bit);
371 else
372 print("%L$%d R%d: %B\n",
373 rgp->enter->prog->lineno,
374 rgp->cost,
375 rgp->regno,
376 bit);
377 }
378 if(rgp->regno != 0)
379 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
380 rgp++;
381 }
382 /*
383 * pass 7
384 * peep-hole on basic block
385 */
386 if(!debug['R'] || debug['P'])
387 peep();
388
389 /*
390 * pass 8
391 * recalculate pc
392 */
393 val = initpc;
394 for(r = firstr; r != R; r = r1) {
395 r->pc = val;
396 p = r->prog;
397 p1 = P;
398 r1 = r->link;
399 if(r1 != R)
400 p1 = r1->prog;
401 for(; p != p1; p = p->link) {
402 switch(p->as) {
403 default:
404 val++;
405 break;
406
407 case ANOP:
408 case ADATA:
409 case AGLOBL:
410 case ANAME:
411 case ASIGNAME:
412 break;
413 }
414 }
415 }
416 pc = val;
417
418 /*
419 * fix up branches
420 */
421 if(debug['R'])
422 if(bany(&addrs))
423 print("addrs: %B\n", addrs);
424
425 r1 = 0; /* set */
426 for(r = firstr; r != R; r = r->link) {
427 p = r->prog;
428 if(p->to.type == D_BRANCH)
429 p->to.offset = r->s2->pc;
430 r1 = r;
431 }
432
433 /*
434 * last pass
435 * eliminate nops
436 * free aux structures
437 */
438 for(p = firstr->prog; p != P; p = p->link){
439 while(p->link && p->link->as == ANOP)
440 p->link = p->link->link;
441 }
442 if(r1 != R) {
443 r1->link = freer;
444 freer = firstr;
445 }
446 }
447
448 /*
449 * add mov b,rn
450 * just after r
451 */
452 void
addmove(Reg * r,int bn,int rn,int f)453 addmove(Reg *r, int bn, int rn, int f)
454 {
455 Prog *p, *p1;
456 Adr *a;
457 Var *v;
458
459 p1 = alloc(sizeof(*p1));
460 *p1 = zprog;
461 p = r->prog;
462
463 p1->link = p->link;
464 p->link = p1;
465 p1->lineno = p->lineno;
466
467 v = var + bn;
468
469 a = &p1->to;
470 a->sym = v->sym;
471 a->name = v->name;
472 a->offset = v->offset;
473 a->etype = v->etype;
474 a->type = D_OREG;
475 if(a->etype == TARRAY || a->sym == S)
476 a->type = D_CONST;
477
478 p1->as = AMOVW;
479 if(v->etype == TCHAR || v->etype == TUCHAR)
480 p1->as = AMOVB;
481 if(v->etype == TSHORT || v->etype == TUSHORT)
482 p1->as = AMOVH;
483 if(v->etype == TFLOAT)
484 p1->as = AFMOVS;
485 if(v->etype == TDOUBLE)
486 p1->as = AFMOVD;
487
488 p1->from.type = D_REG;
489 p1->from.reg = rn;
490 if(rn >= NREG) {
491 p1->from.type = D_FREG;
492 p1->from.reg = rn-NREG;
493 }
494 if(!f) {
495 p1->from = *a;
496 *a = zprog.from;
497 a->type = D_REG;
498 a->reg = rn;
499 if(rn >= NREG) {
500 a->type = D_FREG;
501 a->reg = rn-NREG;
502 }
503 if(v->etype == TUCHAR)
504 p1->as = AMOVBZ;
505 if(v->etype == TUSHORT)
506 p1->as = AMOVHZ;
507 }
508 if(debug['R'])
509 print("%P\t.a%P\n", p, p1);
510 }
511
512 Bits
mkvar(Adr * a,int docon)513 mkvar(Adr *a, int docon)
514 {
515 Var *v;
516 int i, t, n, et, z;
517 long o;
518 Bits bit;
519 Sym *s;
520
521 t = a->type;
522 if(t == D_REG && a->reg != NREG)
523 regbits |= RtoB(a->reg);
524 if(t == D_FREG && a->reg != NREG)
525 regbits |= FtoB(a->reg);
526 s = a->sym;
527 o = a->offset;
528 et = a->etype;
529 if(s == S) {
530 if(t != D_CONST || !docon || a->reg != NREG)
531 goto none;
532 et = TLONG;
533 }
534 if(t == D_CONST) {
535 if(s == S && sval(o))
536 goto none;
537 }
538 n = a->name;
539 v = var;
540 for(i=0; i<nvar; i++) {
541 if(s == v->sym)
542 if(n == v->name)
543 if(o == v->offset)
544 goto out;
545 v++;
546 }
547 if(s)
548 if(s->name[0] == '.')
549 goto none;
550 if(nvar >= NVAR) {
551 if(debug['w'] > 1 && s)
552 warn(Z, "variable not optimized: %s", s->name);
553 goto none;
554 }
555 i = nvar;
556 nvar++;
557 v = &var[i];
558 v->sym = s;
559 v->offset = o;
560 v->etype = et;
561 v->name = n;
562 if(debug['R'])
563 print("bit=%2d et=%2d %D\n", i, et, a);
564 out:
565 bit = blsh(i);
566 if(n == D_EXTERN || n == D_STATIC)
567 for(z=0; z<BITS; z++)
568 externs.b[z] |= bit.b[z];
569 if(n == D_PARAM)
570 for(z=0; z<BITS; z++)
571 params.b[z] |= bit.b[z];
572 if(v->etype != et || !typechlpfd[et]) /* funny punning */
573 for(z=0; z<BITS; z++)
574 addrs.b[z] |= bit.b[z];
575 if(t == D_CONST) {
576 if(s == S) {
577 for(z=0; z<BITS; z++)
578 consts.b[z] |= bit.b[z];
579 return bit;
580 }
581 if(et != TARRAY)
582 for(z=0; z<BITS; z++)
583 addrs.b[z] |= bit.b[z];
584 for(z=0; z<BITS; z++)
585 params.b[z] |= bit.b[z];
586 return bit;
587 }
588 if(t == D_OREG)
589 return bit;
590
591 none:
592 return zbits;
593 }
594
595 void
prop(Reg * r,Bits ref,Bits cal)596 prop(Reg *r, Bits ref, Bits cal)
597 {
598 Reg *r1, *r2;
599 int z;
600
601 for(r1 = r; r1 != R; r1 = r1->p1) {
602 for(z=0; z<BITS; z++) {
603 ref.b[z] |= r1->refahead.b[z];
604 if(ref.b[z] != r1->refahead.b[z]) {
605 r1->refahead.b[z] = ref.b[z];
606 change++;
607 }
608 cal.b[z] |= r1->calahead.b[z];
609 if(cal.b[z] != r1->calahead.b[z]) {
610 r1->calahead.b[z] = cal.b[z];
611 change++;
612 }
613 }
614 switch(r1->prog->as) {
615 case ABL:
616 for(z=0; z<BITS; z++) {
617 cal.b[z] |= ref.b[z] | externs.b[z];
618 ref.b[z] = 0;
619 }
620 break;
621
622 case ATEXT:
623 for(z=0; z<BITS; z++) {
624 cal.b[z] = 0;
625 ref.b[z] = 0;
626 }
627 break;
628
629 case ARETURN:
630 for(z=0; z<BITS; z++) {
631 cal.b[z] = externs.b[z];
632 ref.b[z] = 0;
633 }
634 }
635 for(z=0; z<BITS; z++) {
636 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
637 r1->use1.b[z] | r1->use2.b[z];
638 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
639 r1->refbehind.b[z] = ref.b[z];
640 r1->calbehind.b[z] = cal.b[z];
641 }
642 if(r1->active)
643 break;
644 r1->active = 1;
645 }
646 for(; r != r1; r = r->p1)
647 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
648 prop(r2, r->refbehind, r->calbehind);
649 }
650
651 /*
652 * find looping structure
653 *
654 * 1) find reverse postordering
655 * 2) find approximate dominators,
656 * the actual dominators if the flow graph is reducible
657 * otherwise, dominators plus some other non-dominators.
658 * See Matthew S. Hecht and Jeffrey D. Ullman,
659 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
660 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
661 * Oct. 1-3, 1973, pp. 207-217.
662 * 3) find all nodes with a predecessor dominated by the current node.
663 * such a node is a loop head.
664 * recursively, all preds with a greater rpo number are in the loop
665 */
666 long
postorder(Reg * r,Reg ** rpo2r,long n)667 postorder(Reg *r, Reg **rpo2r, long n)
668 {
669 Reg *r1;
670
671 r->rpo = 1;
672 r1 = r->s1;
673 if(r1 && !r1->rpo)
674 n = postorder(r1, rpo2r, n);
675 r1 = r->s2;
676 if(r1 && !r1->rpo)
677 n = postorder(r1, rpo2r, n);
678 rpo2r[n] = r;
679 n++;
680 return n;
681 }
682
683 long
rpolca(long * idom,long rpo1,long rpo2)684 rpolca(long *idom, long rpo1, long rpo2)
685 {
686 long t;
687
688 if(rpo1 == -1)
689 return rpo2;
690 while(rpo1 != rpo2){
691 if(rpo1 > rpo2){
692 t = rpo2;
693 rpo2 = rpo1;
694 rpo1 = t;
695 }
696 while(rpo1 < rpo2){
697 t = idom[rpo2];
698 if(t >= rpo2)
699 fatal(Z, "bad idom");
700 rpo2 = t;
701 }
702 }
703 return rpo1;
704 }
705
706 int
doms(long * idom,long r,long s)707 doms(long *idom, long r, long s)
708 {
709 while(s > r)
710 s = idom[s];
711 return s == r;
712 }
713
714 int
loophead(long * idom,Reg * r)715 loophead(long *idom, Reg *r)
716 {
717 long src;
718
719 src = r->rpo;
720 if(r->p1 != R && doms(idom, src, r->p1->rpo))
721 return 1;
722 for(r = r->p2; r != R; r = r->p2link)
723 if(doms(idom, src, r->rpo))
724 return 1;
725 return 0;
726 }
727
728 void
loopmark(Reg ** rpo2r,long head,Reg * r)729 loopmark(Reg **rpo2r, long head, Reg *r)
730 {
731 if(r->rpo < head || r->active == head)
732 return;
733 r->active = head;
734 r->loop += LOOP;
735 if(r->p1 != R)
736 loopmark(rpo2r, head, r->p1);
737 for(r = r->p2; r != R; r = r->p2link)
738 loopmark(rpo2r, head, r);
739 }
740
741 void
loopit(Reg * r,long nr)742 loopit(Reg *r, long nr)
743 {
744 Reg *r1;
745 long i, d, me;
746
747 if(nr > maxnr) {
748 rpo2r = alloc(nr * sizeof(Reg*));
749 idom = alloc(nr * sizeof(long));
750 maxnr = nr;
751 }
752
753 d = postorder(r, rpo2r, 0);
754 if(d > nr)
755 fatal(Z, "too many reg nodes");
756 nr = d;
757 for(i = 0; i < nr / 2; i++){
758 r1 = rpo2r[i];
759 rpo2r[i] = rpo2r[nr - 1 - i];
760 rpo2r[nr - 1 - i] = r1;
761 }
762 for(i = 0; i < nr; i++)
763 rpo2r[i]->rpo = i;
764
765 idom[0] = 0;
766 for(i = 0; i < nr; i++){
767 r1 = rpo2r[i];
768 me = r1->rpo;
769 d = -1;
770 if(r1->p1 != R && r1->p1->rpo < me)
771 d = r1->p1->rpo;
772 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
773 if(r1->rpo < me)
774 d = rpolca(idom, d, r1->rpo);
775 idom[i] = d;
776 }
777
778 for(i = 0; i < nr; i++){
779 r1 = rpo2r[i];
780 r1->loop++;
781 if(r1->p2 != R && loophead(idom, r1))
782 loopmark(rpo2r, i, r1);
783 }
784 }
785
786 void
synch(Reg * r,Bits dif)787 synch(Reg *r, Bits dif)
788 {
789 Reg *r1;
790 int z;
791
792 for(r1 = r; r1 != R; r1 = r1->s1) {
793 for(z=0; z<BITS; z++) {
794 dif.b[z] = (dif.b[z] &
795 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
796 r1->set.b[z] | r1->regdiff.b[z];
797 if(dif.b[z] != r1->regdiff.b[z]) {
798 r1->regdiff.b[z] = dif.b[z];
799 change++;
800 }
801 }
802 if(r1->active)
803 break;
804 r1->active = 1;
805 for(z=0; z<BITS; z++)
806 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
807 if(r1->s2 != R)
808 synch(r1->s2, dif);
809 }
810 }
811
812 ulong
allreg(ulong b,Rgn * r)813 allreg(ulong b, Rgn *r)
814 {
815 Var *v;
816 int i;
817
818 v = var + r->varno;
819 r->regno = 0;
820 switch(v->etype) {
821
822 default:
823 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
824 break;
825
826 case TCHAR:
827 case TUCHAR:
828 case TSHORT:
829 case TUSHORT:
830 case TINT:
831 case TUINT:
832 case TLONG:
833 case TULONG:
834 case TIND:
835 case TARRAY:
836 i = BtoR(~b);
837 if(i && r->cost > 0) {
838 r->regno = i;
839 return RtoB(i);
840 }
841 break;
842
843 case TDOUBLE:
844 case TFLOAT:
845 i = BtoF(~b);
846 if(i && r->cost > 0) {
847 r->regno = i+NREG;
848 return FtoB(i);
849 }
850 break;
851 }
852 return 0;
853 }
854
855 void
paint1(Reg * r,int bn)856 paint1(Reg *r, int bn)
857 {
858 Reg *r1;
859 Prog *p;
860 int z;
861 ulong bb;
862
863 z = bn/32;
864 bb = 1L<<(bn%32);
865 if(r->act.b[z] & bb)
866 return;
867 for(;;) {
868 if(!(r->refbehind.b[z] & bb))
869 break;
870 r1 = r->p1;
871 if(r1 == R)
872 break;
873 if(!(r1->refahead.b[z] & bb))
874 break;
875 if(r1->act.b[z] & bb)
876 break;
877 r = r1;
878 }
879
880 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
881 change -= CLOAD * r->loop;
882 if(debug['R'] && debug['v'])
883 print("%ld%P\tld %B $%d\n", r->loop,
884 r->prog, blsh(bn), change);
885 }
886 for(;;) {
887 r->act.b[z] |= bb;
888 p = r->prog;
889
890 if(r->use1.b[z] & bb) {
891 change += CREF * r->loop;
892 if(p->to.type == D_FREG && p->as == AMOVW)
893 change = -CINF; /* cant go Rreg to Freg */
894 if(debug['R'] && debug['v'])
895 print("%ld%P\tu1 %B $%d\n", r->loop,
896 p, blsh(bn), change);
897 }
898
899 if((r->use2.b[z]|r->set.b[z]) & bb) {
900 change += CREF * r->loop;
901 if(p->from.type == D_FREG && p->as == AMOVW)
902 change = -CINF; /* cant go Rreg to Freg */
903 if(debug['R'] && debug['v'])
904 print("%ld%P\tu2 %B $%d\n", r->loop,
905 p, blsh(bn), change);
906 }
907
908 if(STORE(r) & r->regdiff.b[z] & bb) {
909 change -= CLOAD * r->loop;
910 if(debug['R'] && debug['v'])
911 print("%ld%P\tst %B $%d\n", r->loop,
912 p, blsh(bn), change);
913 }
914
915 if(r->refbehind.b[z] & bb)
916 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
917 if(r1->refahead.b[z] & bb)
918 paint1(r1, bn);
919
920 if(!(r->refahead.b[z] & bb))
921 break;
922 r1 = r->s2;
923 if(r1 != R)
924 if(r1->refbehind.b[z] & bb)
925 paint1(r1, bn);
926 r = r->s1;
927 if(r == R)
928 break;
929 if(r->act.b[z] & bb)
930 break;
931 if(!(r->refbehind.b[z] & bb))
932 break;
933 }
934 }
935
936 ulong
paint2(Reg * r,int bn)937 paint2(Reg *r, int bn)
938 {
939 Reg *r1;
940 int z;
941 ulong bb, vreg;
942
943 z = bn/32;
944 bb = 1L << (bn%32);
945 vreg = regbits;
946 if(!(r->act.b[z] & bb))
947 return vreg;
948 for(;;) {
949 if(!(r->refbehind.b[z] & bb))
950 break;
951 r1 = r->p1;
952 if(r1 == R)
953 break;
954 if(!(r1->refahead.b[z] & bb))
955 break;
956 if(!(r1->act.b[z] & bb))
957 break;
958 r = r1;
959 }
960 for(;;) {
961 r->act.b[z] &= ~bb;
962
963 vreg |= r->regu;
964
965 if(r->refbehind.b[z] & bb)
966 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
967 if(r1->refahead.b[z] & bb)
968 vreg |= paint2(r1, bn);
969
970 if(!(r->refahead.b[z] & bb))
971 break;
972 r1 = r->s2;
973 if(r1 != R)
974 if(r1->refbehind.b[z] & bb)
975 vreg |= paint2(r1, bn);
976 r = r->s1;
977 if(r == R)
978 break;
979 if(!(r->act.b[z] & bb))
980 break;
981 if(!(r->refbehind.b[z] & bb))
982 break;
983 }
984 return vreg;
985 }
986
987 void
paint3(Reg * r,int bn,long rb,int rn)988 paint3(Reg *r, int bn, long rb, int rn)
989 {
990 Reg *r1;
991 Prog *p;
992 int z;
993 ulong bb;
994
995 z = bn/32;
996 bb = 1L << (bn%32);
997 if(r->act.b[z] & bb)
998 return;
999 for(;;) {
1000 if(!(r->refbehind.b[z] & bb))
1001 break;
1002 r1 = r->p1;
1003 if(r1 == R)
1004 break;
1005 if(!(r1->refahead.b[z] & bb))
1006 break;
1007 if(r1->act.b[z] & bb)
1008 break;
1009 r = r1;
1010 }
1011
1012 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1013 addmove(r, bn, rn, 0);
1014 for(;;) {
1015 r->act.b[z] |= bb;
1016 p = r->prog;
1017
1018 if(r->use1.b[z] & bb) {
1019 if(debug['R'])
1020 print("%P", p);
1021 addreg(&p->from, rn);
1022 if(debug['R'])
1023 print("\t.c%P\n", p);
1024 }
1025 if((r->use2.b[z]|r->set.b[z]) & bb) {
1026 if(debug['R'])
1027 print("%P", p);
1028 addreg(&p->to, rn);
1029 if(debug['R'])
1030 print("\t.c%P\n", p);
1031 }
1032
1033 if(STORE(r) & r->regdiff.b[z] & bb)
1034 addmove(r, bn, rn, 1);
1035 r->regu |= rb;
1036
1037 if(r->refbehind.b[z] & bb)
1038 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1039 if(r1->refahead.b[z] & bb)
1040 paint3(r1, bn, rb, rn);
1041
1042 if(!(r->refahead.b[z] & bb))
1043 break;
1044 r1 = r->s2;
1045 if(r1 != R)
1046 if(r1->refbehind.b[z] & bb)
1047 paint3(r1, bn, rb, rn);
1048 r = r->s1;
1049 if(r == R)
1050 break;
1051 if(r->act.b[z] & bb)
1052 break;
1053 if(!(r->refbehind.b[z] & bb))
1054 break;
1055 }
1056 }
1057
1058 void
addreg(Adr * a,int rn)1059 addreg(Adr *a, int rn)
1060 {
1061
1062 a->sym = 0;
1063 a->name = D_NONE;
1064 a->type = D_REG;
1065 a->reg = rn;
1066 if(rn >= NREG) {
1067 a->type = D_FREG;
1068 a->reg = rn-NREG;
1069 }
1070 }
1071
1072 /*
1073 * track register variables including external registers:
1074 * bit reg
1075 * 0 R7
1076 * 1 R8
1077 * ... ...
1078 * 21 R28
1079 */
1080 long
RtoB(int r)1081 RtoB(int r)
1082 {
1083
1084 if(r >= REGMIN && r <= REGMAX)
1085 return 1L << (r-REGMIN);
1086 return 0;
1087 }
1088
1089 int
BtoR(long b)1090 BtoR(long b)
1091 {
1092 b &= 0x001fffffL;
1093 if(b == 0)
1094 return 0;
1095 return bitno(b) + REGMIN;
1096 }
1097
1098 /*
1099 * bit reg
1100 * 22 F17
1101 * 23 F18
1102 * ... ...
1103 * 31 F26
1104 */
1105 long
FtoB(int f)1106 FtoB(int f)
1107 {
1108 if(f < FREGMIN || f > FREGEXT)
1109 return 0;
1110 return 1L << (f - FREGMIN + 22);
1111 }
1112
1113 int
BtoF(long b)1114 BtoF(long b)
1115 {
1116
1117 b &= 0xffc00000L;
1118 if(b == 0)
1119 return 0;
1120 return bitno(b) - 22 + FREGMIN;
1121 }
1122