1 #include "gc.h"
2
3 Reg*
rega(void)4 rega(void)
5 {
6 Reg *r;
7
8 r = freer;
9 if(r == R) {
10 r = alloc(sizeof(*r));
11 } else
12 freer = r->link;
13
14 *r = zreg;
15 return r;
16 }
17
18 int
rcmp(void * a1,void * a2)19 rcmp(void *a1, void *a2)
20 {
21 Rgn *p1, *p2;
22 int c1, c2;
23
24 p1 = a1;
25 p2 = a2;
26 c1 = p2->cost;
27 c2 = p1->cost;
28 if(c1 -= c2)
29 return c1;
30 return p2->varno - p1->varno;
31 }
32
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 Reg *r, *r1, *r2;
37 Prog *p1;
38 int i, z;
39 long initpc, val, npc;
40 ulong vreg;
41 Bits bit;
42 struct
43 {
44 long m;
45 long c;
46 Reg* p;
47 } log5[6], *lp;
48
49 firstr = R;
50 lastr = R;
51 nvar = 0;
52 regbits = 0;
53 for(z=0; z<BITS; z++) {
54 externs.b[z] = 0;
55 params.b[z] = 0;
56 consts.b[z] = 0;
57 addrs.b[z] = 0;
58 }
59
60 /*
61 * pass 1
62 * build aux data structure
63 * allocate pcs
64 * find use and set of variables
65 */
66 val = 5L * 5L * 5L * 5L * 5L;
67 lp = log5;
68 for(i=0; i<5; i++) {
69 lp->m = val;
70 lp->c = 0;
71 lp->p = R;
72 val /= 5L;
73 lp++;
74 }
75 val = 0;
76 for(; p != P; p = p->link) {
77 switch(p->as) {
78 case ADATA:
79 case AGLOBL:
80 case ANAME:
81 case ASIGNAME:
82 continue;
83 }
84 r = rega();
85 if(firstr == R) {
86 firstr = r;
87 lastr = r;
88 } else {
89 lastr->link = r;
90 r->p1 = lastr;
91 lastr->s1 = r;
92 lastr = r;
93 }
94 r->prog = p;
95 r->pc = val;
96 val++;
97
98 lp = log5;
99 for(i=0; i<5; i++) {
100 lp->c--;
101 if(lp->c <= 0) {
102 lp->c = lp->m;
103 if(lp->p != R)
104 lp->p->log5 = r;
105 lp->p = r;
106 (lp+1)->c = 0;
107 break;
108 }
109 lp++;
110 }
111
112 r1 = r->p1;
113 if(r1 != R)
114 switch(r1->prog->as) {
115 case ARETURN:
116 case ABR:
117 case ARFI:
118 case ARFCI:
119 case ARFID:
120 r->p1 = R;
121 r1->s1 = R;
122 }
123
124 /*
125 * left side always read
126 */
127 bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWZ || p->as == AMOVD);
128 for(z=0; z<BITS; z++)
129 r->use1.b[z] |= bit.b[z];
130
131 /*
132 * right side depends on opcode
133 */
134 bit = mkvar(&p->to, 0);
135 if(bany(&bit))
136 switch(p->as) {
137 default:
138 diag(Z, "reg: unknown asop: %A", p->as);
139 break;
140
141 /*
142 * right side write
143 */
144 case ANOP:
145 case AMOVB:
146 case AMOVBU:
147 case AMOVBZ:
148 case AMOVBZU:
149 case AMOVH:
150 case AMOVHBR:
151 case AMOVWBR:
152 case AMOVHU:
153 case AMOVHZ:
154 case AMOVHZU:
155 case AMOVW:
156 case AMOVWU:
157 case AMOVWZ:
158 case AMOVWZU:
159 case AMOVD:
160 case AMOVDU:
161 case AFMOVD:
162 case AFMOVDCC:
163 case AFMOVDU:
164 case AFMOVS:
165 case AFMOVSU:
166 case AFRSP:
167 for(z=0; z<BITS; z++)
168 r->set.b[z] |= bit.b[z];
169 break;
170
171 /*
172 * funny
173 */
174 case ABL:
175 for(z=0; z<BITS; z++)
176 addrs.b[z] |= bit.b[z];
177 break;
178 }
179 }
180 if(firstr == R)
181 return;
182 initpc = pc - val;
183 npc = val;
184
185 /*
186 * pass 2
187 * turn branch references to pointers
188 * build back pointers
189 */
190 for(r = firstr; r != R; r = r->link) {
191 p = r->prog;
192 if(p->to.type == D_BRANCH) {
193 val = p->to.offset - initpc;
194 r1 = firstr;
195 while(r1 != R) {
196 r2 = r1->log5;
197 if(r2 != R && val >= r2->pc) {
198 r1 = r2;
199 continue;
200 }
201 if(r1->pc == val)
202 break;
203 r1 = r1->link;
204 }
205 if(r1 == R) {
206 nearln = p->lineno;
207 diag(Z, "ref not found\n%P", p);
208 continue;
209 }
210 if(r1 == r) {
211 nearln = p->lineno;
212 diag(Z, "ref to self\n%P", p);
213 continue;
214 }
215 r->s2 = r1;
216 r->p2link = r1->p2;
217 r1->p2 = r;
218 }
219 }
220 if(debug['R']) {
221 p = firstr->prog;
222 print("\n%L %D\n", p->lineno, &p->from);
223 }
224
225 /*
226 * pass 2.5
227 * find looping structure
228 */
229 for(r = firstr; r != R; r = r->link)
230 r->active = 0;
231 change = 0;
232 loopit(firstr, npc);
233 if(debug['R'] && debug['v']) {
234 print("\nlooping structure:\n");
235 for(r = firstr; r != R; r = r->link) {
236 print("%ld:%P", r->loop, r->prog);
237 for(z=0; z<BITS; z++)
238 bit.b[z] = r->use1.b[z] |
239 r->use2.b[z] | r->set.b[z];
240 if(bany(&bit)) {
241 print("\t");
242 if(bany(&r->use1))
243 print(" u1=%B", r->use1);
244 if(bany(&r->use2))
245 print(" u2=%B", r->use2);
246 if(bany(&r->set))
247 print(" st=%B", r->set);
248 }
249 print("\n");
250 }
251 }
252
253 /*
254 * pass 3
255 * iterate propagating usage
256 * back until flow graph is complete
257 */
258 loop1:
259 change = 0;
260 for(r = firstr; r != R; r = r->link)
261 r->active = 0;
262 for(r = firstr; r != R; r = r->link)
263 if(r->prog->as == ARETURN)
264 prop(r, zbits, zbits);
265 loop11:
266 /* pick up unreachable code */
267 i = 0;
268 for(r = firstr; r != R; r = r1) {
269 r1 = r->link;
270 if(r1 && r1->active && !r->active) {
271 prop(r, zbits, zbits);
272 i = 1;
273 }
274 }
275 if(i)
276 goto loop11;
277 if(change)
278 goto loop1;
279
280
281 /*
282 * pass 4
283 * iterate propagating register/variable synchrony
284 * forward until graph is complete
285 */
286 loop2:
287 change = 0;
288 for(r = firstr; r != R; r = r->link)
289 r->active = 0;
290 synch(firstr, zbits);
291 if(change)
292 goto loop2;
293
294
295 /*
296 * pass 5
297 * isolate regions
298 * calculate costs (paint1)
299 */
300 r = firstr;
301 if(r) {
302 for(z=0; z<BITS; z++)
303 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
304 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
305 if(bany(&bit)) {
306 nearln = r->prog->lineno;
307 warn(Z, "used and not set: %B", bit);
308 if(debug['R'] && !debug['w'])
309 print("used and not set: %B\n", bit);
310 }
311 }
312 if(debug['R'] && debug['v'])
313 print("\nprop structure:\n");
314 for(r = firstr; r != R; r = r->link)
315 r->act = zbits;
316 rgp = region;
317 nregion = 0;
318 for(r = firstr; r != R; r = r->link) {
319 if(debug['R'] && debug['v'])
320 print("%P\n set = %B; rah = %B; cal = %B\n",
321 r->prog, r->set, r->refahead, r->calahead);
322 for(z=0; z<BITS; z++)
323 bit.b[z] = r->set.b[z] &
324 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
325 if(bany(&bit)) {
326 nearln = r->prog->lineno;
327 warn(Z, "set and not used: %B", bit);
328 if(debug['R'])
329 print("set an not used: %B\n", bit);
330 excise(r);
331 }
332 for(z=0; z<BITS; z++)
333 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
334 while(bany(&bit)) {
335 i = bnum(bit);
336 rgp->enter = r;
337 rgp->varno = i;
338 change = 0;
339 if(debug['R'] && debug['v'])
340 print("\n");
341 paint1(r, i);
342 bit.b[i/32] &= ~(1L<<(i%32));
343 if(change <= 0) {
344 if(debug['R'])
345 print("%L$%d: %B\n",
346 r->prog->lineno, change, blsh(i));
347 continue;
348 }
349 rgp->cost = change;
350 nregion++;
351 if(nregion >= NRGN) {
352 warn(Z, "too many regions");
353 goto brk;
354 }
355 rgp++;
356 }
357 }
358 brk:
359 qsort(region, nregion, sizeof(region[0]), rcmp);
360
361 /*
362 * pass 6
363 * determine used registers (paint2)
364 * replace code (paint3)
365 */
366 rgp = region;
367 for(i=0; i<nregion; i++) {
368 bit = blsh(rgp->varno);
369 vreg = paint2(rgp->enter, rgp->varno);
370 vreg = allreg(vreg, rgp);
371 if(debug['R']) {
372 if(rgp->regno >= NREG)
373 print("%L$%d F%d: %B\n",
374 rgp->enter->prog->lineno,
375 rgp->cost,
376 rgp->regno-NREG,
377 bit);
378 else
379 print("%L$%d R%d: %B\n",
380 rgp->enter->prog->lineno,
381 rgp->cost,
382 rgp->regno,
383 bit);
384 }
385 if(rgp->regno != 0)
386 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
387 rgp++;
388 }
389 /*
390 * pass 7
391 * peep-hole on basic block
392 */
393 if(!debug['R'] || debug['P'])
394 peep();
395
396 /*
397 * pass 8
398 * recalculate pc
399 */
400 val = initpc;
401 for(r = firstr; r != R; r = r1) {
402 r->pc = val;
403 p = r->prog;
404 p1 = P;
405 r1 = r->link;
406 if(r1 != R)
407 p1 = r1->prog;
408 for(; p != p1; p = p->link) {
409 switch(p->as) {
410 default:
411 val++;
412 break;
413
414 case ANOP:
415 case ADATA:
416 case AGLOBL:
417 case ANAME:
418 case ASIGNAME:
419 break;
420 }
421 }
422 }
423 pc = val;
424
425 /*
426 * fix up branches
427 */
428 if(debug['R'])
429 if(bany(&addrs))
430 print("addrs: %B\n", addrs);
431
432 r1 = 0; /* set */
433 for(r = firstr; r != R; r = r->link) {
434 p = r->prog;
435 if(p->to.type == D_BRANCH)
436 p->to.offset = r->s2->pc;
437 r1 = r;
438 }
439
440 /*
441 * last pass
442 * eliminate nops
443 * free aux structures
444 */
445 for(p = firstr->prog; p != P; p = p->link){
446 while(p->link && p->link->as == ANOP)
447 p->link = p->link->link;
448 }
449 if(r1 != R) {
450 r1->link = freer;
451 freer = firstr;
452 }
453 }
454
455 /*
456 * add mov b,rn
457 * just after r
458 */
459 void
addmove(Reg * r,int bn,int rn,int f)460 addmove(Reg *r, int bn, int rn, int f)
461 {
462 Prog *p, *p1;
463 Adr *a;
464 Var *v;
465
466 p1 = alloc(sizeof(*p1));
467 *p1 = zprog;
468 p = r->prog;
469
470 p1->link = p->link;
471 p->link = p1;
472 p1->lineno = p->lineno;
473
474 v = var + bn;
475
476 a = &p1->to;
477 a->sym = v->sym;
478 a->name = v->name;
479 a->offset = v->offset;
480 a->etype = v->etype;
481 a->type = D_OREG;
482 if(a->etype == TARRAY || a->sym == S)
483 a->type = D_CONST;
484
485 p1->as = AMOVW;
486 if(v->etype == TCHAR || v->etype == TUCHAR)
487 p1->as = AMOVB;
488 if(v->etype == TSHORT || v->etype == TUSHORT)
489 p1->as = AMOVH;
490 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
491 p1->as = AMOVD;
492 if(v->etype == TFLOAT)
493 p1->as = AFMOVS;
494 if(v->etype == TDOUBLE)
495 p1->as = AFMOVD;
496
497 p1->from.type = D_REG;
498 p1->from.reg = rn;
499 if(rn >= NREG) {
500 p1->from.type = D_FREG;
501 p1->from.reg = rn-NREG;
502 }
503 if(!f) {
504 p1->from = *a;
505 *a = zprog.from;
506 a->type = D_REG;
507 a->reg = rn;
508 if(rn >= NREG) {
509 a->type = D_FREG;
510 a->reg = rn-NREG;
511 }
512 if(v->etype == TUCHAR)
513 p1->as = AMOVBZ;
514 if(v->etype == TUSHORT)
515 p1->as = AMOVHZ;
516 if(v->etype == TUINT || v->etype == TULONG)
517 p1->as = AMOVWZ;
518 }
519 if(debug['R'])
520 print("%P\t.a%P\n", p, p1);
521 }
522
523 Bits
mkvar(Adr * a,int docon)524 mkvar(Adr *a, int docon)
525 {
526 Var *v;
527 int i, t, n, et, z;
528 long o;
529 Bits bit;
530 Sym *s;
531
532 t = a->type;
533 if(t == D_REG && a->reg != NREG)
534 regbits |= RtoB(a->reg);
535 if(t == D_FREG && a->reg != NREG)
536 regbits |= FtoB(a->reg);
537 s = a->sym;
538 o = a->offset;
539 et = a->etype;
540 if(s == S) {
541 if(t != D_CONST || !docon || a->reg != NREG)
542 goto none;
543 et = TLONG;
544 }
545 if(t == D_CONST) {
546 if(s == S && sval(o))
547 goto none;
548 }
549 n = a->name;
550 v = var;
551 for(i=0; i<nvar; i++) {
552 if(s == v->sym)
553 if(n == v->name)
554 if(o == v->offset)
555 goto out;
556 v++;
557 }
558 if(s)
559 if(s->name[0] == '.')
560 goto none;
561 if(nvar >= NVAR) {
562 if(debug['w'] > 1 && s)
563 warn(Z, "variable not optimized: %s", s->name);
564 goto none;
565 }
566 i = nvar;
567 nvar++;
568 v = &var[i];
569 v->sym = s;
570 v->offset = o;
571 v->etype = et;
572 v->name = n;
573 if(debug['R'])
574 print("bit=%2d et=%2d %D\n", i, et, a);
575 out:
576 bit = blsh(i);
577 if(n == D_EXTERN || n == D_STATIC)
578 for(z=0; z<BITS; z++)
579 externs.b[z] |= bit.b[z];
580 if(n == D_PARAM)
581 for(z=0; z<BITS; z++)
582 params.b[z] |= bit.b[z];
583 if(v->etype != et || !(typechlpfd[et] || typev[et])) /* funny punning */
584 for(z=0; z<BITS; z++)
585 addrs.b[z] |= bit.b[z];
586 if(t == D_CONST) {
587 if(s == S) {
588 for(z=0; z<BITS; z++)
589 consts.b[z] |= bit.b[z];
590 return bit;
591 }
592 if(et != TARRAY)
593 for(z=0; z<BITS; z++)
594 addrs.b[z] |= bit.b[z];
595 for(z=0; z<BITS; z++)
596 params.b[z] |= bit.b[z];
597 return bit;
598 }
599 if(t == D_OREG)
600 return bit;
601
602 none:
603 return zbits;
604 }
605
606 void
prop(Reg * r,Bits ref,Bits cal)607 prop(Reg *r, Bits ref, Bits cal)
608 {
609 Reg *r1, *r2;
610 int z;
611
612 for(r1 = r; r1 != R; r1 = r1->p1) {
613 for(z=0; z<BITS; z++) {
614 ref.b[z] |= r1->refahead.b[z];
615 if(ref.b[z] != r1->refahead.b[z]) {
616 r1->refahead.b[z] = ref.b[z];
617 change++;
618 }
619 cal.b[z] |= r1->calahead.b[z];
620 if(cal.b[z] != r1->calahead.b[z]) {
621 r1->calahead.b[z] = cal.b[z];
622 change++;
623 }
624 }
625 switch(r1->prog->as) {
626 case ABL:
627 for(z=0; z<BITS; z++) {
628 cal.b[z] |= ref.b[z] | externs.b[z];
629 ref.b[z] = 0;
630 }
631 break;
632
633 case ATEXT:
634 for(z=0; z<BITS; z++) {
635 cal.b[z] = 0;
636 ref.b[z] = 0;
637 }
638 break;
639
640 case ARETURN:
641 for(z=0; z<BITS; z++) {
642 cal.b[z] = externs.b[z];
643 ref.b[z] = 0;
644 }
645 }
646 for(z=0; z<BITS; z++) {
647 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
648 r1->use1.b[z] | r1->use2.b[z];
649 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
650 r1->refbehind.b[z] = ref.b[z];
651 r1->calbehind.b[z] = cal.b[z];
652 }
653 if(r1->active)
654 break;
655 r1->active = 1;
656 }
657 for(; r != r1; r = r->p1)
658 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
659 prop(r2, r->refbehind, r->calbehind);
660 }
661
662 /*
663 * find looping structure
664 *
665 * 1) find reverse postordering
666 * 2) find approximate dominators,
667 * the actual dominators if the flow graph is reducible
668 * otherwise, dominators plus some other non-dominators.
669 * See Matthew S. Hecht and Jeffrey D. Ullman,
670 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
671 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
672 * Oct. 1-3, 1973, pp. 207-217.
673 * 3) find all nodes with a predecessor dominated by the current node.
674 * such a node is a loop head.
675 * recursively, all preds with a greater rpo number are in the loop
676 */
677 long
postorder(Reg * r,Reg ** rpo2r,long n)678 postorder(Reg *r, Reg **rpo2r, long n)
679 {
680 Reg *r1;
681
682 r->rpo = 1;
683 r1 = r->s1;
684 if(r1 && !r1->rpo)
685 n = postorder(r1, rpo2r, n);
686 r1 = r->s2;
687 if(r1 && !r1->rpo)
688 n = postorder(r1, rpo2r, n);
689 rpo2r[n] = r;
690 n++;
691 return n;
692 }
693
694 long
rpolca(long * idom,long rpo1,long rpo2)695 rpolca(long *idom, long rpo1, long rpo2)
696 {
697 long t;
698
699 if(rpo1 == -1)
700 return rpo2;
701 while(rpo1 != rpo2){
702 if(rpo1 > rpo2){
703 t = rpo2;
704 rpo2 = rpo1;
705 rpo1 = t;
706 }
707 while(rpo1 < rpo2){
708 t = idom[rpo2];
709 if(t >= rpo2)
710 fatal(Z, "bad idom");
711 rpo2 = t;
712 }
713 }
714 return rpo1;
715 }
716
717 int
doms(long * idom,long r,long s)718 doms(long *idom, long r, long s)
719 {
720 while(s > r)
721 s = idom[s];
722 return s == r;
723 }
724
725 int
loophead(long * idom,Reg * r)726 loophead(long *idom, Reg *r)
727 {
728 long src;
729
730 src = r->rpo;
731 if(r->p1 != R && doms(idom, src, r->p1->rpo))
732 return 1;
733 for(r = r->p2; r != R; r = r->p2link)
734 if(doms(idom, src, r->rpo))
735 return 1;
736 return 0;
737 }
738
739 void
loopmark(Reg ** rpo2r,long head,Reg * r)740 loopmark(Reg **rpo2r, long head, Reg *r)
741 {
742 if(r->rpo < head || r->active == head)
743 return;
744 r->active = head;
745 r->loop += LOOP;
746 if(r->p1 != R)
747 loopmark(rpo2r, head, r->p1);
748 for(r = r->p2; r != R; r = r->p2link)
749 loopmark(rpo2r, head, r);
750 }
751
752 void
loopit(Reg * r,long nr)753 loopit(Reg *r, long nr)
754 {
755 Reg *r1;
756 long i, d, me;
757
758 if(nr > maxnr) {
759 rpo2r = alloc(nr * sizeof(Reg*));
760 idom = alloc(nr * sizeof(long));
761 maxnr = nr;
762 }
763
764 d = postorder(r, rpo2r, 0);
765 if(d > nr)
766 fatal(Z, "too many reg nodes");
767 nr = d;
768 for(i = 0; i < nr / 2; i++){
769 r1 = rpo2r[i];
770 rpo2r[i] = rpo2r[nr - 1 - i];
771 rpo2r[nr - 1 - i] = r1;
772 }
773 for(i = 0; i < nr; i++)
774 rpo2r[i]->rpo = i;
775
776 idom[0] = 0;
777 for(i = 0; i < nr; i++){
778 r1 = rpo2r[i];
779 me = r1->rpo;
780 d = -1;
781 if(r1->p1 != R && r1->p1->rpo < me)
782 d = r1->p1->rpo;
783 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
784 if(r1->rpo < me)
785 d = rpolca(idom, d, r1->rpo);
786 idom[i] = d;
787 }
788
789 for(i = 0; i < nr; i++){
790 r1 = rpo2r[i];
791 r1->loop++;
792 if(r1->p2 != R && loophead(idom, r1))
793 loopmark(rpo2r, i, r1);
794 }
795 }
796
797 void
synch(Reg * r,Bits dif)798 synch(Reg *r, Bits dif)
799 {
800 Reg *r1;
801 int z;
802
803 for(r1 = r; r1 != R; r1 = r1->s1) {
804 for(z=0; z<BITS; z++) {
805 dif.b[z] = (dif.b[z] &
806 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
807 r1->set.b[z] | r1->regdiff.b[z];
808 if(dif.b[z] != r1->regdiff.b[z]) {
809 r1->regdiff.b[z] = dif.b[z];
810 change++;
811 }
812 }
813 if(r1->active)
814 break;
815 r1->active = 1;
816 for(z=0; z<BITS; z++)
817 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
818 if(r1->s2 != R)
819 synch(r1->s2, dif);
820 }
821 }
822
823 ulong
allreg(ulong b,Rgn * r)824 allreg(ulong b, Rgn *r)
825 {
826 Var *v;
827 int i;
828
829 v = var + r->varno;
830 r->regno = 0;
831 switch(v->etype) {
832
833 default:
834 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
835 break;
836
837 case TCHAR:
838 case TUCHAR:
839 case TSHORT:
840 case TUSHORT:
841 case TINT:
842 case TUINT:
843 case TLONG:
844 case TULONG:
845 case TIND:
846 case TVLONG:
847 case TUVLONG:
848 case TARRAY:
849 i = BtoR(~b);
850 if(i && r->cost > 0) {
851 r->regno = i;
852 return RtoB(i);
853 }
854 break;
855
856 case TDOUBLE:
857 case TFLOAT:
858 i = BtoF(~b);
859 if(i && r->cost > 0) {
860 r->regno = i+NREG;
861 return FtoB(i);
862 }
863 break;
864 }
865 return 0;
866 }
867
868 void
paint1(Reg * r,int bn)869 paint1(Reg *r, int bn)
870 {
871 Reg *r1;
872 Prog *p;
873 int z;
874 ulong bb;
875
876 z = bn/32;
877 bb = 1L<<(bn%32);
878 if(r->act.b[z] & bb)
879 return;
880 for(;;) {
881 if(!(r->refbehind.b[z] & bb))
882 break;
883 r1 = r->p1;
884 if(r1 == R)
885 break;
886 if(!(r1->refahead.b[z] & bb))
887 break;
888 if(r1->act.b[z] & bb)
889 break;
890 r = r1;
891 }
892
893 if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
894 change -= CLOAD * r->loop;
895 if(debug['R'] && debug['v'])
896 print("%ld%P\tld %B $%d\n", r->loop,
897 r->prog, blsh(bn), change);
898 }
899 for(;;) {
900 r->act.b[z] |= bb;
901 p = r->prog;
902
903 if(r->use1.b[z] & bb) {
904 change += CREF * r->loop;
905 if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
906 change = -CINF; /* cant go Rreg to Freg */
907 if(debug['R'] && debug['v'])
908 print("%ld%P\tu1 %B $%d\n", r->loop,
909 p, blsh(bn), change);
910 }
911
912 if((r->use2.b[z]|r->set.b[z]) & bb) {
913 change += CREF * r->loop;
914 if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
915 change = -CINF; /* cant go Rreg to Freg */
916 if(debug['R'] && debug['v'])
917 print("%ld%P\tu2 %B $%d\n", r->loop,
918 p, blsh(bn), change);
919 }
920
921 if(STORE(r) & r->regdiff.b[z] & bb) {
922 change -= CLOAD * r->loop;
923 if(debug['R'] && debug['v'])
924 print("%ld%P\tst %B $%d\n", r->loop,
925 p, blsh(bn), change);
926 }
927
928 if(r->refbehind.b[z] & bb)
929 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
930 if(r1->refahead.b[z] & bb)
931 paint1(r1, bn);
932
933 if(!(r->refahead.b[z] & bb))
934 break;
935 r1 = r->s2;
936 if(r1 != R)
937 if(r1->refbehind.b[z] & bb)
938 paint1(r1, bn);
939 r = r->s1;
940 if(r == R)
941 break;
942 if(r->act.b[z] & bb)
943 break;
944 if(!(r->refbehind.b[z] & bb))
945 break;
946 }
947 }
948
949 ulong
paint2(Reg * r,int bn)950 paint2(Reg *r, int bn)
951 {
952 Reg *r1;
953 int z;
954 ulong bb, vreg;
955
956 z = bn/32;
957 bb = 1L << (bn%32);
958 vreg = regbits;
959 if(!(r->act.b[z] & bb))
960 return vreg;
961 for(;;) {
962 if(!(r->refbehind.b[z] & bb))
963 break;
964 r1 = r->p1;
965 if(r1 == R)
966 break;
967 if(!(r1->refahead.b[z] & bb))
968 break;
969 if(!(r1->act.b[z] & bb))
970 break;
971 r = r1;
972 }
973 for(;;) {
974 r->act.b[z] &= ~bb;
975
976 vreg |= r->regu;
977
978 if(r->refbehind.b[z] & bb)
979 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
980 if(r1->refahead.b[z] & bb)
981 vreg |= paint2(r1, bn);
982
983 if(!(r->refahead.b[z] & bb))
984 break;
985 r1 = r->s2;
986 if(r1 != R)
987 if(r1->refbehind.b[z] & bb)
988 vreg |= paint2(r1, bn);
989 r = r->s1;
990 if(r == R)
991 break;
992 if(!(r->act.b[z] & bb))
993 break;
994 if(!(r->refbehind.b[z] & bb))
995 break;
996 }
997 return vreg;
998 }
999
1000 void
paint3(Reg * r,int bn,long rb,int rn)1001 paint3(Reg *r, int bn, long rb, int rn)
1002 {
1003 Reg *r1;
1004 Prog *p;
1005 int z;
1006 ulong bb;
1007
1008 z = bn/32;
1009 bb = 1L << (bn%32);
1010 if(r->act.b[z] & bb)
1011 return;
1012 for(;;) {
1013 if(!(r->refbehind.b[z] & bb))
1014 break;
1015 r1 = r->p1;
1016 if(r1 == R)
1017 break;
1018 if(!(r1->refahead.b[z] & bb))
1019 break;
1020 if(r1->act.b[z] & bb)
1021 break;
1022 r = r1;
1023 }
1024
1025 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1026 addmove(r, bn, rn, 0);
1027 for(;;) {
1028 r->act.b[z] |= bb;
1029 p = r->prog;
1030
1031 if(r->use1.b[z] & bb) {
1032 if(debug['R'])
1033 print("%P", p);
1034 addreg(&p->from, rn);
1035 if(debug['R'])
1036 print("\t.c%P\n", p);
1037 }
1038 if((r->use2.b[z]|r->set.b[z]) & bb) {
1039 if(debug['R'])
1040 print("%P", p);
1041 addreg(&p->to, rn);
1042 if(debug['R'])
1043 print("\t.c%P\n", p);
1044 }
1045
1046 if(STORE(r) & r->regdiff.b[z] & bb)
1047 addmove(r, bn, rn, 1);
1048 r->regu |= rb;
1049
1050 if(r->refbehind.b[z] & bb)
1051 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1052 if(r1->refahead.b[z] & bb)
1053 paint3(r1, bn, rb, rn);
1054
1055 if(!(r->refahead.b[z] & bb))
1056 break;
1057 r1 = r->s2;
1058 if(r1 != R)
1059 if(r1->refbehind.b[z] & bb)
1060 paint3(r1, bn, rb, rn);
1061 r = r->s1;
1062 if(r == R)
1063 break;
1064 if(r->act.b[z] & bb)
1065 break;
1066 if(!(r->refbehind.b[z] & bb))
1067 break;
1068 }
1069 }
1070
1071 void
addreg(Adr * a,int rn)1072 addreg(Adr *a, int rn)
1073 {
1074
1075 a->sym = 0;
1076 a->name = D_NONE;
1077 a->type = D_REG;
1078 a->reg = rn;
1079 if(rn >= NREG) {
1080 a->type = D_FREG;
1081 a->reg = rn-NREG;
1082 }
1083 }
1084
1085 /*
1086 * track register variables including external registers:
1087 * bit reg
1088 * 0 R7
1089 * 1 R8
1090 * ... ...
1091 * 21 R28
1092 */
1093 long
RtoB(int r)1094 RtoB(int r)
1095 {
1096
1097 if(r >= REGMIN && r <= REGMAX)
1098 return 1L << (r-REGMIN);
1099 return 0;
1100 }
1101
1102 int
BtoR(long b)1103 BtoR(long b)
1104 {
1105 b &= 0x001fffffL;
1106 if(b == 0)
1107 return 0;
1108 return bitno(b) + REGMIN;
1109 }
1110
1111 /*
1112 * bit reg
1113 * 22 F17
1114 * 23 F18
1115 * ... ...
1116 * 31 F26
1117 */
1118 long
FtoB(int f)1119 FtoB(int f)
1120 {
1121 if(f < FREGMIN || f > FREGEXT)
1122 return 0;
1123 return 1L << (f - FREGMIN + 22);
1124 }
1125
1126 int
BtoF(long b)1127 BtoF(long b)
1128 {
1129
1130 b &= 0xffc00000L;
1131 if(b == 0)
1132 return 0;
1133 return bitno(b) - 22 + FREGMIN;
1134 }
1135