1 #include "gc.h"
2
3 void addsplits(void);
4
5 Reg*
rega(void)6 rega(void)
7 {
8 Reg *r;
9
10 r = freer;
11 if(r == R) {
12 r = alloc(sizeof(*r));
13 } else
14 freer = r->link;
15
16 *r = zreg;
17 return r;
18 }
19
20 int
rcmp(void * a1,void * a2)21 rcmp(void *a1, void *a2)
22 {
23 Rgn *p1, *p2;
24 int c1, c2;
25
26 p1 = (Rgn*)a1;
27 p2 = (Rgn*)a2;
28 c1 = p2->cost;
29 c2 = p1->cost;
30 if(c1 -= c2)
31 return c1;
32 return p2->varno - p1->varno;
33 }
34
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 Reg *r, *r1, *r2;
39 Prog *p1;
40 int i, z;
41 long initpc, val, npc;
42 ulong vreg;
43 Bits bit;
44 struct
45 {
46 long m;
47 long c;
48 Reg* p;
49 } log5[6], *lp;
50
51 firstr = R;
52 lastr = R;
53 nvar = 0;
54 regbits = 0;
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 r->p1 = R;
120 r1->s1 = R;
121 }
122
123 /*
124 * left side always read
125 */
126 bit = mkvar(&p->from, p->as==AMOVW || p->as==AMOV);
127 for(z=0; z<BITS; z++)
128 r->use1.b[z] |= bit.b[z];
129
130 /*
131 * right side depends on opcode
132 */
133 bit = mkvar(&p->to, 0);
134 if(bany(&bit))
135 switch(p->as) {
136 default:
137 diag(Z, "reg: unknown asop: %A", p->as);
138 break;
139
140 /*
141 * right side write
142 */
143 case ANOP:
144 case AMOVB:
145 case AMOVBU:
146 case AMOVH:
147 case AMOVHU:
148 case AMOVW:
149 case AMOV:
150 case AMOVF:
151 case AMOVD:
152 for(z=0; z<BITS; z++)
153 r->set.b[z] |= bit.b[z];
154 break;
155
156 /*
157 * funny
158 */
159 case AJAL:
160 for(z=0; z<BITS; z++)
161 addrs.b[z] |= bit.b[z];
162 break;
163 }
164 }
165 if(firstr == R)
166 return;
167 initpc = pc - val;
168 npc = val;
169
170 /*
171 * pass 2
172 * turn branch references to pointers
173 * build back pointers
174 */
175 for(r = firstr; r != R; r = r->link) {
176 p = r->prog;
177 if(p->to.type == D_BRANCH) {
178 val = p->to.offset - initpc;
179 r1 = firstr;
180 while(r1 != R) {
181 r2 = r1->log5;
182 if(r2 != R && val >= r2->pc) {
183 r1 = r2;
184 continue;
185 }
186 if(r1->pc == val)
187 break;
188 r1 = r1->link;
189 }
190 if(r1 == R) {
191 nearln = p->lineno;
192 diag(Z, "ref not found\n%P", p);
193 continue;
194 }
195 if(r1 == r) {
196 nearln = p->lineno;
197 diag(Z, "ref to self\n%P", p);
198 continue;
199 }
200 r->s2 = r1;
201 r->p2link = r1->p2;
202 r1->p2 = r;
203 }
204 }
205 if(debug['R']) {
206 p = firstr->prog;
207 print("\n%L %D\n", p->lineno, &p->from);
208 }
209
210 /*
211 * pass 2.5
212 * find looping structure
213 */
214 for(r = firstr; r != R; r = r->link)
215 r->active = 0;
216 change = 0;
217 loopit(firstr, npc);
218
219 /*
220 * pass 3
221 * iterate propagating usage
222 * back until flow graph is complete
223 */
224 loop1:
225 change = 0;
226 for(r = firstr; r != R; r = r->link)
227 r->active = 0;
228 for(r = firstr; r != R; r = r->link)
229 if(r->prog->as == ARET)
230 prop(r, zbits, zbits);
231 loop11:
232 /* pick up unreachable code */
233 i = 0;
234 for(r = firstr; r != R; r = r1) {
235 r1 = r->link;
236 if(r1 && r1->active && !r->active) {
237 prop(r, zbits, zbits);
238 i = 1;
239 }
240 }
241 if(i)
242 goto loop11;
243 if(change)
244 goto loop1;
245
246
247 /*
248 * pass 4
249 * iterate propagating register/variable synchrony
250 * forward until graph is complete
251 */
252 loop2:
253 change = 0;
254 for(r = firstr; r != R; r = r->link)
255 r->active = 0;
256 synch(firstr, zbits);
257 if(change)
258 goto loop2;
259
260 addsplits();
261
262 if(debug['R'] && debug['v']) {
263 print("\nprop structure:\n");
264 for(r = firstr; r != R; r = r->link) {
265 print("%ld:%P", r->loop, r->prog);
266 for(z=0; z<BITS; z++)
267 bit.b[z] = r->set.b[z] |
268 r->refahead.b[z] | r->calahead.b[z] |
269 r->refbehind.b[z] | r->calbehind.b[z] |
270 r->use1.b[z] | r->use2.b[z];
271 if(bany(&bit)) {
272 print("\t");
273 if(bany(&r->use1))
274 print(" u1=%B", r->use1);
275 if(bany(&r->use2))
276 print(" u2=%B", r->use2);
277 if(bany(&r->set))
278 print(" st=%B", r->set);
279 if(bany(&r->refahead))
280 print(" ra=%B", r->refahead);
281 if(bany(&r->calahead))
282 print(" ca=%B", r->calahead);
283 if(bany(&r->refbehind))
284 print(" rb=%B", r->refbehind);
285 if(bany(&r->calbehind))
286 print(" cb=%B", r->calbehind);
287 if(bany(&r->regdiff))
288 print(" rd=%B", r->regdiff);
289 }
290 print("\n");
291 }
292 }
293
294 /*
295 * pass 5
296 * isolate regions
297 * calculate costs (paint1)
298 */
299 r = firstr;
300 if(r) {
301 for(z=0; z<BITS; z++)
302 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304 if(bany(&bit)) {
305 nearln = r->prog->lineno;
306 warn(Z, "used and not set: %B", bit);
307 if(debug['R'] && !debug['w'])
308 print("used and not set: %B\n", bit);
309 }
310 }
311
312 for(r = firstr; r != R; r = r->link)
313 r->act = zbits;
314 rgp = region;
315 nregion = 0;
316 for(r = firstr; r != R; r = r->link) {
317 for(z=0; z<BITS; z++)
318 bit.b[z] = r->set.b[z] &
319 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320 if(bany(&bit)) {
321 nearln = r->prog->lineno;
322 warn(Z, "set and not used: %B", bit);
323 if(debug['R'])
324 print("set and not used: %B\n", bit);
325 excise(r);
326 }
327 for(z=0; z<BITS; z++)
328 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329 while(bany(&bit)) {
330 i = bnum(bit);
331 rgp->enter = r;
332 rgp->varno = i;
333 change = 0;
334 if(debug['R'] && debug['v'])
335 print("\n");
336 paint1(r, i);
337 bit.b[i/32] &= ~(1L<<(i%32));
338 if(change <= 0) {
339 if(debug['R'])
340 print("%L $%d: %B\n",
341 r->prog->lineno, change, blsh(i));
342 continue;
343 }
344 rgp->cost = change;
345 nregion++;
346 if(nregion >= NRGN) {
347 warn(Z, "too many regions");
348 goto brk;
349 }
350 rgp++;
351 }
352 }
353 brk:
354 qsort(region, nregion, sizeof(region[0]), rcmp);
355
356 /*
357 * pass 6
358 * determine used registers (paint2)
359 * replace code (paint3)
360 */
361 rgp = region;
362 for(i=0; i<nregion; i++) {
363 bit = blsh(rgp->varno);
364 vreg = paint2(rgp->enter, rgp->varno);
365 vreg = allreg(vreg, rgp);
366 if(debug['R']) {
367 if(rgp->regno >= NREG)
368 print("%L $%d F%d: %B\n",
369 rgp->enter->prog->lineno,
370 rgp->cost,
371 rgp->regno-NREG,
372 bit);
373 else
374 print("%L $%d R%d: %B\n",
375 rgp->enter->prog->lineno,
376 rgp->cost,
377 rgp->regno,
378 bit);
379 }
380 if(rgp->regno != 0)
381 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382 rgp++;
383 }
384 /*
385 * pass 7
386 * peep-hole on basic block
387 */
388 if(!debug['R'] || debug['P'])
389 peep();
390
391 /*
392 * pass 8
393 * recalculate pc
394 */
395 val = initpc;
396 for(r = firstr; r != R; r = r1) {
397 r->pc = val;
398 p = r->prog;
399 p1 = P;
400 r1 = r->link;
401 if(r1 != R)
402 p1 = r1->prog;
403 for(; p != p1; p = p->link) {
404 switch(p->as) {
405 default:
406 val++;
407 break;
408
409 case ANOP:
410 case ADATA:
411 case AGLOBL:
412 case ANAME:
413 case ASIGNAME:
414 break;
415 }
416 }
417 }
418 pc = val;
419
420 /*
421 * fix up branches
422 */
423 if(debug['R'])
424 if(bany(&addrs))
425 print("addrs: %B\n", addrs);
426
427 r1 = 0; /* set */
428 for(r = firstr; r != R; r = r->link) {
429 p = r->prog;
430 if(p->to.type == D_BRANCH)
431 p->to.offset = r->s2->pc;
432 r1 = r;
433 }
434
435 /*
436 * last pass
437 * fix unsigned stores (needed as type hint in pass 6)
438 * eliminate nops
439 * free aux structures
440 */
441 for(p = firstr->prog; p != P; p = p->link){
442 if(p->to.type == D_OREG){
443 switch(p->as){
444 case AMOVBU:
445 p->as = AMOVB;
446 break;
447 case AMOVHU:
448 p->as = AMOVH;
449 break;
450 }
451 }
452 while(p->link && p->link->as == ANOP)
453 p->link = p->link->link;
454 }
455 if(r1 != R) {
456 r1->link = freer;
457 freer = firstr;
458 }
459 }
460
461 void
addsplits(void)462 addsplits(void)
463 {
464 Reg *r, *r1;
465 int z, i;
466 Bits bit;
467
468 for(r = firstr; r != R; r = r->link) {
469 if(r->loop > 1)
470 continue;
471 if(r->prog->as == AJAL)
472 continue;
473 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
474 if(r1->loop <= 1)
475 continue;
476 for(z=0; z<BITS; z++)
477 bit.b[z] = r1->calbehind.b[z] &
478 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
479 ~(r->calahead.b[z] & addrs.b[z]);
480 while(bany(&bit)) {
481 i = bnum(bit);
482 bit.b[i/32] &= ~(1L << (i%32));
483 }
484 }
485 }
486 }
487
488 /*
489 * add mov b,rn
490 * just after r
491 */
492 void
addmove(Reg * r,int bn,int rn,int f)493 addmove(Reg *r, int bn, int rn, int f)
494 {
495 Prog *p, *p1;
496 Adr *a;
497 Var *v;
498
499 p1 = alloc(sizeof(*p1));
500 *p1 = zprog;
501 p = r->prog;
502
503 p1->link = p->link;
504 p->link = p1;
505 p1->lineno = p->lineno;
506
507 v = var + bn;
508
509 a = &p1->to;
510 a->sym = v->sym;
511 a->name = v->name;
512 a->offset = v->offset;
513 a->etype = v->etype;
514 a->type = D_OREG;
515 if(a->etype == TARRAY || a->sym == S)
516 a->type = D_CONST;
517
518 p1->as = AMOVW;
519 if(v->etype == TCHAR || v->etype == TUCHAR)
520 p1->as = AMOVB;
521 if(v->etype == TSHORT || v->etype == TUSHORT)
522 p1->as = AMOVH;
523 if(v->etype == TFLOAT)
524 p1->as = AMOVF;
525 if(v->etype == TDOUBLE)
526 p1->as = AMOVD;
527 if(thechar == 'j')
528 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
529 p1->as = AMOV;
530
531 p1->from.type = D_REG;
532 p1->from.reg = rn;
533 if(rn >= NREG) {
534 p1->from.type = D_FREG;
535 p1->from.reg = rn-NREG;
536 }
537 if(!f) {
538 p1->from = *a;
539 *a = zprog.from;
540 a->type = D_REG;
541 a->reg = rn;
542 if(rn >= NREG) {
543 a->type = D_FREG;
544 a->reg = rn-NREG;
545 }
546 if(v->etype == TUCHAR)
547 p1->as = AMOVBU;
548 if(v->etype == TUSHORT)
549 p1->as = AMOVHU;
550 }
551 if(debug['R'])
552 print("%P\t.a%P\n", p, p1);
553 }
554
555 Bits
mkvar(Adr * a,int docon)556 mkvar(Adr *a, int docon)
557 {
558 Var *v;
559 int i, t, n, et, z;
560 long o;
561 Bits bit;
562 Sym *s;
563
564 t = a->type;
565 if(t == D_REG && a->reg != NREG)
566 regbits |= RtoB(a->reg);
567 if(t == D_FREG && a->reg != NREG)
568 regbits |= FtoB(a->reg);
569 s = a->sym;
570 o = a->offset;
571 et = a->etype;
572 if(s == S) {
573 if(t != D_CONST || !docon || a->reg != NREG)
574 goto none;
575 et = TLONG;
576 }
577 if(t == D_CONST) {
578 if(s == S && sval(o))
579 goto none;
580 }
581
582 n = a->name;
583 v = var;
584 for(i=0; i<nvar; i++) {
585 if(s == v->sym)
586 if(n == v->name)
587 if(o == v->offset)
588 goto out;
589 v++;
590 }
591 if(s)
592 if(s->name[0] == '.')
593 goto none;
594 if(nvar >= NVAR) {
595 if(debug['w'] > 1 && s)
596 warn(Z, "variable not optimized: %s", s->name);
597 goto none;
598 }
599 i = nvar;
600 nvar++;
601 v = &var[i];
602 v->sym = s;
603 v->offset = o;
604 v->etype = et;
605 v->name = n;
606 if(debug['R'])
607 print("bit=%2d et=%2d %D\n", i, et, a);
608 out:
609 bit = blsh(i);
610 if(n == D_EXTERN || n == D_STATIC)
611 for(z=0; z<BITS; z++)
612 externs.b[z] |= bit.b[z];
613 if(n == D_PARAM)
614 for(z=0; z<BITS; z++)
615 params.b[z] |= bit.b[z];
616 if(v->etype != et || !typechlpfd[et]) /* funny punning */
617 for(z=0; z<BITS; z++)
618 addrs.b[z] |= bit.b[z];
619 if(t == D_CONST) {
620 if(s == S) {
621 for(z=0; z<BITS; z++)
622 consts.b[z] |= bit.b[z];
623 return bit;
624 }
625 if(et != TARRAY)
626 for(z=0; z<BITS; z++)
627 addrs.b[z] |= bit.b[z];
628 for(z=0; z<BITS; z++)
629 params.b[z] |= bit.b[z];
630 return bit;
631 }
632 if(t == D_OREG)
633 return bit;
634
635 none:
636 return zbits;
637 }
638
639 void
prop(Reg * r,Bits ref,Bits cal)640 prop(Reg *r, Bits ref, Bits cal)
641 {
642 Reg *r1, *r2;
643 int z;
644
645 for(r1 = r; r1 != R; r1 = r1->p1) {
646 for(z=0; z<BITS; z++) {
647 ref.b[z] |= r1->refahead.b[z];
648 if(ref.b[z] != r1->refahead.b[z]) {
649 r1->refahead.b[z] = ref.b[z];
650 change++;
651 }
652 cal.b[z] |= r1->calahead.b[z];
653 if(cal.b[z] != r1->calahead.b[z]) {
654 r1->calahead.b[z] = cal.b[z];
655 change++;
656 }
657 }
658 switch(r1->prog->as) {
659 case AJAL:
660 for(z=0; z<BITS; z++) {
661 cal.b[z] |= ref.b[z] | externs.b[z];
662 ref.b[z] = 0;
663 }
664 break;
665
666 case ATEXT:
667 for(z=0; z<BITS; z++) {
668 cal.b[z] = 0;
669 ref.b[z] = 0;
670 }
671 break;
672
673 case ARET:
674 for(z=0; z<BITS; z++) {
675 cal.b[z] = externs.b[z];
676 ref.b[z] = 0;
677 }
678 }
679 for(z=0; z<BITS; z++) {
680 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
681 r1->use1.b[z] | r1->use2.b[z];
682 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
683 r1->refbehind.b[z] = ref.b[z];
684 r1->calbehind.b[z] = cal.b[z];
685 }
686 if(r1->active)
687 break;
688 r1->active = 1;
689 }
690 for(; r != r1; r = r->p1)
691 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
692 prop(r2, r->refbehind, r->calbehind);
693 }
694
695 /*
696 * find looping structure
697 *
698 * 1) find reverse postordering
699 * 2) find approximate dominators,
700 * the actual dominators if the flow graph is reducible
701 * otherwise, dominators plus some other non-dominators.
702 * See Matthew S. Hecht and Jeffrey D. Ullman,
703 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
704 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
705 * Oct. 1-3, 1973, pp. 207-217.
706 * 3) find all nodes with a predecessor dominated by the current node.
707 * such a node is a loop head.
708 * recursively, all preds with a greater rpo number are in the loop
709 */
710 long
postorder(Reg * r,Reg ** rpo2r,long n)711 postorder(Reg *r, Reg **rpo2r, long n)
712 {
713 Reg *r1;
714
715 r->rpo = 1;
716 r1 = r->s1;
717 if(r1 && !r1->rpo)
718 n = postorder(r1, rpo2r, n);
719 r1 = r->s2;
720 if(r1 && !r1->rpo)
721 n = postorder(r1, rpo2r, n);
722 rpo2r[n] = r;
723 n++;
724 return n;
725 }
726
727 long
rpolca(long * idom,long rpo1,long rpo2)728 rpolca(long *idom, long rpo1, long rpo2)
729 {
730 long t;
731
732 if(rpo1 == -1)
733 return rpo2;
734 while(rpo1 != rpo2){
735 if(rpo1 > rpo2){
736 t = rpo2;
737 rpo2 = rpo1;
738 rpo1 = t;
739 }
740 while(rpo1 < rpo2){
741 t = idom[rpo2];
742 if(t >= rpo2)
743 fatal(Z, "bad idom");
744 rpo2 = t;
745 }
746 }
747 return rpo1;
748 }
749
750 int
doms(long * idom,long r,long s)751 doms(long *idom, long r, long s)
752 {
753 while(s > r)
754 s = idom[s];
755 return s == r;
756 }
757
758 int
loophead(long * idom,Reg * r)759 loophead(long *idom, Reg *r)
760 {
761 long src;
762
763 src = r->rpo;
764 if(r->p1 != R && doms(idom, src, r->p1->rpo))
765 return 1;
766 for(r = r->p2; r != R; r = r->p2link)
767 if(doms(idom, src, r->rpo))
768 return 1;
769 return 0;
770 }
771
772 void
loopmark(Reg ** rpo2r,long head,Reg * r)773 loopmark(Reg **rpo2r, long head, Reg *r)
774 {
775 if(r->rpo < head || r->active == head)
776 return;
777 r->active = head;
778 r->loop += LOOP;
779 if(r->p1 != R)
780 loopmark(rpo2r, head, r->p1);
781 for(r = r->p2; r != R; r = r->p2link)
782 loopmark(rpo2r, head, r);
783 }
784
785 void
loopit(Reg * r,long nr)786 loopit(Reg *r, long nr)
787 {
788 Reg *r1;
789 long i, d, me;
790
791 if(nr > maxnr) {
792 rpo2r = alloc(nr * sizeof(Reg*));
793 idom = alloc(nr * sizeof(long));
794 maxnr = nr;
795 }
796
797 d = postorder(r, rpo2r, 0);
798 if(d > nr)
799 fatal(Z, "too many reg nodes");
800 nr = d;
801 for(i = 0; i < nr / 2; i++){
802 r1 = rpo2r[i];
803 rpo2r[i] = rpo2r[nr - 1 - i];
804 rpo2r[nr - 1 - i] = r1;
805 }
806 for(i = 0; i < nr; i++)
807 rpo2r[i]->rpo = i;
808
809 idom[0] = 0;
810 for(i = 0; i < nr; i++){
811 r1 = rpo2r[i];
812 me = r1->rpo;
813 d = -1;
814 if(r1->p1 != R && r1->p1->rpo < me)
815 d = r1->p1->rpo;
816 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
817 if(r1->rpo < me)
818 d = rpolca(idom, d, r1->rpo);
819 idom[i] = d;
820 }
821
822 for(i = 0; i < nr; i++){
823 r1 = rpo2r[i];
824 r1->loop++;
825 if(r1->p2 != R && loophead(idom, r1))
826 loopmark(rpo2r, i, r1);
827 }
828 }
829
830 void
synch(Reg * r,Bits dif)831 synch(Reg *r, Bits dif)
832 {
833 Reg *r1;
834 int z;
835
836 for(r1 = r; r1 != R; r1 = r1->s1) {
837 for(z=0; z<BITS; z++) {
838 dif.b[z] = (dif.b[z] &
839 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
840 r1->set.b[z] | r1->regdiff.b[z];
841 if(dif.b[z] != r1->regdiff.b[z]) {
842 r1->regdiff.b[z] = dif.b[z];
843 change++;
844 }
845 }
846 if(r1->active)
847 break;
848 r1->active = 1;
849 for(z=0; z<BITS; z++)
850 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
851 if(r1->s2 != R)
852 synch(r1->s2, dif);
853 }
854 }
855
856 ulong
allreg(ulong b,Rgn * r)857 allreg(ulong b, Rgn *r)
858 {
859 Var *v;
860 int i;
861
862 v = var + r->varno;
863 r->regno = 0;
864 switch(v->etype) {
865
866 default:
867 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
868 break;
869
870 case TCHAR:
871 case TUCHAR:
872 case TSHORT:
873 case TUSHORT:
874 case TINT:
875 case TUINT:
876 case TLONG:
877 case TULONG:
878 case TIND:
879 case TARRAY:
880 i = BtoR(~b);
881 if(i && r->cost >= 0) {
882 r->regno = i;
883 return RtoB(i);
884 }
885 break;
886
887 case TDOUBLE:
888 case TFLOAT:
889 i = BtoF(~b);
890 if(i && r->cost >= 0) {
891 r->regno = i+NREG;
892 return FtoB(i);
893 }
894 break;
895 }
896 return 0;
897 }
898
899 void
paint1(Reg * r,int bn)900 paint1(Reg *r, int bn)
901 {
902 Reg *r1;
903 Prog *p;
904 int z;
905 ulong bb;
906
907 z = bn/32;
908 bb = 1L<<(bn%32);
909 if(r->act.b[z] & bb)
910 return;
911 for(;;) {
912 if(!(r->refbehind.b[z] & bb))
913 break;
914 r1 = r->p1;
915 if(r1 == R)
916 break;
917 if(!(r1->refahead.b[z] & bb))
918 break;
919 if(r1->act.b[z] & bb)
920 break;
921 r = r1;
922 }
923
924 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
925 change -= CLOAD * r->loop;
926 if(debug['R'] && debug['v'])
927 print("%ld%P\tld %B $%d\n", r->loop,
928 r->prog, blsh(bn), change);
929 }
930 for(;;) {
931 r->act.b[z] |= bb;
932 p = r->prog;
933
934 if(r->use1.b[z] & bb) {
935 change += CREF * r->loop;
936 if(debug['R'] && debug['v'])
937 print("%ld%P\tu1 %B $%d\n", r->loop,
938 p, blsh(bn), change);
939 }
940
941 if((r->use2.b[z]|r->set.b[z]) & bb) {
942 change += CREF * r->loop;
943 if(debug['R'] && debug['v'])
944 print("%ld%P\tu2 %B $%d\n", r->loop,
945 p, blsh(bn), change);
946 }
947
948 if(STORE(r) & r->regdiff.b[z] & bb) {
949 change -= CLOAD * r->loop;
950 if(debug['R'] && debug['v'])
951 print("%ld%P\tst %B $%d\n", r->loop,
952 p, blsh(bn), change);
953 }
954
955 if(r->refbehind.b[z] & bb)
956 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
957 if(r1->refahead.b[z] & bb)
958 paint1(r1, bn);
959
960 if(!(r->refahead.b[z] & bb))
961 break;
962 r1 = r->s2;
963 if(r1 != R)
964 if(r1->refbehind.b[z] & bb)
965 paint1(r1, bn);
966 r = r->s1;
967 if(r == R)
968 break;
969 if(r->act.b[z] & bb)
970 break;
971 if(!(r->refbehind.b[z] & bb))
972 break;
973 }
974 }
975
976 ulong
paint2(Reg * r,int bn)977 paint2(Reg *r, int bn)
978 {
979 Reg *r1;
980 int z;
981 ulong bb, vreg;
982
983 z = bn/32;
984 bb = 1L << (bn%32);
985 vreg = regbits;
986 if(!(r->act.b[z] & bb))
987 return vreg;
988 for(;;) {
989 if(!(r->refbehind.b[z] & bb))
990 break;
991 r1 = r->p1;
992 if(r1 == R)
993 break;
994 if(!(r1->refahead.b[z] & bb))
995 break;
996 if(!(r1->act.b[z] & bb))
997 break;
998 r = r1;
999 }
1000 for(;;) {
1001 r->act.b[z] &= ~bb;
1002
1003 vreg |= r->regu;
1004
1005 if(r->refbehind.b[z] & bb)
1006 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1007 if(r1->refahead.b[z] & bb)
1008 vreg |= paint2(r1, bn);
1009
1010 if(!(r->refahead.b[z] & bb))
1011 break;
1012 r1 = r->s2;
1013 if(r1 != R)
1014 if(r1->refbehind.b[z] & bb)
1015 vreg |= paint2(r1, bn);
1016 r = r->s1;
1017 if(r == R)
1018 break;
1019 if(!(r->act.b[z] & bb))
1020 break;
1021 if(!(r->refbehind.b[z] & bb))
1022 break;
1023 }
1024 return vreg;
1025 }
1026
1027 void
paint3(Reg * r,int bn,long rb,int rn)1028 paint3(Reg *r, int bn, long rb, int rn)
1029 {
1030 Reg *r1;
1031 Prog *p;
1032 int z;
1033 ulong bb;
1034
1035 z = bn/32;
1036 bb = 1L << (bn%32);
1037 if(r->act.b[z] & bb)
1038 return;
1039 for(;;) {
1040 if(!(r->refbehind.b[z] & bb))
1041 break;
1042 r1 = r->p1;
1043 if(r1 == R)
1044 break;
1045 if(!(r1->refahead.b[z] & bb))
1046 break;
1047 if(r1->act.b[z] & bb)
1048 break;
1049 r = r1;
1050 }
1051
1052 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1053 addmove(r, bn, rn, 0);
1054 for(;;) {
1055 r->act.b[z] |= bb;
1056 p = r->prog;
1057
1058 if(r->use1.b[z] & bb) {
1059 if(debug['R'])
1060 print("%P", p);
1061 addreg(&p->from, rn);
1062 switch(p->as){
1063 case AMOVB:
1064 case AMOVBU:
1065 case AMOVH:
1066 case AMOVHU:
1067 case AMOVW:
1068 case AMOVWU:
1069 p->as = AMOV;
1070 }
1071 if(debug['R'])
1072 print("\t.c%P\n", p);
1073 }
1074 if((r->use2.b[z]|r->set.b[z]) & bb) {
1075 if(debug['R'])
1076 print("%P", p);
1077 addreg(&p->to, rn);
1078 if(debug['R'])
1079 print("\t.c%P\n", p);
1080 }
1081
1082 if(STORE(r) & r->regdiff.b[z] & bb)
1083 addmove(r, bn, rn, 1);
1084 r->regu |= rb;
1085
1086 if(r->refbehind.b[z] & bb)
1087 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1088 if(r1->refahead.b[z] & bb)
1089 paint3(r1, bn, rb, rn);
1090
1091 if(!(r->refahead.b[z] & bb))
1092 break;
1093 r1 = r->s2;
1094 if(r1 != R)
1095 if(r1->refbehind.b[z] & bb)
1096 paint3(r1, bn, rb, rn);
1097 r = r->s1;
1098 if(r == R)
1099 break;
1100 if(r->act.b[z] & bb)
1101 break;
1102 if(!(r->refbehind.b[z] & bb))
1103 break;
1104 }
1105 }
1106
1107 void
addreg(Adr * a,int rn)1108 addreg(Adr *a, int rn)
1109 {
1110
1111 a->sym = 0;
1112 a->name = D_NONE;
1113 a->type = D_REG;
1114 a->reg = rn;
1115 if(rn >= NREG) {
1116 a->type = D_FREG;
1117 a->reg = rn-NREG;
1118 }
1119 }
1120
1121 /*
1122 * bit reg
1123 * 0 R9
1124 * 1 R10
1125 * ... ...
1126 * 6 R15
1127 */
1128 long
RtoB(int r)1129 RtoB(int r)
1130 {
1131
1132 if(r < 9 || r > 15)
1133 return 0;
1134 return 1L << (r-9);
1135 }
1136
1137 int
BtoR(long b)1138 BtoR(long b)
1139 {
1140
1141 b &= 0x007fL;
1142 if(b == 0)
1143 return 0;
1144 return bitno(b) + 9;
1145 }
1146
1147 /*
1148 * bit reg
1149 * 7 F1
1150 * 8 F2
1151 * ... ...
1152 * 31 F25
1153 */
1154 long
FtoB(int f)1155 FtoB(int f)
1156 {
1157
1158 if(f < 1 || f > 25)
1159 return 0;
1160 return 1L << (f + 6);
1161 }
1162
1163 int
BtoF(long b)1164 BtoF(long b)
1165 {
1166
1167 b &= 0x03ffff80L;
1168 if(b == 0)
1169 return 0;
1170 return bitno(b) - 6;
1171 }
1172