1 #include "gc.h"
2
3 void addsplits(void);
4
5 Reg*
rega(void)6 rega(void)
7 {
8 Reg *r;
9
10 r = freer;
11 if(r == R) {
12 r = alloc(sizeof(*r));
13 } else
14 freer = r->link;
15
16 *r = zreg;
17 return r;
18 }
19
20 int
rcmp(const void * a1,const void * a2)21 rcmp(const void *a1, const void *a2)
22 {
23 Rgn *p1, *p2;
24 int c1, c2;
25
26 p1 = (Rgn*)a1;
27 p2 = (Rgn*)a2;
28 c1 = p2->cost;
29 c2 = p1->cost;
30 if(c1 -= c2)
31 return c1;
32 return p2->varno - p1->varno;
33 }
34
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 Reg *r, *r1, *r2;
39 Prog *p1;
40 int i, z;
41 long initpc, val, npc;
42 ulong vreg;
43 Bits bit;
44 struct
45 {
46 long m;
47 long c;
48 Reg* p;
49 } log5[6], *lp;
50
51 firstr = R;
52 lastr = R;
53 nvar = 0;
54 regbits = 0;
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 case ARFE:
120 r->p1 = R;
121 r1->s1 = R;
122 }
123
124 /*
125 * left side always read
126 */
127 bit = mkvar(&p->from, p->as==AMOVW);
128 for(z=0; z<BITS; z++)
129 r->use1.b[z] |= bit.b[z];
130
131 /*
132 * right side depends on opcode
133 */
134 bit = mkvar(&p->to, 0);
135 if(bany(&bit))
136 switch(p->as) {
137 default:
138 diag(Z, "reg: unknown asop: %A", p->as);
139 break;
140
141 /*
142 * right side write
143 */
144 case ANOP:
145 case AMOVB:
146 case AMOVBU:
147 case AMOVH:
148 case AMOVHU:
149 case AMOVW:
150 case AMOVF:
151 case AMOVD:
152 for(z=0; z<BITS; z++)
153 r->set.b[z] |= bit.b[z];
154 break;
155
156 /*
157 * funny
158 */
159 case AJAL:
160 for(z=0; z<BITS; z++)
161 addrs.b[z] |= bit.b[z];
162 break;
163 }
164 }
165 if(firstr == R)
166 return;
167 initpc = pc - val;
168 npc = val;
169
170 /*
171 * pass 2
172 * turn branch references to pointers
173 * build back pointers
174 */
175 for(r = firstr; r != R; r = r->link) {
176 p = r->prog;
177 if(p->to.type == D_BRANCH) {
178 val = p->to.offset - initpc;
179 r1 = firstr;
180 while(r1 != R) {
181 r2 = r1->log5;
182 if(r2 != R && val >= r2->pc) {
183 r1 = r2;
184 continue;
185 }
186 if(r1->pc == val)
187 break;
188 r1 = r1->link;
189 }
190 if(r1 == R) {
191 nearln = p->lineno;
192 diag(Z, "ref not found\n%P", p);
193 continue;
194 }
195 if(r1 == r) {
196 nearln = p->lineno;
197 diag(Z, "ref to self\n%P", p);
198 continue;
199 }
200 r->s2 = r1;
201 r->p2link = r1->p2;
202 r1->p2 = r;
203 }
204 }
205 if(debug['R']) {
206 p = firstr->prog;
207 print("\n%L %D\n", p->lineno, &p->from);
208 }
209
210 /*
211 * pass 2.5
212 * find looping structure
213 */
214 for(r = firstr; r != R; r = r->link)
215 r->active = 0;
216 change = 0;
217 loopit(firstr, npc);
218
219 /*
220 * pass 3
221 * iterate propagating usage
222 * back until flow graph is complete
223 */
224 loop1:
225 change = 0;
226 for(r = firstr; r != R; r = r->link)
227 r->active = 0;
228 for(r = firstr; r != R; r = r->link)
229 if(r->prog->as == ARET)
230 prop(r, zbits, zbits);
231 loop11:
232 /* pick up unreachable code */
233 i = 0;
234 for(r = firstr; r != R; r = r1) {
235 r1 = r->link;
236 if(r1 && r1->active && !r->active) {
237 prop(r, zbits, zbits);
238 i = 1;
239 }
240 }
241 if(i)
242 goto loop11;
243 if(change)
244 goto loop1;
245
246
247 /*
248 * pass 4
249 * iterate propagating register/variable synchrony
250 * forward until graph is complete
251 */
252 loop2:
253 change = 0;
254 for(r = firstr; r != R; r = r->link)
255 r->active = 0;
256 synch(firstr, zbits);
257 if(change)
258 goto loop2;
259
260 addsplits();
261
262 if(debug['R'] && debug['v']) {
263 print("\nprop structure:\n");
264 for(r = firstr; r != R; r = r->link) {
265 print("%ld:%P", r->loop, r->prog);
266 for(z=0; z<BITS; z++)
267 bit.b[z] = r->set.b[z] |
268 r->refahead.b[z] | r->calahead.b[z] |
269 r->refbehind.b[z] | r->calbehind.b[z] |
270 r->use1.b[z] | r->use2.b[z];
271 if(bany(&bit)) {
272 print("\t");
273 if(bany(&r->use1))
274 print(" u1=%B", r->use1);
275 if(bany(&r->use2))
276 print(" u2=%B", r->use2);
277 if(bany(&r->set))
278 print(" st=%B", r->set);
279 if(bany(&r->refahead))
280 print(" ra=%B", r->refahead);
281 if(bany(&r->calahead))
282 print(" ca=%B", r->calahead);
283 if(bany(&r->refbehind))
284 print(" rb=%B", r->refbehind);
285 if(bany(&r->calbehind))
286 print(" cb=%B", r->calbehind);
287 if(bany(&r->regdiff))
288 print(" rd=%B", r->regdiff);
289 }
290 print("\n");
291 }
292 }
293
294 /*
295 * pass 5
296 * isolate regions
297 * calculate costs (paint1)
298 */
299 r = firstr;
300 if(r) {
301 for(z=0; z<BITS; z++)
302 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304 if(bany(&bit)) {
305 nearln = r->prog->lineno;
306 warn(Z, "used and not set: %B", bit);
307 if(debug['R'] && !debug['w'])
308 print("used and not set: %B\n", bit);
309 }
310 }
311
312 for(r = firstr; r != R; r = r->link)
313 r->act = zbits;
314 rgp = region;
315 nregion = 0;
316 for(r = firstr; r != R; r = r->link) {
317 for(z=0; z<BITS; z++)
318 bit.b[z] = r->set.b[z] &
319 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320 if(bany(&bit)) {
321 nearln = r->prog->lineno;
322 warn(Z, "set and not used: %B", bit);
323 if(debug['R'])
324 print("set and not used: %B\n", bit);
325 excise(r);
326 }
327 for(z=0; z<BITS; z++)
328 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329 while(bany(&bit)) {
330 i = bnum(bit);
331 rgp->enter = r;
332 rgp->varno = i;
333 change = 0;
334 if(debug['R'] && debug['v'])
335 print("\n");
336 paint1(r, i);
337 bit.b[i/32] &= ~(1L<<(i%32));
338 if(change <= 0) {
339 if(debug['R'])
340 print("%L $%d: %B\n",
341 r->prog->lineno, change, blsh(i));
342 continue;
343 }
344 rgp->cost = change;
345 nregion++;
346 if(nregion >= NRGN) {
347 warn(Z, "too many regions");
348 goto brk;
349 }
350 rgp++;
351 }
352 }
353 brk:
354 qsort(region, nregion, sizeof(region[0]), rcmp);
355
356 /*
357 * pass 6
358 * determine used registers (paint2)
359 * replace code (paint3)
360 */
361 rgp = region;
362 for(i=0; i<nregion; i++) {
363 bit = blsh(rgp->varno);
364 vreg = paint2(rgp->enter, rgp->varno);
365 vreg = allreg(vreg, rgp);
366 if(debug['R']) {
367 if(rgp->regno >= NREG)
368 print("%L $%d F%d: %B\n",
369 rgp->enter->prog->lineno,
370 rgp->cost,
371 rgp->regno-NREG,
372 bit);
373 else
374 print("%L $%d R%d: %B\n",
375 rgp->enter->prog->lineno,
376 rgp->cost,
377 rgp->regno,
378 bit);
379 }
380 if(rgp->regno != 0)
381 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382 rgp++;
383 }
384 /*
385 * pass 7
386 * peep-hole on basic block
387 */
388 if(!debug['R'] || debug['P'])
389 peep();
390
391 /*
392 * pass 8
393 * recalculate pc
394 */
395 val = initpc;
396 for(r = firstr; r != R; r = r1) {
397 r->pc = val;
398 p = r->prog;
399 p1 = P;
400 r1 = r->link;
401 if(r1 != R)
402 p1 = r1->prog;
403 for(; p != p1; p = p->link) {
404 switch(p->as) {
405 default:
406 val++;
407 break;
408
409 case ANOP:
410 case ADATA:
411 case AGLOBL:
412 case ANAME:
413 case ASIGNAME:
414 break;
415 }
416 }
417 }
418 pc = val;
419
420 /*
421 * fix up branches
422 */
423 if(debug['R'])
424 if(bany(&addrs))
425 print("addrs: %B\n", addrs);
426
427 r1 = 0; /* set */
428 for(r = firstr; r != R; r = r->link) {
429 p = r->prog;
430 if(p->to.type == D_BRANCH)
431 p->to.offset = r->s2->pc;
432 r1 = r;
433 }
434
435 /*
436 * last pass
437 * eliminate nops
438 * free aux structures
439 */
440 for(p = firstr->prog; p != P; p = p->link){
441 while(p->link && p->link->as == ANOP)
442 p->link = p->link->link;
443 }
444 if(r1 != R) {
445 r1->link = freer;
446 freer = firstr;
447 }
448 }
449
450 void
addsplits(void)451 addsplits(void)
452 {
453 Reg *r, *r1;
454 int z, i;
455 Bits bit;
456
457 for(r = firstr; r != R; r = r->link) {
458 if(r->loop > 1)
459 continue;
460 if(r->prog->as == AJAL)
461 continue;
462 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
463 if(r1->loop <= 1)
464 continue;
465 for(z=0; z<BITS; z++)
466 bit.b[z] = r1->calbehind.b[z] &
467 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
468 ~(r->calahead.b[z] & addrs.b[z]);
469 while(bany(&bit)) {
470 i = bnum(bit);
471 bit.b[i/32] &= ~(1L << (i%32));
472 }
473 }
474 }
475 }
476
477 /*
478 * add mov b,rn
479 * just after r
480 */
481 void
addmove(Reg * r,int bn,int rn,int f)482 addmove(Reg *r, int bn, int rn, int f)
483 {
484 Prog *p, *p1;
485 Adr *a;
486 Var *v;
487
488 p1 = alloc(sizeof(*p1));
489 *p1 = zprog;
490 p = r->prog;
491
492 p1->link = p->link;
493 p->link = p1;
494 p1->lineno = p->lineno;
495
496 v = var + bn;
497
498 a = &p1->to;
499 a->sym = v->sym;
500 a->name = v->name;
501 a->offset = v->offset;
502 a->etype = v->etype;
503 a->type = D_OREG;
504 if(a->etype == TARRAY || a->sym == S)
505 a->type = D_CONST;
506
507 p1->as = AMOVW;
508 if(v->etype == TCHAR || v->etype == TUCHAR)
509 p1->as = AMOVB;
510 if(v->etype == TSHORT || v->etype == TUSHORT)
511 p1->as = AMOVH;
512 if(v->etype == TFLOAT)
513 p1->as = AMOVF;
514 if(v->etype == TDOUBLE)
515 p1->as = AMOVD;
516
517 p1->from.type = D_REG;
518 p1->from.reg = rn;
519 if(rn >= NREG) {
520 p1->from.type = D_FREG;
521 p1->from.reg = rn-NREG;
522 }
523 if(!f) {
524 p1->from = *a;
525 *a = zprog.from;
526 a->type = D_REG;
527 a->reg = rn;
528 if(rn >= NREG) {
529 a->type = D_FREG;
530 a->reg = rn-NREG;
531 }
532 if(v->etype == TUCHAR)
533 p1->as = AMOVBU;
534 if(v->etype == TUSHORT)
535 p1->as = AMOVHU;
536 }
537 if(debug['R'])
538 print("%P\t.a%P\n", p, p1);
539 }
540
541 Bits
mkvar(Adr * a,int docon)542 mkvar(Adr *a, int docon)
543 {
544 Var *v;
545 int i, t, n, et, z;
546 long o;
547 Bits bit;
548 Sym *s;
549
550 t = a->type;
551 if(t == D_REG && a->reg != NREG)
552 regbits |= RtoB(a->reg);
553 if(t == D_FREG && a->reg != NREG)
554 regbits |= FtoB(a->reg);
555 s = a->sym;
556 o = a->offset;
557 et = a->etype;
558 if(s == S) {
559 if(t != D_CONST || !docon || a->reg != NREG)
560 goto none;
561 et = TLONG;
562 }
563 if(t == D_CONST) {
564 if(s == S && sval(o))
565 goto none;
566 }
567
568 n = a->name;
569 v = var;
570 for(i=0; i<nvar; i++) {
571 if(s == v->sym)
572 if(n == v->name)
573 if(o == v->offset)
574 goto out;
575 v++;
576 }
577 if(s)
578 if(s->name[0] == '.')
579 goto none;
580 if(nvar >= NVAR) {
581 if(debug['w'] > 1 && s)
582 warn(Z, "variable not optimized: %s", s->name);
583 goto none;
584 }
585 i = nvar;
586 nvar++;
587 v = &var[i];
588 v->sym = s;
589 v->offset = o;
590 v->etype = et;
591 v->name = n;
592 if(debug['R'])
593 print("bit=%2d et=%2d %D\n", i, et, a);
594 out:
595 bit = blsh(i);
596 if(n == D_EXTERN || n == D_STATIC)
597 for(z=0; z<BITS; z++)
598 externs.b[z] |= bit.b[z];
599 if(n == D_PARAM)
600 for(z=0; z<BITS; z++)
601 params.b[z] |= bit.b[z];
602 if(v->etype != et || !typechlpfd[et]) /* funny punning */
603 for(z=0; z<BITS; z++)
604 addrs.b[z] |= bit.b[z];
605 if(t == D_CONST) {
606 if(s == S) {
607 for(z=0; z<BITS; z++)
608 consts.b[z] |= bit.b[z];
609 return bit;
610 }
611 if(et != TARRAY)
612 for(z=0; z<BITS; z++)
613 addrs.b[z] |= bit.b[z];
614 for(z=0; z<BITS; z++)
615 params.b[z] |= bit.b[z];
616 return bit;
617 }
618 if(t == D_OREG)
619 return bit;
620
621 none:
622 return zbits;
623 }
624
625 void
prop(Reg * r,Bits ref,Bits cal)626 prop(Reg *r, Bits ref, Bits cal)
627 {
628 Reg *r1, *r2;
629 int z;
630
631 for(r1 = r; r1 != R; r1 = r1->p1) {
632 for(z=0; z<BITS; z++) {
633 ref.b[z] |= r1->refahead.b[z];
634 if(ref.b[z] != r1->refahead.b[z]) {
635 r1->refahead.b[z] = ref.b[z];
636 change++;
637 }
638 cal.b[z] |= r1->calahead.b[z];
639 if(cal.b[z] != r1->calahead.b[z]) {
640 r1->calahead.b[z] = cal.b[z];
641 change++;
642 }
643 }
644 switch(r1->prog->as) {
645 case AJAL:
646 for(z=0; z<BITS; z++) {
647 cal.b[z] |= ref.b[z] | externs.b[z];
648 ref.b[z] = 0;
649 }
650 break;
651
652 case ATEXT:
653 for(z=0; z<BITS; z++) {
654 cal.b[z] = 0;
655 ref.b[z] = 0;
656 }
657 break;
658
659 case ARET:
660 for(z=0; z<BITS; z++) {
661 cal.b[z] = externs.b[z];
662 ref.b[z] = 0;
663 }
664 }
665 for(z=0; z<BITS; z++) {
666 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
667 r1->use1.b[z] | r1->use2.b[z];
668 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
669 r1->refbehind.b[z] = ref.b[z];
670 r1->calbehind.b[z] = cal.b[z];
671 }
672 if(r1->active)
673 break;
674 r1->active = 1;
675 }
676 for(; r != r1; r = r->p1)
677 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
678 prop(r2, r->refbehind, r->calbehind);
679 }
680
681 /*
682 * find looping structure
683 *
684 * 1) find reverse postordering
685 * 2) find approximate dominators,
686 * the actual dominators if the flow graph is reducible
687 * otherwise, dominators plus some other non-dominators.
688 * See Matthew S. Hecht and Jeffrey D. Ullman,
689 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
690 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
691 * Oct. 1-3, 1973, pp. 207-217.
692 * 3) find all nodes with a predecessor dominated by the current node.
693 * such a node is a loop head.
694 * recursively, all preds with a greater rpo number are in the loop
695 */
696 long
postorder(Reg * r,Reg ** rpo2r,long n)697 postorder(Reg *r, Reg **rpo2r, long n)
698 {
699 Reg *r1;
700
701 r->rpo = 1;
702 r1 = r->s1;
703 if(r1 && !r1->rpo)
704 n = postorder(r1, rpo2r, n);
705 r1 = r->s2;
706 if(r1 && !r1->rpo)
707 n = postorder(r1, rpo2r, n);
708 rpo2r[n] = r;
709 n++;
710 return n;
711 }
712
713 long
rpolca(long * idom,long rpo1,long rpo2)714 rpolca(long *idom, long rpo1, long rpo2)
715 {
716 long t;
717
718 if(rpo1 == -1)
719 return rpo2;
720 while(rpo1 != rpo2){
721 if(rpo1 > rpo2){
722 t = rpo2;
723 rpo2 = rpo1;
724 rpo1 = t;
725 }
726 while(rpo1 < rpo2){
727 t = idom[rpo2];
728 if(t >= rpo2)
729 fatal(Z, "bad idom");
730 rpo2 = t;
731 }
732 }
733 return rpo1;
734 }
735
736 int
doms(long * idom,long r,long s)737 doms(long *idom, long r, long s)
738 {
739 while(s > r)
740 s = idom[s];
741 return s == r;
742 }
743
744 int
loophead(long * idom,Reg * r)745 loophead(long *idom, Reg *r)
746 {
747 long src;
748
749 src = r->rpo;
750 if(r->p1 != R && doms(idom, src, r->p1->rpo))
751 return 1;
752 for(r = r->p2; r != R; r = r->p2link)
753 if(doms(idom, src, r->rpo))
754 return 1;
755 return 0;
756 }
757
758 void
loopmark(Reg ** rpo2r,long head,Reg * r)759 loopmark(Reg **rpo2r, long head, Reg *r)
760 {
761 if(r->rpo < head || r->active == head)
762 return;
763 r->active = head;
764 r->loop += LOOP;
765 if(r->p1 != R)
766 loopmark(rpo2r, head, r->p1);
767 for(r = r->p2; r != R; r = r->p2link)
768 loopmark(rpo2r, head, r);
769 }
770
771 void
loopit(Reg * r,long nr)772 loopit(Reg *r, long nr)
773 {
774 Reg *r1;
775 long i, d, me;
776
777 if(nr > maxnr) {
778 rpo2r = alloc(nr * sizeof(Reg*));
779 idom = alloc(nr * sizeof(long));
780 maxnr = nr;
781 }
782
783 d = postorder(r, rpo2r, 0);
784 if(d > nr)
785 fatal(Z, "too many reg nodes");
786 nr = d;
787 for(i = 0; i < nr / 2; i++){
788 r1 = rpo2r[i];
789 rpo2r[i] = rpo2r[nr - 1 - i];
790 rpo2r[nr - 1 - i] = r1;
791 }
792 for(i = 0; i < nr; i++)
793 rpo2r[i]->rpo = i;
794
795 idom[0] = 0;
796 for(i = 0; i < nr; i++){
797 r1 = rpo2r[i];
798 me = r1->rpo;
799 d = -1;
800 if(r1->p1 != R && r1->p1->rpo < me)
801 d = r1->p1->rpo;
802 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
803 if(r1->rpo < me)
804 d = rpolca(idom, d, r1->rpo);
805 idom[i] = d;
806 }
807
808 for(i = 0; i < nr; i++){
809 r1 = rpo2r[i];
810 r1->loop++;
811 if(r1->p2 != R && loophead(idom, r1))
812 loopmark(rpo2r, i, r1);
813 }
814 }
815
816 void
synch(Reg * r,Bits dif)817 synch(Reg *r, Bits dif)
818 {
819 Reg *r1;
820 int z;
821
822 for(r1 = r; r1 != R; r1 = r1->s1) {
823 for(z=0; z<BITS; z++) {
824 dif.b[z] = (dif.b[z] &
825 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
826 r1->set.b[z] | r1->regdiff.b[z];
827 if(dif.b[z] != r1->regdiff.b[z]) {
828 r1->regdiff.b[z] = dif.b[z];
829 change++;
830 }
831 }
832 if(r1->active)
833 break;
834 r1->active = 1;
835 for(z=0; z<BITS; z++)
836 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
837 if(r1->s2 != R)
838 synch(r1->s2, dif);
839 }
840 }
841
842 ulong
allreg(ulong b,Rgn * r)843 allreg(ulong b, Rgn *r)
844 {
845 Var *v;
846 int i;
847
848 v = var + r->varno;
849 r->regno = 0;
850 switch(v->etype) {
851
852 default:
853 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
854 break;
855
856 case TCHAR:
857 case TUCHAR:
858 case TSHORT:
859 case TUSHORT:
860 case TINT:
861 case TUINT:
862 case TLONG:
863 case TULONG:
864 case TIND:
865 case TARRAY:
866 i = BtoR(~b);
867 if(i && r->cost >= 0) {
868 r->regno = i;
869 return RtoB(i);
870 }
871 break;
872
873 case TDOUBLE:
874 case TFLOAT:
875 i = BtoF(~b);
876 if(i && r->cost >= 0) {
877 r->regno = i+NREG;
878 return FtoB(i);
879 }
880 break;
881 }
882 return 0;
883 }
884
885 void
paint1(Reg * r,int bn)886 paint1(Reg *r, int bn)
887 {
888 Reg *r1;
889 Prog *p;
890 int z;
891 ulong bb;
892
893 z = bn/32;
894 bb = 1L<<(bn%32);
895 if(r->act.b[z] & bb)
896 return;
897 for(;;) {
898 if(!(r->refbehind.b[z] & bb))
899 break;
900 r1 = r->p1;
901 if(r1 == R)
902 break;
903 if(!(r1->refahead.b[z] & bb))
904 break;
905 if(r1->act.b[z] & bb)
906 break;
907 r = r1;
908 }
909
910 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
911 change -= CLOAD * r->loop;
912 if(debug['R'] && debug['v'])
913 print("%ld%P\tld %B $%d\n", r->loop,
914 r->prog, blsh(bn), change);
915 }
916 for(;;) {
917 r->act.b[z] |= bb;
918 p = r->prog;
919
920 if(r->use1.b[z] & bb) {
921 change += CREF * r->loop;
922 if(debug['R'] && debug['v'])
923 print("%ld%P\tu1 %B $%d\n", r->loop,
924 p, blsh(bn), change);
925 }
926
927 if((r->use2.b[z]|r->set.b[z]) & bb) {
928 change += CREF * r->loop;
929 if(debug['R'] && debug['v'])
930 print("%ld%P\tu2 %B $%d\n", r->loop,
931 p, blsh(bn), change);
932 }
933
934 if(STORE(r) & r->regdiff.b[z] & bb) {
935 change -= CLOAD * r->loop;
936 if(debug['R'] && debug['v'])
937 print("%ld%P\tst %B $%d\n", r->loop,
938 p, blsh(bn), change);
939 }
940
941 if(r->refbehind.b[z] & bb)
942 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
943 if(r1->refahead.b[z] & bb)
944 paint1(r1, bn);
945
946 if(!(r->refahead.b[z] & bb))
947 break;
948 r1 = r->s2;
949 if(r1 != R)
950 if(r1->refbehind.b[z] & bb)
951 paint1(r1, bn);
952 r = r->s1;
953 if(r == R)
954 break;
955 if(r->act.b[z] & bb)
956 break;
957 if(!(r->refbehind.b[z] & bb))
958 break;
959 }
960 }
961
962 ulong
paint2(Reg * r,int bn)963 paint2(Reg *r, int bn)
964 {
965 Reg *r1;
966 int z;
967 ulong bb, vreg;
968
969 z = bn/32;
970 bb = 1L << (bn%32);
971 vreg = regbits;
972 if(!(r->act.b[z] & bb))
973 return vreg;
974 for(;;) {
975 if(!(r->refbehind.b[z] & bb))
976 break;
977 r1 = r->p1;
978 if(r1 == R)
979 break;
980 if(!(r1->refahead.b[z] & bb))
981 break;
982 if(!(r1->act.b[z] & bb))
983 break;
984 r = r1;
985 }
986 for(;;) {
987 r->act.b[z] &= ~bb;
988
989 vreg |= r->regu;
990
991 if(r->refbehind.b[z] & bb)
992 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
993 if(r1->refahead.b[z] & bb)
994 vreg |= paint2(r1, bn);
995
996 if(!(r->refahead.b[z] & bb))
997 break;
998 r1 = r->s2;
999 if(r1 != R)
1000 if(r1->refbehind.b[z] & bb)
1001 vreg |= paint2(r1, bn);
1002 r = r->s1;
1003 if(r == R)
1004 break;
1005 if(!(r->act.b[z] & bb))
1006 break;
1007 if(!(r->refbehind.b[z] & bb))
1008 break;
1009 }
1010 return vreg;
1011 }
1012
1013 void
paint3(Reg * r,int bn,long rb,int rn)1014 paint3(Reg *r, int bn, long rb, int rn)
1015 {
1016 Reg *r1;
1017 Prog *p;
1018 int z;
1019 ulong bb;
1020
1021 z = bn/32;
1022 bb = 1L << (bn%32);
1023 if(r->act.b[z] & bb)
1024 return;
1025 for(;;) {
1026 if(!(r->refbehind.b[z] & bb))
1027 break;
1028 r1 = r->p1;
1029 if(r1 == R)
1030 break;
1031 if(!(r1->refahead.b[z] & bb))
1032 break;
1033 if(r1->act.b[z] & bb)
1034 break;
1035 r = r1;
1036 }
1037
1038 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1039 addmove(r, bn, rn, 0);
1040 for(;;) {
1041 r->act.b[z] |= bb;
1042 p = r->prog;
1043
1044 if(r->use1.b[z] & bb) {
1045 if(debug['R'])
1046 print("%P", p);
1047 addreg(&p->from, rn);
1048 if(debug['R'])
1049 print("\t.c%P\n", p);
1050 }
1051 if((r->use2.b[z]|r->set.b[z]) & bb) {
1052 if(debug['R'])
1053 print("%P", p);
1054 addreg(&p->to, rn);
1055 if(debug['R'])
1056 print("\t.c%P\n", p);
1057 }
1058
1059 if(STORE(r) & r->regdiff.b[z] & bb)
1060 addmove(r, bn, rn, 1);
1061 r->regu |= rb;
1062
1063 if(r->refbehind.b[z] & bb)
1064 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1065 if(r1->refahead.b[z] & bb)
1066 paint3(r1, bn, rb, rn);
1067
1068 if(!(r->refahead.b[z] & bb))
1069 break;
1070 r1 = r->s2;
1071 if(r1 != R)
1072 if(r1->refbehind.b[z] & bb)
1073 paint3(r1, bn, rb, rn);
1074 r = r->s1;
1075 if(r == R)
1076 break;
1077 if(r->act.b[z] & bb)
1078 break;
1079 if(!(r->refbehind.b[z] & bb))
1080 break;
1081 }
1082 }
1083
1084 void
addreg(Adr * a,int rn)1085 addreg(Adr *a, int rn)
1086 {
1087
1088 a->sym = 0;
1089 a->name = D_NONE;
1090 a->type = D_REG;
1091 a->reg = rn;
1092 if(rn >= NREG) {
1093 a->type = D_FREG;
1094 a->reg = rn-NREG;
1095 }
1096 }
1097
1098 /*
1099 * bit reg
1100 * 0 R3
1101 * 1 R4
1102 * ... ...
1103 * 19 R22
1104 * 20 R23
1105 */
1106 long
RtoB(int r)1107 RtoB(int r)
1108 {
1109
1110 if(r < 3 || r > 23)
1111 return 0;
1112 return 1L << (r-3);
1113 }
1114
BtoR(long b)1115 BtoR(long b)
1116 {
1117
1118 b &= 0x001fffffL;
1119 if(b == 0)
1120 return 0;
1121 return bitno(b) + 3;
1122 }
1123
1124 /*
1125 * bit reg
1126 * 22 F4
1127 * 23 F6
1128 * ... ...
1129 * 31 F22
1130 */
1131 long
FtoB(int f)1132 FtoB(int f)
1133 {
1134
1135 if(f < 4 || f > 22 || (f&1))
1136 return 0;
1137 return 1L << (f/2 + 20);
1138 }
1139
1140 int
BtoF(long b)1141 BtoF(long b)
1142 {
1143
1144 b &= 0xffc00000L;
1145 if(b == 0)
1146 return 0;
1147 return bitno(b)*2 - 40;
1148 }
1149