1 #include "gc.h"
2
3 void addsplits(void);
4
5 Reg*
rega(void)6 rega(void)
7 {
8 Reg *r;
9
10 r = freer;
11 if(r == R) {
12 r = alloc(sizeof(*r));
13 } else
14 freer = r->link;
15
16 *r = zreg;
17 return r;
18 }
19
20 int
rcmp(const void * a1,const void * a2)21 rcmp(const void *a1, const void *a2)
22 {
23 Rgn *p1, *p2;
24 int c1, c2;
25
26 p1 = (Rgn*)a1;
27 p2 = (Rgn*)a2;
28 c1 = p2->cost;
29 c2 = p1->cost;
30 if(c1 -= c2)
31 return c1;
32 return p2->varno - p1->varno;
33 }
34
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 Reg *r, *r1, *r2;
39 Prog *p1;
40 int i, z;
41 long initpc, val, npc;
42 ulong vreg;
43 Bits bit;
44 struct
45 {
46 long m;
47 long c;
48 Reg* p;
49 } log5[6], *lp;
50
51 firstr = R;
52 lastr = R;
53 nvar = 0;
54 regbits = 0;
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AB:
119 case ARFE:
120 r->p1 = R;
121 r1->s1 = R;
122 }
123
124 /*
125 * left side always read
126 */
127 bit = mkvar(&p->from, p->as==AMOVW);
128 for(z=0; z<BITS; z++)
129 r->use1.b[z] |= bit.b[z];
130
131 /*
132 * right side depends on opcode
133 */
134 bit = mkvar(&p->to, 0);
135 if(bany(&bit))
136 switch(p->as) {
137 default:
138 diag(Z, "reg: unknown asop: %A", p->as);
139 break;
140
141 /*
142 * right side write
143 */
144 case ANOP:
145 case AMOVB:
146 case AMOVBU:
147 case AMOVH:
148 case AMOVHU:
149 case AMOVW:
150 case AMOVF:
151 case AMOVD:
152 for(z=0; z<BITS; z++)
153 r->set.b[z] |= bit.b[z];
154 break;
155
156 /*
157 * funny
158 */
159 case ABL:
160 for(z=0; z<BITS; z++)
161 addrs.b[z] |= bit.b[z];
162 break;
163 }
164
165 if(p->as == AMOVM) {
166 if(p->from.type == D_CONST)
167 z = p->from.offset;
168 else
169 z = p->to.offset;
170 for(i=0; z; i++) {
171 if(z&1)
172 regbits |= RtoB(i);
173 z >>= 1;
174 }
175 }
176 }
177 if(firstr == R)
178 return;
179 initpc = pc - val;
180 npc = val;
181
182 /*
183 * pass 2
184 * turn branch references to pointers
185 * build back pointers
186 */
187 for(r = firstr; r != R; r = r->link) {
188 p = r->prog;
189 if(p->to.type == D_BRANCH) {
190 val = p->to.offset - initpc;
191 r1 = firstr;
192 while(r1 != R) {
193 r2 = r1->log5;
194 if(r2 != R && val >= r2->pc) {
195 r1 = r2;
196 continue;
197 }
198 if(r1->pc == val)
199 break;
200 r1 = r1->link;
201 }
202 if(r1 == R) {
203 nearln = p->lineno;
204 diag(Z, "ref not found\n%P", p);
205 continue;
206 }
207 if(r1 == r) {
208 nearln = p->lineno;
209 diag(Z, "ref to self\n%P", p);
210 continue;
211 }
212 r->s2 = r1;
213 r->p2link = r1->p2;
214 r1->p2 = r;
215 }
216 }
217 if(debug['R']) {
218 p = firstr->prog;
219 print("\n%L %D\n", p->lineno, &p->from);
220 }
221
222 /*
223 * pass 2.5
224 * find looping structure
225 */
226 for(r = firstr; r != R; r = r->link)
227 r->active = 0;
228 change = 0;
229 loopit(firstr, npc);
230
231 /*
232 * pass 3
233 * iterate propagating usage
234 * back until flow graph is complete
235 */
236 loop1:
237 change = 0;
238 for(r = firstr; r != R; r = r->link)
239 r->active = 0;
240 for(r = firstr; r != R; r = r->link)
241 if(r->prog->as == ARET)
242 prop(r, zbits, zbits);
243 loop11:
244 /* pick up unreachable code */
245 i = 0;
246 for(r = firstr; r != R; r = r1) {
247 r1 = r->link;
248 if(r1 && r1->active && !r->active) {
249 prop(r, zbits, zbits);
250 i = 1;
251 }
252 }
253 if(i)
254 goto loop11;
255 if(change)
256 goto loop1;
257
258
259 /*
260 * pass 4
261 * iterate propagating register/variable synchrony
262 * forward until graph is complete
263 */
264 loop2:
265 change = 0;
266 for(r = firstr; r != R; r = r->link)
267 r->active = 0;
268 synch(firstr, zbits);
269 if(change)
270 goto loop2;
271
272 addsplits();
273
274 if(debug['R'] && debug['v']) {
275 print("\nprop structure:\n");
276 for(r = firstr; r != R; r = r->link) {
277 print("%ld:%P", r->loop, r->prog);
278 for(z=0; z<BITS; z++)
279 bit.b[z] = r->set.b[z] |
280 r->refahead.b[z] | r->calahead.b[z] |
281 r->refbehind.b[z] | r->calbehind.b[z] |
282 r->use1.b[z] | r->use2.b[z];
283 if(bany(&bit)) {
284 print("\t");
285 if(bany(&r->use1))
286 print(" u1=%B", r->use1);
287 if(bany(&r->use2))
288 print(" u2=%B", r->use2);
289 if(bany(&r->set))
290 print(" st=%B", r->set);
291 if(bany(&r->refahead))
292 print(" ra=%B", r->refahead);
293 if(bany(&r->calahead))
294 print(" ca=%B", r->calahead);
295 if(bany(&r->refbehind))
296 print(" rb=%B", r->refbehind);
297 if(bany(&r->calbehind))
298 print(" cb=%B", r->calbehind);
299 }
300 print("\n");
301 }
302 }
303
304 /*
305 * pass 5
306 * isolate regions
307 * calculate costs (paint1)
308 */
309 r = firstr;
310 if(r) {
311 for(z=0; z<BITS; z++)
312 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
313 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
314 if(bany(&bit)) {
315 nearln = r->prog->lineno;
316 warn(Z, "used and not set: %B", bit);
317 if(debug['R'] && !debug['w'])
318 print("used and not set: %B\n", bit);
319 }
320 }
321
322 for(r = firstr; r != R; r = r->link)
323 r->act = zbits;
324 rgp = region;
325 nregion = 0;
326 for(r = firstr; r != R; r = r->link) {
327 for(z=0; z<BITS; z++)
328 bit.b[z] = r->set.b[z] &
329 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
330 if(bany(&bit)) {
331 nearln = r->prog->lineno;
332 warn(Z, "set and not used: %B", bit);
333 if(debug['R'])
334 print("set and not used: %B\n", bit);
335 excise(r);
336 }
337 for(z=0; z<BITS; z++)
338 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
339 while(bany(&bit)) {
340 i = bnum(bit);
341 rgp->enter = r;
342 rgp->varno = i;
343 change = 0;
344 if(debug['R'] && debug['v'])
345 print("\n");
346 paint1(r, i);
347 bit.b[i/32] &= ~(1L<<(i%32));
348 if(change <= 0) {
349 if(debug['R'])
350 print("%L $%d: %B\n",
351 r->prog->lineno, change, blsh(i));
352 continue;
353 }
354 rgp->cost = change;
355 nregion++;
356 if(nregion >= NRGN) {
357 warn(Z, "too many regions");
358 goto brk;
359 }
360 rgp++;
361 }
362 }
363 brk:
364 qsort(region, nregion, sizeof(region[0]), rcmp);
365
366 /*
367 * pass 6
368 * determine used registers (paint2)
369 * replace code (paint3)
370 */
371 rgp = region;
372 for(i=0; i<nregion; i++) {
373 bit = blsh(rgp->varno);
374 vreg = paint2(rgp->enter, rgp->varno);
375 vreg = allreg(vreg, rgp);
376 if(debug['R']) {
377 if(rgp->regno >= NREG)
378 print("%L $%d F%d: %B\n",
379 rgp->enter->prog->lineno,
380 rgp->cost,
381 rgp->regno-NREG,
382 bit);
383 else
384 print("%L $%d R%d: %B\n",
385 rgp->enter->prog->lineno,
386 rgp->cost,
387 rgp->regno,
388 bit);
389 }
390 if(rgp->regno != 0)
391 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
392 rgp++;
393 }
394 /*
395 * pass 7
396 * peep-hole on basic block
397 */
398 if(!debug['R'] || debug['P'])
399 peep();
400
401 /*
402 * pass 8
403 * recalculate pc
404 */
405 val = initpc;
406 for(r = firstr; r != R; r = r1) {
407 r->pc = val;
408 p = r->prog;
409 p1 = P;
410 r1 = r->link;
411 if(r1 != R)
412 p1 = r1->prog;
413 for(; p != p1; p = p->link) {
414 switch(p->as) {
415 default:
416 val++;
417 break;
418
419 case ANOP:
420 case ADATA:
421 case AGLOBL:
422 case ANAME:
423 case ASIGNAME:
424 break;
425 }
426 }
427 }
428 pc = val;
429
430 /*
431 * fix up branches
432 */
433 if(debug['R'])
434 if(bany(&addrs))
435 print("addrs: %B\n", addrs);
436
437 r1 = 0; /* set */
438 for(r = firstr; r != R; r = r->link) {
439 p = r->prog;
440 if(p->to.type == D_BRANCH)
441 p->to.offset = r->s2->pc;
442 r1 = r;
443 }
444
445 /*
446 * last pass
447 * eliminate nops
448 * free aux structures
449 */
450 for(p = firstr->prog; p != P; p = p->link){
451 while(p->link && p->link->as == ANOP)
452 p->link = p->link->link;
453 }
454 if(r1 != R) {
455 r1->link = freer;
456 freer = firstr;
457 }
458 }
459
460 void
addsplits(void)461 addsplits(void)
462 {
463 Reg *r, *r1;
464 int z, i;
465 Bits bit;
466
467 for(r = firstr; r != R; r = r->link) {
468 if(r->loop > 1)
469 continue;
470 if(r->prog->as == ABL)
471 continue;
472 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
473 if(r1->loop <= 1)
474 continue;
475 for(z=0; z<BITS; z++)
476 bit.b[z] = r1->calbehind.b[z] &
477 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
478 ~(r->calahead.b[z] & addrs.b[z]);
479 while(bany(&bit)) {
480 i = bnum(bit);
481 bit.b[i/32] &= ~(1L << (i%32));
482 }
483 }
484 }
485 }
486
487 /*
488 * add mov b,rn
489 * just after r
490 */
491 void
addmove(Reg * r,int bn,int rn,int f)492 addmove(Reg *r, int bn, int rn, int f)
493 {
494 Prog *p, *p1;
495 Adr *a;
496 Var *v;
497
498 p1 = alloc(sizeof(*p1));
499 *p1 = zprog;
500 p = r->prog;
501
502 p1->link = p->link;
503 p->link = p1;
504 p1->lineno = p->lineno;
505
506 v = var + bn;
507
508 a = &p1->to;
509 a->sym = v->sym;
510 a->name = v->name;
511 a->offset = v->offset;
512 a->etype = v->etype;
513 a->type = D_OREG;
514 if(a->etype == TARRAY || a->sym == S)
515 a->type = D_CONST;
516
517 p1->as = AMOVW;
518 if(v->etype == TCHAR || v->etype == TUCHAR)
519 p1->as = AMOVB;
520 if(v->etype == TSHORT || v->etype == TUSHORT)
521 p1->as = AMOVH;
522 if(v->etype == TFLOAT)
523 p1->as = AMOVF;
524 if(v->etype == TDOUBLE)
525 p1->as = AMOVD;
526
527 p1->from.type = D_REG;
528 p1->from.reg = rn;
529 if(rn >= NREG) {
530 p1->from.type = D_FREG;
531 p1->from.reg = rn-NREG;
532 }
533 if(!f) {
534 p1->from = *a;
535 *a = zprog.from;
536 a->type = D_REG;
537 a->reg = rn;
538 if(rn >= NREG) {
539 a->type = D_FREG;
540 a->reg = rn-NREG;
541 }
542 if(v->etype == TUCHAR)
543 p1->as = AMOVBU;
544 if(v->etype == TUSHORT)
545 p1->as = AMOVHU;
546 }
547 if(debug['R'])
548 print("%P\t.a%P\n", p, p1);
549 }
550
551 Bits
mkvar(Adr * a,int docon)552 mkvar(Adr *a, int docon)
553 {
554 Var *v;
555 int i, t, n, et, z;
556 long o;
557 Bits bit;
558 Sym *s;
559
560 t = a->type;
561 if(t == D_REG && a->reg != NREG)
562 regbits |= RtoB(a->reg);
563 if(t == D_FREG && a->reg != NREG)
564 regbits |= FtoB(a->reg);
565 s = a->sym;
566 o = a->offset;
567 et = a->etype;
568 if(s == S) {
569 if(t != D_CONST || !docon || a->reg != NREG)
570 goto none;
571 et = TLONG;
572 }
573 if(t == D_CONST) {
574 if(s == S && sval(o))
575 goto none;
576 }
577
578 n = a->name;
579 v = var;
580 for(i=0; i<nvar; i++) {
581 if(s == v->sym)
582 if(n == v->name)
583 if(o == v->offset)
584 goto out;
585 v++;
586 }
587 if(s)
588 if(s->name[0] == '.')
589 goto none;
590 if(nvar >= NVAR) {
591 if(debug['w'] > 1 && s)
592 warn(Z, "variable not optimized: %s", s->name);
593 goto none;
594 }
595 i = nvar;
596 nvar++;
597 v = &var[i];
598 v->sym = s;
599 v->offset = o;
600 v->etype = et;
601 v->name = n;
602 if(debug['R'])
603 print("bit=%2d et=%2d %D\n", i, et, a);
604 out:
605 bit = blsh(i);
606 if(n == D_EXTERN || n == D_STATIC)
607 for(z=0; z<BITS; z++)
608 externs.b[z] |= bit.b[z];
609 if(n == D_PARAM)
610 for(z=0; z<BITS; z++)
611 params.b[z] |= bit.b[z];
612 if(v->etype != et || !typechlpfd[et]) /* funny punning */
613 for(z=0; z<BITS; z++)
614 addrs.b[z] |= bit.b[z];
615 if(t == D_CONST) {
616 if(s == S) {
617 for(z=0; z<BITS; z++)
618 consts.b[z] |= bit.b[z];
619 return bit;
620 }
621 if(et != TARRAY)
622 for(z=0; z<BITS; z++)
623 addrs.b[z] |= bit.b[z];
624 for(z=0; z<BITS; z++)
625 params.b[z] |= bit.b[z];
626 return bit;
627 }
628 if(t == D_OREG)
629 return bit;
630
631 none:
632 return zbits;
633 }
634
635 void
prop(Reg * r,Bits ref,Bits cal)636 prop(Reg *r, Bits ref, Bits cal)
637 {
638 Reg *r1, *r2;
639 int z;
640
641 for(r1 = r; r1 != R; r1 = r1->p1) {
642 for(z=0; z<BITS; z++) {
643 ref.b[z] |= r1->refahead.b[z];
644 if(ref.b[z] != r1->refahead.b[z]) {
645 r1->refahead.b[z] = ref.b[z];
646 change++;
647 }
648 cal.b[z] |= r1->calahead.b[z];
649 if(cal.b[z] != r1->calahead.b[z]) {
650 r1->calahead.b[z] = cal.b[z];
651 change++;
652 }
653 }
654 switch(r1->prog->as) {
655 case ABL:
656 for(z=0; z<BITS; z++) {
657 cal.b[z] |= ref.b[z] | externs.b[z];
658 ref.b[z] = 0;
659 }
660 break;
661
662 case ATEXT:
663 for(z=0; z<BITS; z++) {
664 cal.b[z] = 0;
665 ref.b[z] = 0;
666 }
667 break;
668
669 case ARET:
670 for(z=0; z<BITS; z++) {
671 cal.b[z] = externs.b[z];
672 ref.b[z] = 0;
673 }
674 }
675 for(z=0; z<BITS; z++) {
676 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677 r1->use1.b[z] | r1->use2.b[z];
678 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679 r1->refbehind.b[z] = ref.b[z];
680 r1->calbehind.b[z] = cal.b[z];
681 }
682 if(r1->active)
683 break;
684 r1->active = 1;
685 }
686 for(; r != r1; r = r->p1)
687 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688 prop(r2, r->refbehind, r->calbehind);
689 }
690
691 /*
692 * find looping structure
693 *
694 * 1) find reverse postordering
695 * 2) find approximate dominators,
696 * the actual dominators if the flow graph is reducible
697 * otherwise, dominators plus some other non-dominators.
698 * See Matthew S. Hecht and Jeffrey D. Ullman,
699 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
700 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701 * Oct. 1-3, 1973, pp. 207-217.
702 * 3) find all nodes with a predecessor dominated by the current node.
703 * such a node is a loop head.
704 * recursively, all preds with a greater rpo number are in the loop
705 */
706 long
postorder(Reg * r,Reg ** rpo2r,long n)707 postorder(Reg *r, Reg **rpo2r, long n)
708 {
709 Reg *r1;
710
711 r->rpo = 1;
712 r1 = r->s1;
713 if(r1 && !r1->rpo)
714 n = postorder(r1, rpo2r, n);
715 r1 = r->s2;
716 if(r1 && !r1->rpo)
717 n = postorder(r1, rpo2r, n);
718 rpo2r[n] = r;
719 n++;
720 return n;
721 }
722
723 long
rpolca(long * idom,long rpo1,long rpo2)724 rpolca(long *idom, long rpo1, long rpo2)
725 {
726 long t;
727
728 if(rpo1 == -1)
729 return rpo2;
730 while(rpo1 != rpo2){
731 if(rpo1 > rpo2){
732 t = rpo2;
733 rpo2 = rpo1;
734 rpo1 = t;
735 }
736 while(rpo1 < rpo2){
737 t = idom[rpo2];
738 if(t >= rpo2)
739 fatal(Z, "bad idom");
740 rpo2 = t;
741 }
742 }
743 return rpo1;
744 }
745
746 int
doms(long * idom,long r,long s)747 doms(long *idom, long r, long s)
748 {
749 while(s > r)
750 s = idom[s];
751 return s == r;
752 }
753
754 int
loophead(long * idom,Reg * r)755 loophead(long *idom, Reg *r)
756 {
757 long src;
758
759 src = r->rpo;
760 if(r->p1 != R && doms(idom, src, r->p1->rpo))
761 return 1;
762 for(r = r->p2; r != R; r = r->p2link)
763 if(doms(idom, src, r->rpo))
764 return 1;
765 return 0;
766 }
767
768 void
loopmark(Reg ** rpo2r,long head,Reg * r)769 loopmark(Reg **rpo2r, long head, Reg *r)
770 {
771 if(r->rpo < head || r->active == head)
772 return;
773 r->active = head;
774 r->loop += LOOP;
775 if(r->p1 != R)
776 loopmark(rpo2r, head, r->p1);
777 for(r = r->p2; r != R; r = r->p2link)
778 loopmark(rpo2r, head, r);
779 }
780
781 void
loopit(Reg * r,long nr)782 loopit(Reg *r, long nr)
783 {
784 Reg *r1;
785 long i, d, me;
786
787 if(nr > maxnr) {
788 rpo2r = alloc(nr * sizeof(Reg*));
789 idom = alloc(nr * sizeof(long));
790 maxnr = nr;
791 }
792
793 d = postorder(r, rpo2r, 0);
794 if(d > nr)
795 fatal(Z, "too many reg nodes");
796 nr = d;
797 for(i = 0; i < nr / 2; i++){
798 r1 = rpo2r[i];
799 rpo2r[i] = rpo2r[nr - 1 - i];
800 rpo2r[nr - 1 - i] = r1;
801 }
802 for(i = 0; i < nr; i++)
803 rpo2r[i]->rpo = i;
804
805 idom[0] = 0;
806 for(i = 0; i < nr; i++){
807 r1 = rpo2r[i];
808 me = r1->rpo;
809 d = -1;
810 if(r1->p1 != R && r1->p1->rpo < me)
811 d = r1->p1->rpo;
812 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813 if(r1->rpo < me)
814 d = rpolca(idom, d, r1->rpo);
815 idom[i] = d;
816 }
817
818 for(i = 0; i < nr; i++){
819 r1 = rpo2r[i];
820 r1->loop++;
821 if(r1->p2 != R && loophead(idom, r1))
822 loopmark(rpo2r, i, r1);
823 }
824 }
825
826 void
synch(Reg * r,Bits dif)827 synch(Reg *r, Bits dif)
828 {
829 Reg *r1;
830 int z;
831
832 for(r1 = r; r1 != R; r1 = r1->s1) {
833 for(z=0; z<BITS; z++) {
834 dif.b[z] = (dif.b[z] &
835 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836 r1->set.b[z] | r1->regdiff.b[z];
837 if(dif.b[z] != r1->regdiff.b[z]) {
838 r1->regdiff.b[z] = dif.b[z];
839 change++;
840 }
841 }
842 if(r1->active)
843 break;
844 r1->active = 1;
845 for(z=0; z<BITS; z++)
846 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847 if(r1->s2 != R)
848 synch(r1->s2, dif);
849 }
850 }
851
852 ulong
allreg(ulong b,Rgn * r)853 allreg(ulong b, Rgn *r)
854 {
855 Var *v;
856 int i;
857
858 v = var + r->varno;
859 r->regno = 0;
860 switch(v->etype) {
861
862 default:
863 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864 break;
865
866 case TCHAR:
867 case TUCHAR:
868 case TSHORT:
869 case TUSHORT:
870 case TINT:
871 case TUINT:
872 case TLONG:
873 case TULONG:
874 case TIND:
875 case TARRAY:
876 i = BtoR(~b);
877 if(i && r->cost >= 0) {
878 r->regno = i;
879 return RtoB(i);
880 }
881 break;
882
883 case TVLONG:
884 case TDOUBLE:
885 case TFLOAT:
886 i = BtoF(~b);
887 if(i && r->cost >= 0) {
888 r->regno = i+NREG;
889 return FtoB(i);
890 }
891 break;
892 }
893 return 0;
894 }
895
896 void
paint1(Reg * r,int bn)897 paint1(Reg *r, int bn)
898 {
899 Reg *r1;
900 Prog *p;
901 int z;
902 ulong bb;
903
904 z = bn/32;
905 bb = 1L<<(bn%32);
906 if(r->act.b[z] & bb)
907 return;
908 for(;;) {
909 if(!(r->refbehind.b[z] & bb))
910 break;
911 r1 = r->p1;
912 if(r1 == R)
913 break;
914 if(!(r1->refahead.b[z] & bb))
915 break;
916 if(r1->act.b[z] & bb)
917 break;
918 r = r1;
919 }
920
921 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
922 change -= CLOAD * r->loop;
923 if(debug['R'] && debug['v'])
924 print("%ld%P\tld %B $%d\n", r->loop,
925 r->prog, blsh(bn), change);
926 }
927 for(;;) {
928 r->act.b[z] |= bb;
929 p = r->prog;
930
931 if(r->use1.b[z] & bb) {
932 change += CREF * r->loop;
933 if(debug['R'] && debug['v'])
934 print("%ld%P\tu1 %B $%d\n", r->loop,
935 p, blsh(bn), change);
936 }
937
938 if((r->use2.b[z]|r->set.b[z]) & bb) {
939 change += CREF * r->loop;
940 if(debug['R'] && debug['v'])
941 print("%ld%P\tu2 %B $%d\n", r->loop,
942 p, blsh(bn), change);
943 }
944
945 if(STORE(r) & r->regdiff.b[z] & bb) {
946 change -= CLOAD * r->loop;
947 if(debug['R'] && debug['v'])
948 print("%ld%P\tst %B $%d\n", r->loop,
949 p, blsh(bn), change);
950 }
951
952 if(r->refbehind.b[z] & bb)
953 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
954 if(r1->refahead.b[z] & bb)
955 paint1(r1, bn);
956
957 if(!(r->refahead.b[z] & bb))
958 break;
959 r1 = r->s2;
960 if(r1 != R)
961 if(r1->refbehind.b[z] & bb)
962 paint1(r1, bn);
963 r = r->s1;
964 if(r == R)
965 break;
966 if(r->act.b[z] & bb)
967 break;
968 if(!(r->refbehind.b[z] & bb))
969 break;
970 }
971 }
972
973 ulong
paint2(Reg * r,int bn)974 paint2(Reg *r, int bn)
975 {
976 Reg *r1;
977 int z;
978 ulong bb, vreg;
979
980 z = bn/32;
981 bb = 1L << (bn%32);
982 vreg = regbits;
983 if(!(r->act.b[z] & bb))
984 return vreg;
985 for(;;) {
986 if(!(r->refbehind.b[z] & bb))
987 break;
988 r1 = r->p1;
989 if(r1 == R)
990 break;
991 if(!(r1->refahead.b[z] & bb))
992 break;
993 if(!(r1->act.b[z] & bb))
994 break;
995 r = r1;
996 }
997 for(;;) {
998 r->act.b[z] &= ~bb;
999
1000 vreg |= r->regu;
1001
1002 if(r->refbehind.b[z] & bb)
1003 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1004 if(r1->refahead.b[z] & bb)
1005 vreg |= paint2(r1, bn);
1006
1007 if(!(r->refahead.b[z] & bb))
1008 break;
1009 r1 = r->s2;
1010 if(r1 != R)
1011 if(r1->refbehind.b[z] & bb)
1012 vreg |= paint2(r1, bn);
1013 r = r->s1;
1014 if(r == R)
1015 break;
1016 if(!(r->act.b[z] & bb))
1017 break;
1018 if(!(r->refbehind.b[z] & bb))
1019 break;
1020 }
1021 return vreg;
1022 }
1023
1024 void
paint3(Reg * r,int bn,long rb,int rn)1025 paint3(Reg *r, int bn, long rb, int rn)
1026 {
1027 Reg *r1;
1028 Prog *p;
1029 int z;
1030 ulong bb;
1031
1032 z = bn/32;
1033 bb = 1L << (bn%32);
1034 if(r->act.b[z] & bb)
1035 return;
1036 for(;;) {
1037 if(!(r->refbehind.b[z] & bb))
1038 break;
1039 r1 = r->p1;
1040 if(r1 == R)
1041 break;
1042 if(!(r1->refahead.b[z] & bb))
1043 break;
1044 if(r1->act.b[z] & bb)
1045 break;
1046 r = r1;
1047 }
1048
1049 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1050 addmove(r, bn, rn, 0);
1051 for(;;) {
1052 r->act.b[z] |= bb;
1053 p = r->prog;
1054
1055 if(r->use1.b[z] & bb) {
1056 if(debug['R'])
1057 print("%P", p);
1058 addreg(&p->from, rn);
1059 if(debug['R'])
1060 print("\t.c%P\n", p);
1061 }
1062 if((r->use2.b[z]|r->set.b[z]) & bb) {
1063 if(debug['R'])
1064 print("%P", p);
1065 addreg(&p->to, rn);
1066 if(debug['R'])
1067 print("\t.c%P\n", p);
1068 }
1069
1070 if(STORE(r) & r->regdiff.b[z] & bb)
1071 addmove(r, bn, rn, 1);
1072 r->regu |= rb;
1073
1074 if(r->refbehind.b[z] & bb)
1075 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1076 if(r1->refahead.b[z] & bb)
1077 paint3(r1, bn, rb, rn);
1078
1079 if(!(r->refahead.b[z] & bb))
1080 break;
1081 r1 = r->s2;
1082 if(r1 != R)
1083 if(r1->refbehind.b[z] & bb)
1084 paint3(r1, bn, rb, rn);
1085 r = r->s1;
1086 if(r == R)
1087 break;
1088 if(r->act.b[z] & bb)
1089 break;
1090 if(!(r->refbehind.b[z] & bb))
1091 break;
1092 }
1093 }
1094
1095 void
addreg(Adr * a,int rn)1096 addreg(Adr *a, int rn)
1097 {
1098
1099 a->sym = 0;
1100 a->name = D_NONE;
1101 a->type = D_REG;
1102 a->reg = rn;
1103 if(rn >= NREG) {
1104 a->type = D_FREG;
1105 a->reg = rn-NREG;
1106 }
1107 }
1108
1109 /*
1110 * bit reg
1111 * 0 R0
1112 * 1 R1
1113 * ... ...
1114 * 10 R10
1115 */
1116 long
RtoB(int r)1117 RtoB(int r)
1118 {
1119
1120 if(r >= REGMIN && r <= REGMAX)
1121 return 1L << r;
1122 return 0;
1123 }
1124
1125 int
BtoR(long b)1126 BtoR(long b)
1127 {
1128 b &= 0x01fcL; // excluded R9 and R10 for m and g
1129 if(b == 0)
1130 return 0;
1131 return bitno(b);
1132 }
1133
1134 /*
1135 * bit reg
1136 * 18 F2
1137 * 19 F3
1138 * ... ...
1139 * 23 F7
1140 */
1141 long
FtoB(int f)1142 FtoB(int f)
1143 {
1144
1145 if(f < 2 || f > NFREG-1)
1146 return 0;
1147 return 1L << (f + 16);
1148 }
1149
1150 int
BtoF(long b)1151 BtoF(long b)
1152 {
1153
1154 b &= 0xfc0000L;
1155 if(b == 0)
1156 return 0;
1157 return bitno(b) - 16;
1158 }
1159