1 #include "gc.h"
2
3 void addsplits(void);
4
5 Reg*
rega(void)6 rega(void)
7 {
8 Reg *r;
9
10 r = freer;
11 if(r == R) {
12 r = alloc(sizeof(*r));
13 } else
14 freer = r->link;
15
16 *r = zreg;
17 return r;
18 }
19
20 int
rcmp(void * a1,void * a2)21 rcmp(void *a1, void *a2)
22 {
23 Rgn *p1, *p2;
24 int c1, c2;
25
26 p1 = (Rgn*)a1;
27 p2 = (Rgn*)a2;
28 c1 = p2->cost;
29 c2 = p1->cost;
30 if(c1 -= c2)
31 return c1;
32 return p2->varno - p1->varno;
33 }
34
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 Reg *r, *r1, *r2;
39 Prog *p1;
40 int i, z;
41 long initpc, val, npc;
42 ulong vreg;
43 Bits bit;
44 struct
45 {
46 long m;
47 long c;
48 Reg* p;
49 } log5[6], *lp;
50
51 firstr = R;
52 lastr = R;
53 nvar = 0;
54 regbits = 0;
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 case ARFE:
120 r->p1 = R;
121 r1->s1 = R;
122 }
123
124 /*
125 * left side always read
126 */
127 bit = mkvar(&p->from, p->as==AMOVW);
128 for(z=0; z<BITS; z++)
129 r->use1.b[z] |= bit.b[z];
130
131 /*
132 * right side depends on opcode
133 */
134 bit = mkvar(&p->to, 0);
135 if(bany(&bit))
136 switch(p->as) {
137 default:
138 diag(Z, "reg: unknown asop: %A", p->as);
139 break;
140
141 /*
142 * right side write
143 */
144 case ANOP:
145 case AMOVB:
146 case AMOVBU:
147 case AMOVH:
148 case AMOVHU:
149 case AMOVW:
150 case AMOVV:
151 case AMOVF:
152 case AMOVD:
153 for(z=0; z<BITS; z++)
154 r->set.b[z] |= bit.b[z];
155 break;
156
157 /*
158 * funny
159 */
160 case AJAL:
161 for(z=0; z<BITS; z++)
162 addrs.b[z] |= bit.b[z];
163 break;
164 }
165 }
166 if(firstr == R)
167 return;
168 initpc = pc - val;
169 npc = val;
170
171 /*
172 * pass 2
173 * turn branch references to pointers
174 * build back pointers
175 */
176 for(r = firstr; r != R; r = r->link) {
177 p = r->prog;
178 if(p->to.type == D_BRANCH) {
179 val = p->to.offset - initpc;
180 r1 = firstr;
181 while(r1 != R) {
182 r2 = r1->log5;
183 if(r2 != R && val >= r2->pc) {
184 r1 = r2;
185 continue;
186 }
187 if(r1->pc == val)
188 break;
189 r1 = r1->link;
190 }
191 if(r1 == R) {
192 nearln = p->lineno;
193 diag(Z, "ref not found\n%P", p);
194 continue;
195 }
196 if(r1 == r) {
197 nearln = p->lineno;
198 diag(Z, "ref to self\n%P", p);
199 continue;
200 }
201 r->s2 = r1;
202 r->p2link = r1->p2;
203 r1->p2 = r;
204 }
205 }
206 if(debug['R']) {
207 p = firstr->prog;
208 print("\n%L %D\n", p->lineno, &p->from);
209 }
210
211 /*
212 * pass 2.5
213 * find looping structure
214 */
215 for(r = firstr; r != R; r = r->link)
216 r->active = 0;
217 change = 0;
218 loopit(firstr, npc);
219
220 /*
221 * pass 3
222 * iterate propagating usage
223 * back until flow graph is complete
224 */
225 loop1:
226 change = 0;
227 for(r = firstr; r != R; r = r->link)
228 r->active = 0;
229 for(r = firstr; r != R; r = r->link)
230 if(r->prog->as == ARET)
231 prop(r, zbits, zbits);
232 loop11:
233 /* pick up unreachable code */
234 i = 0;
235 for(r = firstr; r != R; r = r1) {
236 r1 = r->link;
237 if(r1 && r1->active && !r->active) {
238 prop(r, zbits, zbits);
239 i = 1;
240 }
241 }
242 if(i)
243 goto loop11;
244 if(change)
245 goto loop1;
246
247
248 /*
249 * pass 4
250 * iterate propagating register/variable synchrony
251 * forward until graph is complete
252 */
253 loop2:
254 change = 0;
255 for(r = firstr; r != R; r = r->link)
256 r->active = 0;
257 synch(firstr, zbits);
258 if(change)
259 goto loop2;
260
261 addsplits();
262
263 if(debug['R'] && debug['v']) {
264 print("\nprop structure:\n");
265 for(r = firstr; r != R; r = r->link) {
266 print("%ld:%P", r->loop, r->prog);
267 for(z=0; z<BITS; z++)
268 bit.b[z] = r->set.b[z] |
269 r->refahead.b[z] | r->calahead.b[z] |
270 r->refbehind.b[z] | r->calbehind.b[z] |
271 r->use1.b[z] | r->use2.b[z];
272 if(bany(&bit)) {
273 print("\t");
274 if(bany(&r->use1))
275 print(" u1=%B", r->use1);
276 if(bany(&r->use2))
277 print(" u2=%B", r->use2);
278 if(bany(&r->set))
279 print(" st=%B", r->set);
280 if(bany(&r->refahead))
281 print(" ra=%B", r->refahead);
282 if(bany(&r->calahead))
283 print(" ca=%B", r->calahead);
284 if(bany(&r->refbehind))
285 print(" rb=%B", r->refbehind);
286 if(bany(&r->calbehind))
287 print(" cb=%B", r->calbehind);
288 }
289 print("\n");
290 }
291 }
292
293 /*
294 * pass 5
295 * isolate regions
296 * calculate costs (paint1)
297 */
298 r = firstr;
299 if(r) {
300 for(z=0; z<BITS; z++)
301 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
302 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
303 if(bany(&bit)) {
304 nearln = r->prog->lineno;
305 warn(Z, "used and not set: %B", bit);
306 if(debug['R'] && !debug['w'])
307 print("used and not set: %B\n", bit);
308 }
309 }
310
311 for(r = firstr; r != R; r = r->link)
312 r->act = zbits;
313 rgp = region;
314 nregion = 0;
315 for(r = firstr; r != R; r = r->link) {
316 for(z=0; z<BITS; z++)
317 bit.b[z] = r->set.b[z] &
318 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
319 if(bany(&bit)) {
320 nearln = r->prog->lineno;
321 warn(Z, "set and not used: %B", bit);
322 if(debug['R'])
323 print("set and not used: %B\n", bit);
324 excise(r);
325 }
326 for(z=0; z<BITS; z++)
327 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
328 while(bany(&bit)) {
329 i = bnum(bit);
330 rgp->enter = r;
331 rgp->varno = i;
332 change = 0;
333 if(debug['R'] && debug['v'])
334 print("\n");
335 paint1(r, i);
336 bit.b[i/32] &= ~(1L<<(i%32));
337 if(change <= 0) {
338 if(debug['R'])
339 print("%L $%d: %B\n",
340 r->prog->lineno, change, blsh(i));
341 continue;
342 }
343 rgp->cost = change;
344 nregion++;
345 if(nregion >= NRGN) {
346 warn(Z, "too many regions");
347 goto brk;
348 }
349 rgp++;
350 }
351 }
352 brk:
353 qsort(region, nregion, sizeof(region[0]), rcmp);
354
355 /*
356 * pass 6
357 * determine used registers (paint2)
358 * replace code (paint3)
359 */
360 rgp = region;
361 for(i=0; i<nregion; i++) {
362 bit = blsh(rgp->varno);
363 vreg = paint2(rgp->enter, rgp->varno);
364 vreg = allreg(vreg, rgp);
365 if(debug['R']) {
366 if(rgp->regno >= NREG)
367 print("%L $%d F%d: %B\n",
368 rgp->enter->prog->lineno,
369 rgp->cost,
370 rgp->regno-NREG,
371 bit);
372 else
373 print("%L $%d R%d: %B\n",
374 rgp->enter->prog->lineno,
375 rgp->cost,
376 rgp->regno,
377 bit);
378 }
379 if(rgp->regno != 0)
380 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
381 rgp++;
382 }
383 /*
384 * pass 7
385 * peep-hole on basic block
386 */
387 if(!debug['R'] || debug['P'])
388 peep();
389
390 /*
391 * pass 8
392 * recalculate pc
393 */
394 val = initpc;
395 for(r = firstr; r != R; r = r1) {
396 r->pc = val;
397 p = r->prog;
398 p1 = P;
399 r1 = r->link;
400 if(r1 != R)
401 p1 = r1->prog;
402 for(; p != p1; p = p->link) {
403 switch(p->as) {
404 default:
405 val++;
406 break;
407
408 case ANOP:
409 case ADATA:
410 case AGLOBL:
411 case ANAME:
412 case ASIGNAME:
413 break;
414 }
415 }
416 }
417 pc = val;
418
419 /*
420 * fix up branches
421 */
422 if(debug['R'])
423 if(bany(&addrs))
424 print("addrs: %B\n", addrs);
425
426 r1 = 0; /* set */
427 for(r = firstr; r != R; r = r->link) {
428 p = r->prog;
429 if(p->to.type == D_BRANCH)
430 p->to.offset = r->s2->pc;
431 r1 = r;
432 }
433
434 /*
435 * last pass
436 * eliminate nops
437 * free aux structures
438 */
439 for(p = firstr->prog; p != P; p = p->link){
440 while(p->link && p->link->as == ANOP)
441 p->link = p->link->link;
442 }
443 if(r1 != R) {
444 r1->link = freer;
445 freer = firstr;
446 }
447 }
448
449 void
addsplits(void)450 addsplits(void)
451 {
452 Reg *r, *r1;
453 int z, i;
454 Bits bit;
455
456 for(r = firstr; r != R; r = r->link) {
457 if(r->loop > 1)
458 continue;
459 if(r->prog->as == AJAL)
460 continue;
461 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
462 if(r1->loop <= 1)
463 continue;
464 for(z=0; z<BITS; z++)
465 bit.b[z] = r1->calbehind.b[z] &
466 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
467 ~(r->calahead.b[z] & addrs.b[z]);
468 while(bany(&bit)) {
469 i = bnum(bit);
470 bit.b[i/32] &= ~(1L << (i%32));
471 }
472 }
473 }
474 }
475
476 /*
477 * add mov b,rn
478 * just after r
479 */
480 void
addmove(Reg * r,int bn,int rn,int f)481 addmove(Reg *r, int bn, int rn, int f)
482 {
483 Prog *p, *p1;
484 Adr *a;
485 Var *v;
486
487 p1 = alloc(sizeof(*p1));
488 *p1 = zprog;
489 p = r->prog;
490
491 p1->link = p->link;
492 p->link = p1;
493 p1->lineno = p->lineno;
494
495 v = var + bn;
496
497 a = &p1->to;
498 a->sym = v->sym;
499 a->name = v->name;
500 a->offset = v->offset;
501 a->etype = v->etype;
502 a->type = D_OREG;
503 if(a->etype == TARRAY || a->sym == S)
504 a->type = D_CONST;
505
506 p1->as = AMOVW;
507 if(v->etype == TCHAR || v->etype == TUCHAR)
508 p1->as = AMOVB;
509 if(v->etype == TSHORT || v->etype == TUSHORT)
510 p1->as = AMOVH;
511 if(v->etype == TVLONG || v->etype == TUVLONG)
512 p1->as = AMOVV;
513 if(v->etype == TFLOAT)
514 p1->as = AMOVF;
515 if(v->etype == TDOUBLE)
516 p1->as = AMOVD;
517
518 p1->from.type = D_REG;
519 p1->from.reg = rn;
520 if(rn >= NREG) {
521 p1->from.type = D_FREG;
522 p1->from.reg = rn-NREG;
523 }
524 if(!f) {
525 p1->from = *a;
526 *a = zprog.from;
527 a->type = D_REG;
528 a->reg = rn;
529 if(rn >= NREG) {
530 a->type = D_FREG;
531 a->reg = rn-NREG;
532 }
533 if(v->etype == TUCHAR)
534 p1->as = AMOVBU;
535 if(v->etype == TUSHORT)
536 p1->as = AMOVHU;
537 }
538 if(debug['R'])
539 print("%P\t.a%P\n", p, p1);
540 }
541
542 Bits
mkvar(Adr * a,int docon)543 mkvar(Adr *a, int docon)
544 {
545 Var *v;
546 int i, t, n, et, z;
547 long o;
548 Bits bit;
549 Sym *s;
550
551 t = a->type;
552 if(t == D_REG && a->reg != NREG)
553 regbits |= RtoB(a->reg);
554 if(t == D_FREG && a->reg != NREG)
555 regbits |= FtoB(a->reg);
556 s = a->sym;
557 o = a->offset;
558 et = a->etype;
559 if(s == S) {
560 if(t != D_CONST || !docon || a->reg != NREG)
561 goto none;
562 et = TLONG;
563 }
564 if(t == D_CONST) {
565 if(s == S && sval(o))
566 goto none;
567 }
568
569 n = a->name;
570 v = var;
571 for(i=0; i<nvar; i++) {
572 if(s == v->sym)
573 if(n == v->name)
574 if(o == v->offset)
575 goto out;
576 v++;
577 }
578 if(s)
579 if(s->name[0] == '.')
580 goto none;
581 if(nvar >= NVAR) {
582 if(debug['w'] > 1 && s)
583 warn(Z, "variable not optimized: %s", s->name);
584 goto none;
585 }
586 i = nvar;
587 nvar++;
588 v = &var[i];
589 v->sym = s;
590 v->offset = o;
591 v->etype = et;
592 v->name = n;
593 if(debug['R'])
594 print("bit=%2d et=%2d %D\n", i, et, a);
595 out:
596 bit = blsh(i);
597 if(n == D_EXTERN || n == D_STATIC)
598 for(z=0; z<BITS; z++)
599 externs.b[z] |= bit.b[z];
600 if(n == D_PARAM)
601 for(z=0; z<BITS; z++)
602 params.b[z] |= bit.b[z];
603 if(v->etype != et || !(typechlpfd[et]/* || typev[et]*/)) /* funny punning */
604 for(z=0; z<BITS; z++)
605 addrs.b[z] |= bit.b[z];
606 if(t == D_CONST) {
607 if(s == S) {
608 for(z=0; z<BITS; z++)
609 consts.b[z] |= bit.b[z];
610 return bit;
611 }
612 if(et != TARRAY)
613 for(z=0; z<BITS; z++)
614 addrs.b[z] |= bit.b[z];
615 for(z=0; z<BITS; z++)
616 params.b[z] |= bit.b[z];
617 return bit;
618 }
619 if(t == D_OREG)
620 return bit;
621
622 none:
623 return zbits;
624 }
625
626 void
prop(Reg * r,Bits ref,Bits cal)627 prop(Reg *r, Bits ref, Bits cal)
628 {
629 Reg *r1, *r2;
630 int z;
631
632 for(r1 = r; r1 != R; r1 = r1->p1) {
633 for(z=0; z<BITS; z++) {
634 ref.b[z] |= r1->refahead.b[z];
635 if(ref.b[z] != r1->refahead.b[z]) {
636 r1->refahead.b[z] = ref.b[z];
637 change++;
638 }
639 cal.b[z] |= r1->calahead.b[z];
640 if(cal.b[z] != r1->calahead.b[z]) {
641 r1->calahead.b[z] = cal.b[z];
642 change++;
643 }
644 }
645 switch(r1->prog->as) {
646 case AJAL:
647 for(z=0; z<BITS; z++) {
648 cal.b[z] |= ref.b[z] | externs.b[z];
649 ref.b[z] = 0;
650 }
651 break;
652
653 case ATEXT:
654 for(z=0; z<BITS; z++) {
655 cal.b[z] = 0;
656 ref.b[z] = 0;
657 }
658 break;
659
660 case ARET:
661 for(z=0; z<BITS; z++) {
662 cal.b[z] = externs.b[z];
663 ref.b[z] = 0;
664 }
665 }
666 for(z=0; z<BITS; z++) {
667 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
668 r1->use1.b[z] | r1->use2.b[z];
669 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
670 r1->refbehind.b[z] = ref.b[z];
671 r1->calbehind.b[z] = cal.b[z];
672 }
673 if(r1->active)
674 break;
675 r1->active = 1;
676 }
677 for(; r != r1; r = r->p1)
678 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
679 prop(r2, r->refbehind, r->calbehind);
680 }
681
682 /*
683 * find looping structure
684 *
685 * 1) find reverse postordering
686 * 2) find approximate dominators,
687 * the actual dominators if the flow graph is reducible
688 * otherwise, dominators plus some other non-dominators.
689 * See Matthew S. Hecht and Jeffrey D. Ullman,
690 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
691 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
692 * Oct. 1-3, 1973, pp. 207-217.
693 * 3) find all nodes with a predecessor dominated by the current node.
694 * such a node is a loop head.
695 * recursively, all preds with a greater rpo number are in the loop
696 */
697 long
postorder(Reg * r,Reg ** rpo2r,long n)698 postorder(Reg *r, Reg **rpo2r, long n)
699 {
700 Reg *r1;
701
702 r->rpo = 1;
703 r1 = r->s1;
704 if(r1 && !r1->rpo)
705 n = postorder(r1, rpo2r, n);
706 r1 = r->s2;
707 if(r1 && !r1->rpo)
708 n = postorder(r1, rpo2r, n);
709 rpo2r[n] = r;
710 n++;
711 return n;
712 }
713
714 long
rpolca(long * idom,long rpo1,long rpo2)715 rpolca(long *idom, long rpo1, long rpo2)
716 {
717 long t;
718
719 if(rpo1 == -1)
720 return rpo2;
721 while(rpo1 != rpo2){
722 if(rpo1 > rpo2){
723 t = rpo2;
724 rpo2 = rpo1;
725 rpo1 = t;
726 }
727 while(rpo1 < rpo2){
728 t = idom[rpo2];
729 if(t >= rpo2)
730 fatal(Z, "bad idom");
731 rpo2 = t;
732 }
733 }
734 return rpo1;
735 }
736
737 int
doms(long * idom,long r,long s)738 doms(long *idom, long r, long s)
739 {
740 while(s > r)
741 s = idom[s];
742 return s == r;
743 }
744
745 int
loophead(long * idom,Reg * r)746 loophead(long *idom, Reg *r)
747 {
748 long src;
749
750 src = r->rpo;
751 if(r->p1 != R && doms(idom, src, r->p1->rpo))
752 return 1;
753 for(r = r->p2; r != R; r = r->p2link)
754 if(doms(idom, src, r->rpo))
755 return 1;
756 return 0;
757 }
758
759 void
loopmark(Reg ** rpo2r,long head,Reg * r)760 loopmark(Reg **rpo2r, long head, Reg *r)
761 {
762 if(r->rpo < head || r->active == head)
763 return;
764 r->active = head;
765 r->loop += LOOP;
766 if(r->p1 != R)
767 loopmark(rpo2r, head, r->p1);
768 for(r = r->p2; r != R; r = r->p2link)
769 loopmark(rpo2r, head, r);
770 }
771
772 void
loopit(Reg * r,long nr)773 loopit(Reg *r, long nr)
774 {
775 Reg *r1;
776 long i, d, me;
777
778 if(nr > maxnr) {
779 rpo2r = alloc(nr * sizeof(Reg*));
780 idom = alloc(nr * sizeof(long));
781 maxnr = nr;
782 }
783
784 d = postorder(r, rpo2r, 0);
785 if(d > nr)
786 fatal(Z, "too many reg nodes");
787 nr = d;
788 for(i = 0; i < nr / 2; i++){
789 r1 = rpo2r[i];
790 rpo2r[i] = rpo2r[nr - 1 - i];
791 rpo2r[nr - 1 - i] = r1;
792 }
793 for(i = 0; i < nr; i++)
794 rpo2r[i]->rpo = i;
795
796 idom[0] = 0;
797 for(i = 0; i < nr; i++){
798 r1 = rpo2r[i];
799 me = r1->rpo;
800 d = -1;
801 if(r1->p1 != R && r1->p1->rpo < me)
802 d = r1->p1->rpo;
803 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
804 if(r1->rpo < me)
805 d = rpolca(idom, d, r1->rpo);
806 idom[i] = d;
807 }
808
809 for(i = 0; i < nr; i++){
810 r1 = rpo2r[i];
811 r1->loop++;
812 if(r1->p2 != R && loophead(idom, r1))
813 loopmark(rpo2r, i, r1);
814 }
815 }
816
817 void
synch(Reg * r,Bits dif)818 synch(Reg *r, Bits dif)
819 {
820 Reg *r1;
821 int z;
822
823 for(r1 = r; r1 != R; r1 = r1->s1) {
824 for(z=0; z<BITS; z++) {
825 dif.b[z] = (dif.b[z] &
826 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
827 r1->set.b[z] | r1->regdiff.b[z];
828 if(dif.b[z] != r1->regdiff.b[z]) {
829 r1->regdiff.b[z] = dif.b[z];
830 change++;
831 }
832 }
833 if(r1->active)
834 break;
835 r1->active = 1;
836 for(z=0; z<BITS; z++)
837 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
838 if(r1->s2 != R)
839 synch(r1->s2, dif);
840 }
841 }
842
843 ulong
allreg(ulong b,Rgn * r)844 allreg(ulong b, Rgn *r)
845 {
846 Var *v;
847 int i;
848
849 v = var + r->varno;
850 r->regno = 0;
851 switch(v->etype) {
852
853 default:
854 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
855 break;
856
857 case TCHAR:
858 case TUCHAR:
859 case TSHORT:
860 case TUSHORT:
861 case TINT:
862 case TUINT:
863 case TLONG:
864 case TULONG:
865 case TVLONG:
866 case TUVLONG:
867 case TIND:
868 case TARRAY:
869 i = BtoR(~b);
870 if(i && r->cost >= 0) {
871 r->regno = i;
872 return RtoB(i);
873 }
874 break;
875
876 case TDOUBLE:
877 case TFLOAT:
878 i = BtoF(~b);
879 if(i && r->cost >= 0) {
880 r->regno = i+NREG;
881 return FtoB(i);
882 }
883 break;
884 }
885 return 0;
886 }
887
888 void
paint1(Reg * r,int bn)889 paint1(Reg *r, int bn)
890 {
891 Reg *r1;
892 Prog *p;
893 int z;
894 ulong bb;
895
896 z = bn/32;
897 bb = 1L<<(bn%32);
898 if(r->act.b[z] & bb)
899 return;
900 for(;;) {
901 if(!(r->refbehind.b[z] & bb))
902 break;
903 r1 = r->p1;
904 if(r1 == R)
905 break;
906 if(!(r1->refahead.b[z] & bb))
907 break;
908 if(r1->act.b[z] & bb)
909 break;
910 r = r1;
911 }
912
913 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
914 change -= CLOAD * r->loop;
915 if(debug['R'] && debug['v'])
916 print("%ld%P\tld %B $%d\n", r->loop,
917 r->prog, blsh(bn), change);
918 }
919 for(;;) {
920 r->act.b[z] |= bb;
921 p = r->prog;
922
923 if(r->use1.b[z] & bb) {
924 change += CREF * r->loop;
925 if(debug['R'] && debug['v'])
926 print("%ld%P\tu1 %B $%d\n", r->loop,
927 p, blsh(bn), change);
928 }
929
930 if((r->use2.b[z]|r->set.b[z]) & bb) {
931 change += CREF * r->loop;
932 if(debug['R'] && debug['v'])
933 print("%ld%P\tu2 %B $%d\n", r->loop,
934 p, blsh(bn), change);
935 }
936
937 if(STORE(r) & r->regdiff.b[z] & bb) {
938 change -= CLOAD * r->loop;
939 if(debug['R'] && debug['v'])
940 print("%ld%P\tst %B $%d\n", r->loop,
941 p, blsh(bn), change);
942 }
943
944 if(r->refbehind.b[z] & bb)
945 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
946 if(r1->refahead.b[z] & bb)
947 paint1(r1, bn);
948
949 if(!(r->refahead.b[z] & bb))
950 break;
951 r1 = r->s2;
952 if(r1 != R)
953 if(r1->refbehind.b[z] & bb)
954 paint1(r1, bn);
955 r = r->s1;
956 if(r == R)
957 break;
958 if(r->act.b[z] & bb)
959 break;
960 if(!(r->refbehind.b[z] & bb))
961 break;
962 }
963 }
964
965 ulong
paint2(Reg * r,int bn)966 paint2(Reg *r, int bn)
967 {
968 Reg *r1;
969 int z;
970 ulong bb, vreg;
971
972 z = bn/32;
973 bb = 1L << (bn%32);
974 vreg = regbits;
975 if(!(r->act.b[z] & bb))
976 return vreg;
977 for(;;) {
978 if(!(r->refbehind.b[z] & bb))
979 break;
980 r1 = r->p1;
981 if(r1 == R)
982 break;
983 if(!(r1->refahead.b[z] & bb))
984 break;
985 if(!(r1->act.b[z] & bb))
986 break;
987 r = r1;
988 }
989 for(;;) {
990 r->act.b[z] &= ~bb;
991
992 vreg |= r->regu;
993
994 if(r->refbehind.b[z] & bb)
995 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
996 if(r1->refahead.b[z] & bb)
997 vreg |= paint2(r1, bn);
998
999 if(!(r->refahead.b[z] & bb))
1000 break;
1001 r1 = r->s2;
1002 if(r1 != R)
1003 if(r1->refbehind.b[z] & bb)
1004 vreg |= paint2(r1, bn);
1005 r = r->s1;
1006 if(r == R)
1007 break;
1008 if(!(r->act.b[z] & bb))
1009 break;
1010 if(!(r->refbehind.b[z] & bb))
1011 break;
1012 }
1013 return vreg;
1014 }
1015
1016 void
paint3(Reg * r,int bn,long rb,int rn)1017 paint3(Reg *r, int bn, long rb, int rn)
1018 {
1019 Reg *r1;
1020 Prog *p;
1021 int z;
1022 ulong bb;
1023
1024 z = bn/32;
1025 bb = 1L << (bn%32);
1026 if(r->act.b[z] & bb)
1027 return;
1028 for(;;) {
1029 if(!(r->refbehind.b[z] & bb))
1030 break;
1031 r1 = r->p1;
1032 if(r1 == R)
1033 break;
1034 if(!(r1->refahead.b[z] & bb))
1035 break;
1036 if(r1->act.b[z] & bb)
1037 break;
1038 r = r1;
1039 }
1040
1041 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1042 addmove(r, bn, rn, 0);
1043 for(;;) {
1044 r->act.b[z] |= bb;
1045 p = r->prog;
1046
1047 if(r->use1.b[z] & bb) {
1048 if(debug['R'])
1049 print("%P", p);
1050 addreg(&p->from, rn);
1051 if(debug['R'])
1052 print("\t.c%P\n", p);
1053 }
1054 if((r->use2.b[z]|r->set.b[z]) & bb) {
1055 if(debug['R'])
1056 print("%P", p);
1057 addreg(&p->to, rn);
1058 if(debug['R'])
1059 print("\t.c%P\n", p);
1060 }
1061
1062 if(STORE(r) & r->regdiff.b[z] & bb)
1063 addmove(r, bn, rn, 1);
1064 r->regu |= rb;
1065
1066 if(r->refbehind.b[z] & bb)
1067 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1068 if(r1->refahead.b[z] & bb)
1069 paint3(r1, bn, rb, rn);
1070
1071 if(!(r->refahead.b[z] & bb))
1072 break;
1073 r1 = r->s2;
1074 if(r1 != R)
1075 if(r1->refbehind.b[z] & bb)
1076 paint3(r1, bn, rb, rn);
1077 r = r->s1;
1078 if(r == R)
1079 break;
1080 if(r->act.b[z] & bb)
1081 break;
1082 if(!(r->refbehind.b[z] & bb))
1083 break;
1084 }
1085 }
1086
1087 void
addreg(Adr * a,int rn)1088 addreg(Adr *a, int rn)
1089 {
1090
1091 a->sym = 0;
1092 a->name = D_NONE;
1093 a->type = D_REG;
1094 a->reg = rn;
1095 if(rn >= NREG) {
1096 a->type = D_FREG;
1097 a->reg = rn-NREG;
1098 }
1099 }
1100
1101 /*
1102 * bit reg
1103 * 0 R3
1104 * 1 R4
1105 * ... ...
1106 * 19 R22
1107 * 20 R23
1108 */
1109 long
RtoB(int r)1110 RtoB(int r)
1111 {
1112
1113 if(r < 3 || r > 23)
1114 return 0;
1115 return 1L << (r-3);
1116 }
1117
1118 int
BtoR(long b)1119 BtoR(long b)
1120 {
1121
1122 b &= 0x001fffffL;
1123 if(b == 0)
1124 return 0;
1125 return bitno(b) + 3;
1126 }
1127
1128 /*
1129 * bit reg
1130 * 22 F4
1131 * 23 F6
1132 * ... ...
1133 * 31 F22
1134 */
1135 long
FtoB(int f)1136 FtoB(int f)
1137 {
1138
1139 if(f < 4 || f > 22 || (f&1))
1140 return 0;
1141 return 1L << (f/2 + 20);
1142 }
1143
1144 int
BtoF(long b)1145 BtoF(long b)
1146 {
1147
1148 b &= 0xffc00000L;
1149 if(b == 0)
1150 return 0;
1151 return bitno(b)*2 - 40;
1152 }
1153