1 #include "gc.h"
2
3 void addsplits(void);
4
5 Reg*
rega(void)6 rega(void)
7 {
8 Reg *r;
9
10 r = freer;
11 if(r == R) {
12 r = alloc(sizeof(*r));
13 } else
14 freer = r->link;
15
16 *r = zreg;
17 return r;
18 }
19
20 int
rcmp(const void * a1,const void * a2)21 rcmp(const void *a1, const void *a2)
22 {
23 Rgn *p1, *p2;
24 int c1, c2;
25
26 p1 = (Rgn*)a1;
27 p2 = (Rgn*)a2;
28 c1 = p2->cost;
29 c2 = p1->cost;
30 if(c1 -= c2)
31 return c1;
32 return p2->varno - p1->varno;
33 }
34
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 Reg *r, *r1, *r2;
39 Prog *p1;
40 int i, z;
41 long initpc, val, npc;
42 ulong vreg;
43 Bits bit;
44 struct
45 {
46 long m;
47 long c;
48 Reg* p;
49 } log5[6], *lp;
50
51 firstr = R;
52 lastr = R;
53 nvar = 0;
54 regbits = 0;
55 for(z=0; z<BITS; z++) {
56 externs.b[z] = 0;
57 params.b[z] = 0;
58 consts.b[z] = 0;
59 addrs.b[z] = 0;
60 }
61
62 /*
63 * pass 1
64 * build aux data structure
65 * allocate pcs
66 * find use and set of variables
67 */
68 val = 5L * 5L * 5L * 5L * 5L;
69 lp = log5;
70 for(i=0; i<5; i++) {
71 lp->m = val;
72 lp->c = 0;
73 lp->p = R;
74 val /= 5L;
75 lp++;
76 }
77 val = 0;
78 for(; p != P; p = p->link) {
79 switch(p->as) {
80 case ADATA:
81 case AGLOBL:
82 case ANAME:
83 case ASIGNAME:
84 continue;
85 }
86 r = rega();
87 if(firstr == R) {
88 firstr = r;
89 lastr = r;
90 } else {
91 lastr->link = r;
92 r->p1 = lastr;
93 lastr->s1 = r;
94 lastr = r;
95 }
96 r->prog = p;
97 r->pc = val;
98 val++;
99
100 lp = log5;
101 for(i=0; i<5; i++) {
102 lp->c--;
103 if(lp->c <= 0) {
104 lp->c = lp->m;
105 if(lp->p != R)
106 lp->p->log5 = r;
107 lp->p = r;
108 (lp+1)->c = 0;
109 break;
110 }
111 lp++;
112 }
113
114 r1 = r->p1;
115 if(r1 != R)
116 switch(r1->prog->as) {
117 case ARET:
118 case AJMP:
119 case ARFE:
120 r->p1 = R;
121 r1->s1 = R;
122 }
123
124 /*
125 * left side always read
126 */
127 bit = mkvar(&p->from, p->as==AMOVW);
128 for(z=0; z<BITS; z++)
129 r->use1.b[z] |= bit.b[z];
130
131 /*
132 * right side depends on opcode
133 */
134 bit = mkvar(&p->to, 0);
135 if(bany(&bit))
136 switch(p->as) {
137 default:
138 diag(Z, "reg: unknown asop: %A", p->as);
139 break;
140
141 /*
142 * right side write
143 */
144 case ANOP:
145 case AMOVB:
146 case AMOVBU:
147 case AMOVH:
148 case AMOVHU:
149 case AMOVW:
150 case AMOVV:
151 case AMOVF:
152 case AMOVD:
153 for(z=0; z<BITS; z++)
154 r->set.b[z] |= bit.b[z];
155 break;
156
157 /*
158 * funny
159 */
160 case AJAL:
161 for(z=0; z<BITS; z++)
162 addrs.b[z] |= bit.b[z];
163 break;
164 }
165 }
166 if(firstr == R)
167 return;
168 initpc = pc - val;
169 npc = val;
170
171 /*
172 * pass 2
173 * turn branch references to pointers
174 * build back pointers
175 */
176 for(r = firstr; r != R; r = r->link) {
177 p = r->prog;
178 if(p->to.type == D_BRANCH) {
179 val = p->to.offset - initpc;
180 r1 = firstr;
181 while(r1 != R) {
182 r2 = r1->log5;
183 if(r2 != R && val >= r2->pc) {
184 r1 = r2;
185 continue;
186 }
187 if(r1->pc == val)
188 break;
189 r1 = r1->link;
190 }
191 if(r1 == R) {
192 nearln = p->lineno;
193 diag(Z, "ref not found\n%P", p);
194 continue;
195 }
196 if(r1 == r) {
197 nearln = p->lineno;
198 diag(Z, "ref to self\n%P", p);
199 continue;
200 }
201 r->s2 = r1;
202 r->p2link = r1->p2;
203 r1->p2 = r;
204 }
205 }
206 if(debug['R']) {
207 p = firstr->prog;
208 print("\n%L %D\n", p->lineno, &p->from);
209 }
210
211 /*
212 * pass 2.5
213 * find looping structure
214 */
215 for(r = firstr; r != R; r = r->link)
216 r->active = 0;
217 change = 0;
218 loopit(firstr, npc);
219
220 /*
221 * pass 3
222 * iterate propagating usage
223 * back until flow graph is complete
224 */
225 loop1:
226 change = 0;
227 for(r = firstr; r != R; r = r->link)
228 r->active = 0;
229 for(r = firstr; r != R; r = r->link)
230 if(r->prog->as == ARET)
231 prop(r, zbits, zbits);
232 loop11:
233 /* pick up unreachable code */
234 i = 0;
235 for(r = firstr; r != R; r = r1) {
236 r1 = r->link;
237 if(r1 && r1->active && !r->active) {
238 prop(r, zbits, zbits);
239 i = 1;
240 }
241 }
242 if(i)
243 goto loop11;
244 if(change)
245 goto loop1;
246
247
248 /*
249 * pass 4
250 * iterate propagating register/variable synchrony
251 * forward until graph is complete
252 */
253 loop2:
254 change = 0;
255 for(r = firstr; r != R; r = r->link)
256 r->active = 0;
257 synch(firstr, zbits);
258 if(change)
259 goto loop2;
260
261 addsplits();
262
263 if(debug['R'] && debug['v']) {
264 print("\nprop structure:\n");
265 for(r = firstr; r != R; r = r->link) {
266 print("%ld:%P", r->loop, r->prog);
267 for(z=0; z<BITS; z++)
268 bit.b[z] = r->set.b[z] |
269 r->refahead.b[z] | r->calahead.b[z] |
270 r->refbehind.b[z] | r->calbehind.b[z] |
271 r->use1.b[z] | r->use2.b[z];
272 if(bany(&bit)) {
273 print("\t");
274 if(bany(&r->use1))
275 print(" u1=%B", r->use1);
276 if(bany(&r->use2))
277 print(" u2=%B", r->use2);
278 if(bany(&r->set))
279 print(" st=%B", r->set);
280 if(bany(&r->refahead))
281 print(" ra=%B", r->refahead);
282 if(bany(&r->calahead))
283 print(" ca=%B", r->calahead);
284 if(bany(&r->refbehind))
285 print(" rb=%B", r->refbehind);
286 if(bany(&r->calbehind))
287 print(" cb=%B", r->calbehind);
288 if(bany(&r->regdiff))
289 print(" rd=%B", r->regdiff);
290 }
291 print("\n");
292 }
293 }
294
295 /*
296 * pass 5
297 * isolate regions
298 * calculate costs (paint1)
299 */
300 r = firstr;
301 if(r) {
302 for(z=0; z<BITS; z++)
303 bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
304 ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
305 if(bany(&bit)) {
306 nearln = r->prog->lineno;
307 warn(Z, "used and not set: %B", bit);
308 if(debug['R'] && !debug['w'])
309 print("used and not set: %B\n", bit);
310 }
311 }
312
313 for(r = firstr; r != R; r = r->link)
314 r->act = zbits;
315 rgp = region;
316 nregion = 0;
317 for(r = firstr; r != R; r = r->link) {
318 for(z=0; z<BITS; z++)
319 bit.b[z] = r->set.b[z] &
320 ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
321 if(bany(&bit)) {
322 nearln = r->prog->lineno;
323 warn(Z, "set and not used: %B", bit);
324 if(debug['R'])
325 print("set and not used: %B\n", bit);
326 excise(r);
327 }
328 for(z=0; z<BITS; z++)
329 bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
330 while(bany(&bit)) {
331 i = bnum(bit);
332 rgp->enter = r;
333 rgp->varno = i;
334 change = 0;
335 if(debug['R'] && debug['v'])
336 print("\n");
337 paint1(r, i);
338 bit.b[i/32] &= ~(1L<<(i%32));
339 if(change <= 0) {
340 if(debug['R'])
341 print("%L $%d: %B\n",
342 r->prog->lineno, change, blsh(i));
343 continue;
344 }
345 rgp->cost = change;
346 nregion++;
347 if(nregion >= NRGN) {
348 warn(Z, "too many regions");
349 goto brk;
350 }
351 rgp++;
352 }
353 }
354 brk:
355 qsort(region, nregion, sizeof(region[0]), rcmp);
356
357 /*
358 * pass 6
359 * determine used registers (paint2)
360 * replace code (paint3)
361 */
362 rgp = region;
363 for(i=0; i<nregion; i++) {
364 bit = blsh(rgp->varno);
365 vreg = paint2(rgp->enter, rgp->varno);
366 vreg = allreg(vreg, rgp);
367 if(debug['R']) {
368 if(rgp->regno >= NREG)
369 print("%L $%d F%d: %B\n",
370 rgp->enter->prog->lineno,
371 rgp->cost,
372 rgp->regno-NREG,
373 bit);
374 else
375 print("%L $%d R%d: %B\n",
376 rgp->enter->prog->lineno,
377 rgp->cost,
378 rgp->regno,
379 bit);
380 }
381 if(rgp->regno != 0)
382 paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
383 rgp++;
384 }
385 /*
386 * pass 7
387 * peep-hole on basic block
388 */
389 if(!debug['R'] || debug['P'])
390 peep();
391
392 /*
393 * pass 8
394 * recalculate pc
395 */
396 val = initpc;
397 for(r = firstr; r != R; r = r1) {
398 r->pc = val;
399 p = r->prog;
400 p1 = P;
401 r1 = r->link;
402 if(r1 != R)
403 p1 = r1->prog;
404 for(; p != p1; p = p->link) {
405 switch(p->as) {
406 default:
407 val++;
408 break;
409
410 case ANOP:
411 case ADATA:
412 case AGLOBL:
413 case ANAME:
414 case ASIGNAME:
415 break;
416 }
417 }
418 }
419 pc = val;
420
421 /*
422 * fix up branches
423 */
424 if(debug['R'])
425 if(bany(&addrs))
426 print("addrs: %B\n", addrs);
427
428 r1 = 0; /* set */
429 for(r = firstr; r != R; r = r->link) {
430 p = r->prog;
431 if(p->to.type == D_BRANCH)
432 p->to.offset = r->s2->pc;
433 r1 = r;
434 }
435
436 /*
437 * last pass
438 * eliminate nops
439 * free aux structures
440 */
441 for(p = firstr->prog; p != P; p = p->link){
442 while(p->link && p->link->as == ANOP)
443 p->link = p->link->link;
444 }
445 if(r1 != R) {
446 r1->link = freer;
447 freer = firstr;
448 }
449 }
450
451 void
addsplits(void)452 addsplits(void)
453 {
454 Reg *r, *r1;
455 int z, i;
456 Bits bit;
457
458 for(r = firstr; r != R; r = r->link) {
459 if(r->loop > 1)
460 continue;
461 if(r->prog->as == AJAL)
462 continue;
463 for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
464 if(r1->loop <= 1)
465 continue;
466 for(z=0; z<BITS; z++)
467 bit.b[z] = r1->calbehind.b[z] &
468 (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
469 ~(r->calahead.b[z] & addrs.b[z]);
470 while(bany(&bit)) {
471 i = bnum(bit);
472 bit.b[i/32] &= ~(1L << (i%32));
473 }
474 }
475 }
476 }
477
478 /*
479 * add mov b,rn
480 * just after r
481 */
482 void
addmove(Reg * r,int bn,int rn,int f)483 addmove(Reg *r, int bn, int rn, int f)
484 {
485 Prog *p, *p1;
486 Adr *a;
487 Var *v;
488
489 p1 = alloc(sizeof(*p1));
490 *p1 = zprog;
491 p = r->prog;
492
493 p1->link = p->link;
494 p->link = p1;
495 p1->lineno = p->lineno;
496
497 v = var + bn;
498
499 a = &p1->to;
500 a->sym = v->sym;
501 a->name = v->name;
502 a->offset = v->offset;
503 a->etype = v->etype;
504 a->type = D_OREG;
505 if(a->etype == TARRAY || a->sym == S)
506 a->type = D_CONST;
507
508 p1->as = AMOVW;
509 if(v->etype == TCHAR || v->etype == TUCHAR)
510 p1->as = AMOVB;
511 if(v->etype == TSHORT || v->etype == TUSHORT)
512 p1->as = AMOVH;
513 if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
514 p1->as = AMOVV;
515 if(v->etype == TFLOAT)
516 p1->as = AMOVF;
517 if(v->etype == TDOUBLE)
518 p1->as = AMOVD;
519
520 p1->from.type = D_REG;
521 p1->from.reg = rn;
522 if(rn >= NREG) {
523 p1->from.type = D_FREG;
524 p1->from.reg = rn-NREG;
525 }
526 if(!f) {
527 p1->from = *a;
528 *a = zprog.from;
529 a->type = D_REG;
530 a->reg = rn;
531 if(rn >= NREG) {
532 a->type = D_FREG;
533 a->reg = rn-NREG;
534 }
535 if(v->etype == TUCHAR)
536 p1->as = AMOVBU;
537 if(v->etype == TUSHORT)
538 p1->as = AMOVHU;
539 }
540 if(debug['R'])
541 print("%P\t.a%P\n", p, p1);
542 }
543
544 Bits
mkvar(Adr * a,int docon)545 mkvar(Adr *a, int docon)
546 {
547 Var *v;
548 int i, t, n, et, z;
549 long o;
550 Bits bit;
551 Sym *s;
552
553 t = a->type;
554 if(t == D_REG && a->reg != NREG)
555 regbits |= RtoB(a->reg);
556 if(t == D_FREG && a->reg != NREG)
557 regbits |= FtoB(a->reg);
558 s = a->sym;
559 o = a->offset;
560 et = a->etype;
561 if(s == S) {
562 if(t != D_CONST || !docon || a->reg != NREG)
563 goto none;
564 et = TLONG;
565 }
566 if(t == D_CONST) {
567 if(s == S && sval(o))
568 goto none;
569 }
570
571 n = a->name;
572 v = var;
573 for(i=0; i<nvar; i++) {
574 if(s == v->sym)
575 if(n == v->name)
576 if(o == v->offset)
577 goto out;
578 v++;
579 }
580 if(s)
581 if(s->name[0] == '.')
582 goto none;
583 if(nvar >= NVAR) {
584 if(debug['w'] > 1 && s)
585 warn(Z, "variable not optimized: %s", s->name);
586 goto none;
587 }
588 i = nvar;
589 nvar++;
590 v = &var[i];
591 v->sym = s;
592 v->offset = o;
593 v->etype = et;
594 v->name = n;
595 if(debug['R'])
596 print("bit=%2d et=%2d %D\n", i, et, a);
597 out:
598 bit = blsh(i);
599 if(n == D_EXTERN || n == D_STATIC)
600 for(z=0; z<BITS; z++)
601 externs.b[z] |= bit.b[z];
602 if(n == D_PARAM)
603 for(z=0; z<BITS; z++)
604 params.b[z] |= bit.b[z];
605 if(v->etype != et || (!typechlpfd[et] && !typev[et])) /* funny punning */
606 for(z=0; z<BITS; z++)
607 addrs.b[z] |= bit.b[z];
608 if(t == D_CONST) {
609 if(s == S) {
610 for(z=0; z<BITS; z++)
611 consts.b[z] |= bit.b[z];
612 return bit;
613 }
614 if(et != TARRAY)
615 for(z=0; z<BITS; z++)
616 addrs.b[z] |= bit.b[z];
617 for(z=0; z<BITS; z++)
618 params.b[z] |= bit.b[z];
619 return bit;
620 }
621 if(t == D_OREG)
622 return bit;
623
624 none:
625 return zbits;
626 }
627
628 void
prop(Reg * r,Bits ref,Bits cal)629 prop(Reg *r, Bits ref, Bits cal)
630 {
631 Reg *r1, *r2;
632 int z;
633
634 for(r1 = r; r1 != R; r1 = r1->p1) {
635 for(z=0; z<BITS; z++) {
636 ref.b[z] |= r1->refahead.b[z];
637 if(ref.b[z] != r1->refahead.b[z]) {
638 r1->refahead.b[z] = ref.b[z];
639 change++;
640 }
641 cal.b[z] |= r1->calahead.b[z];
642 if(cal.b[z] != r1->calahead.b[z]) {
643 r1->calahead.b[z] = cal.b[z];
644 change++;
645 }
646 }
647 switch(r1->prog->as) {
648 case AJAL:
649 for(z=0; z<BITS; z++) {
650 cal.b[z] |= ref.b[z] | externs.b[z];
651 ref.b[z] = 0;
652 }
653 break;
654
655 case ATEXT:
656 for(z=0; z<BITS; z++) {
657 cal.b[z] = 0;
658 ref.b[z] = 0;
659 }
660 break;
661
662 case ARET:
663 for(z=0; z<BITS; z++) {
664 cal.b[z] = externs.b[z];
665 ref.b[z] = 0;
666 }
667 }
668 for(z=0; z<BITS; z++) {
669 ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
670 r1->use1.b[z] | r1->use2.b[z];
671 cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
672 r1->refbehind.b[z] = ref.b[z];
673 r1->calbehind.b[z] = cal.b[z];
674 }
675 if(r1->active)
676 break;
677 r1->active = 1;
678 }
679 for(; r != r1; r = r->p1)
680 for(r2 = r->p2; r2 != R; r2 = r2->p2link)
681 prop(r2, r->refbehind, r->calbehind);
682 }
683
684 /*
685 * find looping structure
686 *
687 * 1) find reverse postordering
688 * 2) find approximate dominators,
689 * the actual dominators if the flow graph is reducible
690 * otherwise, dominators plus some other non-dominators.
691 * See Matthew S. Hecht and Jeffrey D. Ullman,
692 * "Analysis of a Simple Algorithm for Global Data Flow Problems",
693 * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
694 * Oct. 1-3, 1973, pp. 207-217.
695 * 3) find all nodes with a predecessor dominated by the current node.
696 * such a node is a loop head.
697 * recursively, all preds with a greater rpo number are in the loop
698 */
699 long
postorder(Reg * r,Reg ** rpo2r,long n)700 postorder(Reg *r, Reg **rpo2r, long n)
701 {
702 Reg *r1;
703
704 r->rpo = 1;
705 r1 = r->s1;
706 if(r1 && !r1->rpo)
707 n = postorder(r1, rpo2r, n);
708 r1 = r->s2;
709 if(r1 && !r1->rpo)
710 n = postorder(r1, rpo2r, n);
711 rpo2r[n] = r;
712 n++;
713 return n;
714 }
715
716 long
rpolca(long * idom,long rpo1,long rpo2)717 rpolca(long *idom, long rpo1, long rpo2)
718 {
719 long t;
720
721 if(rpo1 == -1)
722 return rpo2;
723 while(rpo1 != rpo2){
724 if(rpo1 > rpo2){
725 t = rpo2;
726 rpo2 = rpo1;
727 rpo1 = t;
728 }
729 while(rpo1 < rpo2){
730 t = idom[rpo2];
731 if(t >= rpo2)
732 fatal(Z, "bad idom");
733 rpo2 = t;
734 }
735 }
736 return rpo1;
737 }
738
739 int
doms(long * idom,long r,long s)740 doms(long *idom, long r, long s)
741 {
742 while(s > r)
743 s = idom[s];
744 return s == r;
745 }
746
747 int
loophead(long * idom,Reg * r)748 loophead(long *idom, Reg *r)
749 {
750 long src;
751
752 src = r->rpo;
753 if(r->p1 != R && doms(idom, src, r->p1->rpo))
754 return 1;
755 for(r = r->p2; r != R; r = r->p2link)
756 if(doms(idom, src, r->rpo))
757 return 1;
758 return 0;
759 }
760
761 void
loopmark(Reg ** rpo2r,long head,Reg * r)762 loopmark(Reg **rpo2r, long head, Reg *r)
763 {
764 if(r->rpo < head || r->active == head)
765 return;
766 r->active = head;
767 r->loop += LOOP;
768 if(r->p1 != R)
769 loopmark(rpo2r, head, r->p1);
770 for(r = r->p2; r != R; r = r->p2link)
771 loopmark(rpo2r, head, r);
772 }
773
774 void
loopit(Reg * r,long nr)775 loopit(Reg *r, long nr)
776 {
777 Reg *r1;
778 long i, d, me;
779
780 if(nr > maxnr) {
781 rpo2r = alloc(nr * sizeof(Reg*));
782 idom = alloc(nr * sizeof(long));
783 maxnr = nr;
784 }
785
786 d = postorder(r, rpo2r, 0);
787 if(d > nr)
788 fatal(Z, "too many reg nodes");
789 nr = d;
790 for(i = 0; i < nr / 2; i++){
791 r1 = rpo2r[i];
792 rpo2r[i] = rpo2r[nr - 1 - i];
793 rpo2r[nr - 1 - i] = r1;
794 }
795 for(i = 0; i < nr; i++)
796 rpo2r[i]->rpo = i;
797
798 idom[0] = 0;
799 for(i = 0; i < nr; i++){
800 r1 = rpo2r[i];
801 me = r1->rpo;
802 d = -1;
803 if(r1->p1 != R && r1->p1->rpo < me)
804 d = r1->p1->rpo;
805 for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
806 if(r1->rpo < me)
807 d = rpolca(idom, d, r1->rpo);
808 idom[i] = d;
809 }
810
811 for(i = 0; i < nr; i++){
812 r1 = rpo2r[i];
813 r1->loop++;
814 if(r1->p2 != R && loophead(idom, r1))
815 loopmark(rpo2r, i, r1);
816 }
817 }
818
819 void
synch(Reg * r,Bits dif)820 synch(Reg *r, Bits dif)
821 {
822 Reg *r1;
823 int z;
824
825 for(r1 = r; r1 != R; r1 = r1->s1) {
826 for(z=0; z<BITS; z++) {
827 dif.b[z] = (dif.b[z] &
828 ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
829 r1->set.b[z] | r1->regdiff.b[z];
830 if(dif.b[z] != r1->regdiff.b[z]) {
831 r1->regdiff.b[z] = dif.b[z];
832 change++;
833 }
834 }
835 if(r1->active)
836 break;
837 r1->active = 1;
838 for(z=0; z<BITS; z++)
839 dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
840 if(r1->s2 != R)
841 synch(r1->s2, dif);
842 }
843 }
844
845 ulong
allreg(ulong b,Rgn * r)846 allreg(ulong b, Rgn *r)
847 {
848 Var *v;
849 int i;
850
851 v = var + r->varno;
852 r->regno = 0;
853 switch(v->etype) {
854
855 default:
856 diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
857 break;
858
859 case TCHAR:
860 case TUCHAR:
861 case TSHORT:
862 case TUSHORT:
863 case TINT:
864 case TUINT:
865 case TLONG:
866 case TULONG:
867 case TVLONG:
868 case TUVLONG:
869 case TIND:
870 case TARRAY:
871 i = BtoR(~b);
872 if(i && r->cost >= 0) {
873 r->regno = i;
874 return RtoB(i);
875 }
876 break;
877
878 case TDOUBLE:
879 case TFLOAT:
880 i = BtoF(~b);
881 if(i && r->cost >= 0) {
882 r->regno = i+NREG;
883 return FtoB(i);
884 }
885 break;
886 }
887 return 0;
888 }
889
890 void
paint1(Reg * r,int bn)891 paint1(Reg *r, int bn)
892 {
893 Reg *r1;
894 Prog *p;
895 int z;
896 ulong bb;
897
898 z = bn/32;
899 bb = 1L<<(bn%32);
900 if(r->act.b[z] & bb)
901 return;
902 for(;;) {
903 if(!(r->refbehind.b[z] & bb))
904 break;
905 r1 = r->p1;
906 if(r1 == R)
907 break;
908 if(!(r1->refahead.b[z] & bb))
909 break;
910 if(r1->act.b[z] & bb)
911 break;
912 r = r1;
913 }
914
915 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
916 change -= CLOAD * r->loop;
917 if(debug['R'] && debug['v'])
918 print("%ld%P\tld %B $%d\n", r->loop,
919 r->prog, blsh(bn), change);
920 }
921 for(;;) {
922 r->act.b[z] |= bb;
923 p = r->prog;
924
925 if(r->use1.b[z] & bb) {
926 change += CREF * r->loop;
927 if(debug['R'] && debug['v'])
928 print("%ld%P\tu1 %B $%d\n", r->loop,
929 p, blsh(bn), change);
930 }
931
932 if((r->use2.b[z]|r->set.b[z]) & bb) {
933 change += CREF * r->loop;
934 if(debug['R'] && debug['v'])
935 print("%ld%P\tu2 %B $%d\n", r->loop,
936 p, blsh(bn), change);
937 }
938
939 if(STORE(r) & r->regdiff.b[z] & bb) {
940 change -= CLOAD * r->loop;
941 if(debug['R'] && debug['v'])
942 print("%ld%P\tst %B $%d\n", r->loop,
943 p, blsh(bn), change);
944 }
945
946 if(r->refbehind.b[z] & bb)
947 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
948 if(r1->refahead.b[z] & bb)
949 paint1(r1, bn);
950
951 if(!(r->refahead.b[z] & bb))
952 break;
953 r1 = r->s2;
954 if(r1 != R)
955 if(r1->refbehind.b[z] & bb)
956 paint1(r1, bn);
957 r = r->s1;
958 if(r == R)
959 break;
960 if(r->act.b[z] & bb)
961 break;
962 if(!(r->refbehind.b[z] & bb))
963 break;
964 }
965 }
966
967 ulong
paint2(Reg * r,int bn)968 paint2(Reg *r, int bn)
969 {
970 Reg *r1;
971 int z;
972 ulong bb, vreg;
973
974 z = bn/32;
975 bb = 1L << (bn%32);
976 vreg = regbits;
977 if(!(r->act.b[z] & bb))
978 return vreg;
979 for(;;) {
980 if(!(r->refbehind.b[z] & bb))
981 break;
982 r1 = r->p1;
983 if(r1 == R)
984 break;
985 if(!(r1->refahead.b[z] & bb))
986 break;
987 if(!(r1->act.b[z] & bb))
988 break;
989 r = r1;
990 }
991 for(;;) {
992 r->act.b[z] &= ~bb;
993
994 vreg |= r->regu;
995
996 if(r->refbehind.b[z] & bb)
997 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
998 if(r1->refahead.b[z] & bb)
999 vreg |= paint2(r1, bn);
1000
1001 if(!(r->refahead.b[z] & bb))
1002 break;
1003 r1 = r->s2;
1004 if(r1 != R)
1005 if(r1->refbehind.b[z] & bb)
1006 vreg |= paint2(r1, bn);
1007 r = r->s1;
1008 if(r == R)
1009 break;
1010 if(!(r->act.b[z] & bb))
1011 break;
1012 if(!(r->refbehind.b[z] & bb))
1013 break;
1014 }
1015 return vreg;
1016 }
1017
1018 void
paint3(Reg * r,int bn,long rb,int rn)1019 paint3(Reg *r, int bn, long rb, int rn)
1020 {
1021 Reg *r1;
1022 Prog *p;
1023 int z;
1024 ulong bb;
1025
1026 z = bn/32;
1027 bb = 1L << (bn%32);
1028 if(r->act.b[z] & bb)
1029 return;
1030 for(;;) {
1031 if(!(r->refbehind.b[z] & bb))
1032 break;
1033 r1 = r->p1;
1034 if(r1 == R)
1035 break;
1036 if(!(r1->refahead.b[z] & bb))
1037 break;
1038 if(r1->act.b[z] & bb)
1039 break;
1040 r = r1;
1041 }
1042
1043 if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1044 addmove(r, bn, rn, 0);
1045 for(;;) {
1046 r->act.b[z] |= bb;
1047 p = r->prog;
1048
1049 if(r->use1.b[z] & bb) {
1050 if(debug['R'])
1051 print("%P", p);
1052 addreg(&p->from, rn);
1053 if(debug['R'])
1054 print("\t.c%P\n", p);
1055 }
1056 if((r->use2.b[z]|r->set.b[z]) & bb) {
1057 if(debug['R'])
1058 print("%P", p);
1059 addreg(&p->to, rn);
1060 if(debug['R'])
1061 print("\t.c%P\n", p);
1062 }
1063
1064 if(STORE(r) & r->regdiff.b[z] & bb)
1065 addmove(r, bn, rn, 1);
1066 r->regu |= rb;
1067
1068 if(r->refbehind.b[z] & bb)
1069 for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1070 if(r1->refahead.b[z] & bb)
1071 paint3(r1, bn, rb, rn);
1072
1073 if(!(r->refahead.b[z] & bb))
1074 break;
1075 r1 = r->s2;
1076 if(r1 != R)
1077 if(r1->refbehind.b[z] & bb)
1078 paint3(r1, bn, rb, rn);
1079 r = r->s1;
1080 if(r == R)
1081 break;
1082 if(r->act.b[z] & bb)
1083 break;
1084 if(!(r->refbehind.b[z] & bb))
1085 break;
1086 }
1087 }
1088
1089 void
addreg(Adr * a,int rn)1090 addreg(Adr *a, int rn)
1091 {
1092
1093 a->sym = 0;
1094 a->name = D_NONE;
1095 a->type = D_REG;
1096 a->reg = rn;
1097 if(rn >= NREG) {
1098 a->type = D_FREG;
1099 a->reg = rn-NREG;
1100 }
1101 }
1102
1103 /*
1104 * bit reg
1105 * 0 R3
1106 * 1 R4
1107 * ... ...
1108 * 19 R22
1109 * 20 R23
1110 */
1111 long
RtoB(int r)1112 RtoB(int r)
1113 {
1114
1115 if(r < 3 || r > 23)
1116 return 0;
1117 return 1L << (r-3);
1118 }
1119
BtoR(long b)1120 BtoR(long b)
1121 {
1122
1123 b &= 0x001fffffL;
1124 if(b == 0)
1125 return 0;
1126 return bitno(b) + 3;
1127 }
1128
1129 /*
1130 * bit reg
1131 * 22 F4
1132 * 23 F6
1133 * ... ...
1134 * 31 F22
1135 */
1136 long
FtoB(int f)1137 FtoB(int f)
1138 {
1139
1140 if(f < 4 || f > 22 || (f&1))
1141 return 0;
1142 return 1L << (f/2 + 20);
1143 }
1144
1145 int
BtoF(long b)1146 BtoF(long b)
1147 {
1148
1149 b &= 0xffc00000L;
1150 if(b == 0)
1151 return 0;
1152 return bitno(b)*2 - 40;
1153 }
1154