xref: /plan9-contrib/sys/src/cmd/ic/reg.c (revision ce95e1b3727b9cb1c223ffbed69aff21a8ced255)
1 #include "gc.h"
2 
3 void	addsplits(void);
4 
5 Reg*
rega(void)6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		r = alloc(sizeof(*r));
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
rcmp(void * a1,void * a2)21 rcmp(void *a1, void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = (Rgn*)a1;
27 	p2 = (Rgn*)a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
regopt(Prog * p)36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val, npc;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = 0;
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 		case ASIGNAME:
84 			continue;
85 		}
86 		r = rega();
87 		if(firstr == R) {
88 			firstr = r;
89 			lastr = r;
90 		} else {
91 			lastr->link = r;
92 			r->p1 = lastr;
93 			lastr->s1 = r;
94 			lastr = r;
95 		}
96 		r->prog = p;
97 		r->pc = val;
98 		val++;
99 
100 		lp = log5;
101 		for(i=0; i<5; i++) {
102 			lp->c--;
103 			if(lp->c <= 0) {
104 				lp->c = lp->m;
105 				if(lp->p != R)
106 					lp->p->log5 = r;
107 				lp->p = r;
108 				(lp+1)->c = 0;
109 				break;
110 			}
111 			lp++;
112 		}
113 
114 		r1 = r->p1;
115 		if(r1 != R)
116 		switch(r1->prog->as) {
117 		case ARET:
118 		case AJMP:
119 			r->p1 = R;
120 			r1->s1 = R;
121 		}
122 
123 		/*
124 		 * left side always read
125 		 */
126 		bit = mkvar(&p->from, p->as==AMOVW || p->as==AMOV);
127 		for(z=0; z<BITS; z++)
128 			r->use1.b[z] |= bit.b[z];
129 
130 		/*
131 		 * right side depends on opcode
132 		 */
133 		bit = mkvar(&p->to, 0);
134 		if(bany(&bit))
135 		switch(p->as) {
136 		default:
137 			diag(Z, "reg: unknown asop: %A", p->as);
138 			break;
139 
140 		/*
141 		 * right side write
142 		 */
143 		case ANOP:
144 		case AMOVB:
145 		case AMOVBU:
146 		case AMOVH:
147 		case AMOVHU:
148 		case AMOVW:
149 		case AMOV:
150 		case AMOVF:
151 		case AMOVD:
152 			for(z=0; z<BITS; z++)
153 				r->set.b[z] |= bit.b[z];
154 			break;
155 
156 		/*
157 		 * funny
158 		 */
159 		case AJAL:
160 			for(z=0; z<BITS; z++)
161 				addrs.b[z] |= bit.b[z];
162 			break;
163 		}
164 	}
165 	if(firstr == R)
166 		return;
167 	initpc = pc - val;
168 	npc = val;
169 
170 	/*
171 	 * pass 2
172 	 * turn branch references to pointers
173 	 * build back pointers
174 	 */
175 	for(r = firstr; r != R; r = r->link) {
176 		p = r->prog;
177 		if(p->to.type == D_BRANCH) {
178 			val = p->to.offset - initpc;
179 			r1 = firstr;
180 			while(r1 != R) {
181 				r2 = r1->log5;
182 				if(r2 != R && val >= r2->pc) {
183 					r1 = r2;
184 					continue;
185 				}
186 				if(r1->pc == val)
187 					break;
188 				r1 = r1->link;
189 			}
190 			if(r1 == R) {
191 				nearln = p->lineno;
192 				diag(Z, "ref not found\n%P", p);
193 				continue;
194 			}
195 			if(r1 == r) {
196 				nearln = p->lineno;
197 				diag(Z, "ref to self\n%P", p);
198 				continue;
199 			}
200 			r->s2 = r1;
201 			r->p2link = r1->p2;
202 			r1->p2 = r;
203 		}
204 	}
205 	if(debug['R']) {
206 		p = firstr->prog;
207 		print("\n%L %D\n", p->lineno, &p->from);
208 	}
209 
210 	/*
211 	 * pass 2.5
212 	 * find looping structure
213 	 */
214 	for(r = firstr; r != R; r = r->link)
215 		r->active = 0;
216 	change = 0;
217 	loopit(firstr, npc);
218 
219 	/*
220 	 * pass 3
221 	 * iterate propagating usage
222 	 * 	back until flow graph is complete
223 	 */
224 loop1:
225 	change = 0;
226 	for(r = firstr; r != R; r = r->link)
227 		r->active = 0;
228 	for(r = firstr; r != R; r = r->link)
229 		if(r->prog->as == ARET)
230 			prop(r, zbits, zbits);
231 loop11:
232 	/* pick up unreachable code */
233 	i = 0;
234 	for(r = firstr; r != R; r = r1) {
235 		r1 = r->link;
236 		if(r1 && r1->active && !r->active) {
237 			prop(r, zbits, zbits);
238 			i = 1;
239 		}
240 	}
241 	if(i)
242 		goto loop11;
243 	if(change)
244 		goto loop1;
245 
246 
247 	/*
248 	 * pass 4
249 	 * iterate propagating register/variable synchrony
250 	 * 	forward until graph is complete
251 	 */
252 loop2:
253 	change = 0;
254 	for(r = firstr; r != R; r = r->link)
255 		r->active = 0;
256 	synch(firstr, zbits);
257 	if(change)
258 		goto loop2;
259 
260 	addsplits();
261 
262 	if(debug['R'] && debug['v']) {
263 		print("\nprop structure:\n");
264 		for(r = firstr; r != R; r = r->link) {
265 			print("%ld:%P", r->loop, r->prog);
266 			for(z=0; z<BITS; z++)
267 				bit.b[z] = r->set.b[z] |
268 					r->refahead.b[z] | r->calahead.b[z] |
269 					r->refbehind.b[z] | r->calbehind.b[z] |
270 					r->use1.b[z] | r->use2.b[z];
271 			if(bany(&bit)) {
272 				print("\t");
273 				if(bany(&r->use1))
274 					print(" u1=%B", r->use1);
275 				if(bany(&r->use2))
276 					print(" u2=%B", r->use2);
277 				if(bany(&r->set))
278 					print(" st=%B", r->set);
279 				if(bany(&r->refahead))
280 					print(" ra=%B", r->refahead);
281 				if(bany(&r->calahead))
282 					print(" ca=%B", r->calahead);
283 				if(bany(&r->refbehind))
284 					print(" rb=%B", r->refbehind);
285 				if(bany(&r->calbehind))
286 					print(" cb=%B", r->calbehind);
287 				if(bany(&r->regdiff))
288 					print(" rd=%B", r->regdiff);
289 			}
290 			print("\n");
291 		}
292 	}
293 
294 	/*
295 	 * pass 5
296 	 * isolate regions
297 	 * calculate costs (paint1)
298 	 */
299 	r = firstr;
300 	if(r) {
301 		for(z=0; z<BITS; z++)
302 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304 		if(bany(&bit)) {
305 			nearln = r->prog->lineno;
306 			warn(Z, "used and not set: %B", bit);
307 			if(debug['R'] && !debug['w'])
308 				print("used and not set: %B\n", bit);
309 		}
310 	}
311 
312 	for(r = firstr; r != R; r = r->link)
313 		r->act = zbits;
314 	rgp = region;
315 	nregion = 0;
316 	for(r = firstr; r != R; r = r->link) {
317 		for(z=0; z<BITS; z++)
318 			bit.b[z] = r->set.b[z] &
319 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320 		if(bany(&bit)) {
321 			nearln = r->prog->lineno;
322 			warn(Z, "set and not used: %B", bit);
323 			if(debug['R'])
324 				print("set and not used: %B\n", bit);
325 			excise(r);
326 		}
327 		for(z=0; z<BITS; z++)
328 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329 		while(bany(&bit)) {
330 			i = bnum(bit);
331 			rgp->enter = r;
332 			rgp->varno = i;
333 			change = 0;
334 			if(debug['R'] && debug['v'])
335 				print("\n");
336 			paint1(r, i);
337 			bit.b[i/32] &= ~(1L<<(i%32));
338 			if(change <= 0) {
339 				if(debug['R'])
340 					print("%L $%d: %B\n",
341 						r->prog->lineno, change, blsh(i));
342 				continue;
343 			}
344 			rgp->cost = change;
345 			nregion++;
346 			if(nregion >= NRGN) {
347 				warn(Z, "too many regions");
348 				goto brk;
349 			}
350 			rgp++;
351 		}
352 	}
353 brk:
354 	qsort(region, nregion, sizeof(region[0]), rcmp);
355 
356 	/*
357 	 * pass 6
358 	 * determine used registers (paint2)
359 	 * replace code (paint3)
360 	 */
361 	rgp = region;
362 	for(i=0; i<nregion; i++) {
363 		bit = blsh(rgp->varno);
364 		vreg = paint2(rgp->enter, rgp->varno);
365 		vreg = allreg(vreg, rgp);
366 		if(debug['R']) {
367 			if(rgp->regno >= NREG)
368 				print("%L $%d F%d: %B\n",
369 					rgp->enter->prog->lineno,
370 					rgp->cost,
371 					rgp->regno-NREG,
372 					bit);
373 			else
374 				print("%L $%d R%d: %B\n",
375 					rgp->enter->prog->lineno,
376 					rgp->cost,
377 					rgp->regno,
378 					bit);
379 		}
380 		if(rgp->regno != 0)
381 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382 		rgp++;
383 	}
384 	/*
385 	 * pass 7
386 	 * peep-hole on basic block
387 	 */
388 	if(!debug['R'] || debug['P'])
389 		peep();
390 
391 	/*
392 	 * pass 8
393 	 * recalculate pc
394 	 */
395 	val = initpc;
396 	for(r = firstr; r != R; r = r1) {
397 		r->pc = val;
398 		p = r->prog;
399 		p1 = P;
400 		r1 = r->link;
401 		if(r1 != R)
402 			p1 = r1->prog;
403 		for(; p != p1; p = p->link) {
404 			switch(p->as) {
405 			default:
406 				val++;
407 				break;
408 
409 			case ANOP:
410 			case ADATA:
411 			case AGLOBL:
412 			case ANAME:
413 			case ASIGNAME:
414 				break;
415 			}
416 		}
417 	}
418 	pc = val;
419 
420 	/*
421 	 * fix up branches
422 	 */
423 	if(debug['R'])
424 		if(bany(&addrs))
425 			print("addrs: %B\n", addrs);
426 
427 	r1 = 0; /* set */
428 	for(r = firstr; r != R; r = r->link) {
429 		p = r->prog;
430 		if(p->to.type == D_BRANCH)
431 			p->to.offset = r->s2->pc;
432 		r1 = r;
433 	}
434 
435 	/*
436 	 * last pass
437 	 * fix unsigned stores (needed as type hint in pass 6)
438 	 * eliminate nops
439 	 * free aux structures
440 	 */
441 	for(p = firstr->prog; p != P; p = p->link){
442 		if(p->to.type == D_OREG){
443 			switch(p->as){
444 			case AMOVBU:
445 				p->as = AMOVB;
446 				break;
447 			case AMOVHU:
448 				p->as = AMOVH;
449 				break;
450 			}
451 		}
452 		while(p->link && p->link->as == ANOP)
453 			p->link = p->link->link;
454 	}
455 	if(r1 != R) {
456 		r1->link = freer;
457 		freer = firstr;
458 	}
459 }
460 
461 void
addsplits(void)462 addsplits(void)
463 {
464 	Reg *r, *r1;
465 	int z, i;
466 	Bits bit;
467 
468 	for(r = firstr; r != R; r = r->link) {
469 		if(r->loop > 1)
470 			continue;
471 		if(r->prog->as == AJAL)
472 			continue;
473 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
474 			if(r1->loop <= 1)
475 				continue;
476 			for(z=0; z<BITS; z++)
477 				bit.b[z] = r1->calbehind.b[z] &
478 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
479 					~(r->calahead.b[z] & addrs.b[z]);
480 			while(bany(&bit)) {
481 				i = bnum(bit);
482 				bit.b[i/32] &= ~(1L << (i%32));
483 			}
484 		}
485 	}
486 }
487 
488 /*
489  * add mov b,rn
490  * just after r
491  */
492 void
addmove(Reg * r,int bn,int rn,int f)493 addmove(Reg *r, int bn, int rn, int f)
494 {
495 	Prog *p, *p1;
496 	Adr *a;
497 	Var *v;
498 
499 	p1 = alloc(sizeof(*p1));
500 	*p1 = zprog;
501 	p = r->prog;
502 
503 	p1->link = p->link;
504 	p->link = p1;
505 	p1->lineno = p->lineno;
506 
507 	v = var + bn;
508 
509 	a = &p1->to;
510 	a->sym = v->sym;
511 	a->name = v->name;
512 	a->offset = v->offset;
513 	a->etype = v->etype;
514 	a->type = D_OREG;
515 	if(a->etype == TARRAY || a->sym == S)
516 		a->type = D_CONST;
517 
518 	p1->as = AMOVW;
519 	if(v->etype == TCHAR || v->etype == TUCHAR)
520 		p1->as = AMOVB;
521 	if(v->etype == TSHORT || v->etype == TUSHORT)
522 		p1->as = AMOVH;
523 	if(v->etype == TFLOAT)
524 		p1->as = AMOVF;
525 	if(v->etype == TDOUBLE)
526 		p1->as = AMOVD;
527 	if(thechar == 'j')
528 	if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
529 		p1->as = AMOV;
530 
531 	p1->from.type = D_REG;
532 	p1->from.reg = rn;
533 	if(rn >= NREG) {
534 		p1->from.type = D_FREG;
535 		p1->from.reg = rn-NREG;
536 	}
537 	if(!f) {
538 		p1->from = *a;
539 		*a = zprog.from;
540 		a->type = D_REG;
541 		a->reg = rn;
542 		if(rn >= NREG) {
543 			a->type = D_FREG;
544 			a->reg = rn-NREG;
545 		}
546 		if(v->etype == TUCHAR)
547 			p1->as = AMOVBU;
548 		if(v->etype == TUSHORT)
549 			p1->as = AMOVHU;
550 	}
551 	if(debug['R'])
552 		print("%P\t.a%P\n", p, p1);
553 }
554 
555 Bits
mkvar(Adr * a,int docon)556 mkvar(Adr *a, int docon)
557 {
558 	Var *v;
559 	int i, t, n, et, z;
560 	long o;
561 	Bits bit;
562 	Sym *s;
563 
564 	t = a->type;
565 	if(t == D_REG && a->reg != NREG)
566 		regbits |= RtoB(a->reg);
567 	if(t == D_FREG && a->reg != NREG)
568 		regbits |= FtoB(a->reg);
569 	s = a->sym;
570 	o = a->offset;
571 	et = a->etype;
572 	if(s == S) {
573 		if(t != D_CONST || !docon || a->reg != NREG)
574 			goto none;
575 		et = TLONG;
576 	}
577 	if(t == D_CONST) {
578 		if(s == S && sval(o))
579 			goto none;
580 	}
581 
582 	n = a->name;
583 	v = var;
584 	for(i=0; i<nvar; i++) {
585 		if(s == v->sym)
586 		if(n == v->name)
587 		if(o == v->offset)
588 			goto out;
589 		v++;
590 	}
591 	if(s)
592 		if(s->name[0] == '.')
593 			goto none;
594 	if(nvar >= NVAR) {
595 		if(debug['w'] > 1 && s)
596 			warn(Z, "variable not optimized: %s", s->name);
597 		goto none;
598 	}
599 	i = nvar;
600 	nvar++;
601 	v = &var[i];
602 	v->sym = s;
603 	v->offset = o;
604 	v->etype = et;
605 	v->name = n;
606 	if(debug['R'])
607 		print("bit=%2d et=%2d %D\n", i, et, a);
608 out:
609 	bit = blsh(i);
610 	if(n == D_EXTERN || n == D_STATIC)
611 		for(z=0; z<BITS; z++)
612 			externs.b[z] |= bit.b[z];
613 	if(n == D_PARAM)
614 		for(z=0; z<BITS; z++)
615 			params.b[z] |= bit.b[z];
616 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
617 		for(z=0; z<BITS; z++)
618 			addrs.b[z] |= bit.b[z];
619 	if(t == D_CONST) {
620 		if(s == S) {
621 			for(z=0; z<BITS; z++)
622 				consts.b[z] |= bit.b[z];
623 			return bit;
624 		}
625 		if(et != TARRAY)
626 			for(z=0; z<BITS; z++)
627 				addrs.b[z] |= bit.b[z];
628 		for(z=0; z<BITS; z++)
629 			params.b[z] |= bit.b[z];
630 		return bit;
631 	}
632 	if(t == D_OREG)
633 		return bit;
634 
635 none:
636 	return zbits;
637 }
638 
639 void
prop(Reg * r,Bits ref,Bits cal)640 prop(Reg *r, Bits ref, Bits cal)
641 {
642 	Reg *r1, *r2;
643 	int z;
644 
645 	for(r1 = r; r1 != R; r1 = r1->p1) {
646 		for(z=0; z<BITS; z++) {
647 			ref.b[z] |= r1->refahead.b[z];
648 			if(ref.b[z] != r1->refahead.b[z]) {
649 				r1->refahead.b[z] = ref.b[z];
650 				change++;
651 			}
652 			cal.b[z] |= r1->calahead.b[z];
653 			if(cal.b[z] != r1->calahead.b[z]) {
654 				r1->calahead.b[z] = cal.b[z];
655 				change++;
656 			}
657 		}
658 		switch(r1->prog->as) {
659 		case AJAL:
660 			for(z=0; z<BITS; z++) {
661 				cal.b[z] |= ref.b[z] | externs.b[z];
662 				ref.b[z] = 0;
663 			}
664 			break;
665 
666 		case ATEXT:
667 			for(z=0; z<BITS; z++) {
668 				cal.b[z] = 0;
669 				ref.b[z] = 0;
670 			}
671 			break;
672 
673 		case ARET:
674 			for(z=0; z<BITS; z++) {
675 				cal.b[z] = externs.b[z];
676 				ref.b[z] = 0;
677 			}
678 		}
679 		for(z=0; z<BITS; z++) {
680 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
681 				r1->use1.b[z] | r1->use2.b[z];
682 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
683 			r1->refbehind.b[z] = ref.b[z];
684 			r1->calbehind.b[z] = cal.b[z];
685 		}
686 		if(r1->active)
687 			break;
688 		r1->active = 1;
689 	}
690 	for(; r != r1; r = r->p1)
691 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
692 			prop(r2, r->refbehind, r->calbehind);
693 }
694 
695 /*
696  * find looping structure
697  *
698  * 1) find reverse postordering
699  * 2) find approximate dominators,
700  *	the actual dominators if the flow graph is reducible
701  *	otherwise, dominators plus some other non-dominators.
702  *	See Matthew S. Hecht and Jeffrey D. Ullman,
703  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
704  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
705  *	Oct. 1-3, 1973, pp.  207-217.
706  * 3) find all nodes with a predecessor dominated by the current node.
707  *	such a node is a loop head.
708  *	recursively, all preds with a greater rpo number are in the loop
709  */
710 long
postorder(Reg * r,Reg ** rpo2r,long n)711 postorder(Reg *r, Reg **rpo2r, long n)
712 {
713 	Reg *r1;
714 
715 	r->rpo = 1;
716 	r1 = r->s1;
717 	if(r1 && !r1->rpo)
718 		n = postorder(r1, rpo2r, n);
719 	r1 = r->s2;
720 	if(r1 && !r1->rpo)
721 		n = postorder(r1, rpo2r, n);
722 	rpo2r[n] = r;
723 	n++;
724 	return n;
725 }
726 
727 long
rpolca(long * idom,long rpo1,long rpo2)728 rpolca(long *idom, long rpo1, long rpo2)
729 {
730 	long t;
731 
732 	if(rpo1 == -1)
733 		return rpo2;
734 	while(rpo1 != rpo2){
735 		if(rpo1 > rpo2){
736 			t = rpo2;
737 			rpo2 = rpo1;
738 			rpo1 = t;
739 		}
740 		while(rpo1 < rpo2){
741 			t = idom[rpo2];
742 			if(t >= rpo2)
743 				fatal(Z, "bad idom");
744 			rpo2 = t;
745 		}
746 	}
747 	return rpo1;
748 }
749 
750 int
doms(long * idom,long r,long s)751 doms(long *idom, long r, long s)
752 {
753 	while(s > r)
754 		s = idom[s];
755 	return s == r;
756 }
757 
758 int
loophead(long * idom,Reg * r)759 loophead(long *idom, Reg *r)
760 {
761 	long src;
762 
763 	src = r->rpo;
764 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
765 		return 1;
766 	for(r = r->p2; r != R; r = r->p2link)
767 		if(doms(idom, src, r->rpo))
768 			return 1;
769 	return 0;
770 }
771 
772 void
loopmark(Reg ** rpo2r,long head,Reg * r)773 loopmark(Reg **rpo2r, long head, Reg *r)
774 {
775 	if(r->rpo < head || r->active == head)
776 		return;
777 	r->active = head;
778 	r->loop += LOOP;
779 	if(r->p1 != R)
780 		loopmark(rpo2r, head, r->p1);
781 	for(r = r->p2; r != R; r = r->p2link)
782 		loopmark(rpo2r, head, r);
783 }
784 
785 void
loopit(Reg * r,long nr)786 loopit(Reg *r, long nr)
787 {
788 	Reg *r1;
789 	long i, d, me;
790 
791 	if(nr > maxnr) {
792 		rpo2r = alloc(nr * sizeof(Reg*));
793 		idom = alloc(nr * sizeof(long));
794 		maxnr = nr;
795 	}
796 
797 	d = postorder(r, rpo2r, 0);
798 	if(d > nr)
799 		fatal(Z, "too many reg nodes");
800 	nr = d;
801 	for(i = 0; i < nr / 2; i++){
802 		r1 = rpo2r[i];
803 		rpo2r[i] = rpo2r[nr - 1 - i];
804 		rpo2r[nr - 1 - i] = r1;
805 	}
806 	for(i = 0; i < nr; i++)
807 		rpo2r[i]->rpo = i;
808 
809 	idom[0] = 0;
810 	for(i = 0; i < nr; i++){
811 		r1 = rpo2r[i];
812 		me = r1->rpo;
813 		d = -1;
814 		if(r1->p1 != R && r1->p1->rpo < me)
815 			d = r1->p1->rpo;
816 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
817 			if(r1->rpo < me)
818 				d = rpolca(idom, d, r1->rpo);
819 		idom[i] = d;
820 	}
821 
822 	for(i = 0; i < nr; i++){
823 		r1 = rpo2r[i];
824 		r1->loop++;
825 		if(r1->p2 != R && loophead(idom, r1))
826 			loopmark(rpo2r, i, r1);
827 	}
828 }
829 
830 void
synch(Reg * r,Bits dif)831 synch(Reg *r, Bits dif)
832 {
833 	Reg *r1;
834 	int z;
835 
836 	for(r1 = r; r1 != R; r1 = r1->s1) {
837 		for(z=0; z<BITS; z++) {
838 			dif.b[z] = (dif.b[z] &
839 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
840 					r1->set.b[z] | r1->regdiff.b[z];
841 			if(dif.b[z] != r1->regdiff.b[z]) {
842 				r1->regdiff.b[z] = dif.b[z];
843 				change++;
844 			}
845 		}
846 		if(r1->active)
847 			break;
848 		r1->active = 1;
849 		for(z=0; z<BITS; z++)
850 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
851 		if(r1->s2 != R)
852 			synch(r1->s2, dif);
853 	}
854 }
855 
856 ulong
allreg(ulong b,Rgn * r)857 allreg(ulong b, Rgn *r)
858 {
859 	Var *v;
860 	int i;
861 
862 	v = var + r->varno;
863 	r->regno = 0;
864 	switch(v->etype) {
865 
866 	default:
867 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
868 		break;
869 
870 	case TCHAR:
871 	case TUCHAR:
872 	case TSHORT:
873 	case TUSHORT:
874 	case TINT:
875 	case TUINT:
876 	case TLONG:
877 	case TULONG:
878 	case TIND:
879 	case TARRAY:
880 		i = BtoR(~b);
881 		if(i && r->cost >= 0) {
882 			r->regno = i;
883 			return RtoB(i);
884 		}
885 		break;
886 
887 	case TDOUBLE:
888 	case TFLOAT:
889 		i = BtoF(~b);
890 		if(i && r->cost >= 0) {
891 			r->regno = i+NREG;
892 			return FtoB(i);
893 		}
894 		break;
895 	}
896 	return 0;
897 }
898 
899 void
paint1(Reg * r,int bn)900 paint1(Reg *r, int bn)
901 {
902 	Reg *r1;
903 	Prog *p;
904 	int z;
905 	ulong bb;
906 
907 	z = bn/32;
908 	bb = 1L<<(bn%32);
909 	if(r->act.b[z] & bb)
910 		return;
911 	for(;;) {
912 		if(!(r->refbehind.b[z] & bb))
913 			break;
914 		r1 = r->p1;
915 		if(r1 == R)
916 			break;
917 		if(!(r1->refahead.b[z] & bb))
918 			break;
919 		if(r1->act.b[z] & bb)
920 			break;
921 		r = r1;
922 	}
923 
924 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
925 		change -= CLOAD * r->loop;
926 		if(debug['R'] && debug['v'])
927 			print("%ld%P\tld %B $%d\n", r->loop,
928 				r->prog, blsh(bn), change);
929 	}
930 	for(;;) {
931 		r->act.b[z] |= bb;
932 		p = r->prog;
933 
934 		if(r->use1.b[z] & bb) {
935 			change += CREF * r->loop;
936 			if(debug['R'] && debug['v'])
937 				print("%ld%P\tu1 %B $%d\n", r->loop,
938 					p, blsh(bn), change);
939 		}
940 
941 		if((r->use2.b[z]|r->set.b[z]) & bb) {
942 			change += CREF * r->loop;
943 			if(debug['R'] && debug['v'])
944 				print("%ld%P\tu2 %B $%d\n", r->loop,
945 					p, blsh(bn), change);
946 		}
947 
948 		if(STORE(r) & r->regdiff.b[z] & bb) {
949 			change -= CLOAD * r->loop;
950 			if(debug['R'] && debug['v'])
951 				print("%ld%P\tst %B $%d\n", r->loop,
952 					p, blsh(bn), change);
953 		}
954 
955 		if(r->refbehind.b[z] & bb)
956 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
957 				if(r1->refahead.b[z] & bb)
958 					paint1(r1, bn);
959 
960 		if(!(r->refahead.b[z] & bb))
961 			break;
962 		r1 = r->s2;
963 		if(r1 != R)
964 			if(r1->refbehind.b[z] & bb)
965 				paint1(r1, bn);
966 		r = r->s1;
967 		if(r == R)
968 			break;
969 		if(r->act.b[z] & bb)
970 			break;
971 		if(!(r->refbehind.b[z] & bb))
972 			break;
973 	}
974 }
975 
976 ulong
paint2(Reg * r,int bn)977 paint2(Reg *r, int bn)
978 {
979 	Reg *r1;
980 	int z;
981 	ulong bb, vreg;
982 
983 	z = bn/32;
984 	bb = 1L << (bn%32);
985 	vreg = regbits;
986 	if(!(r->act.b[z] & bb))
987 		return vreg;
988 	for(;;) {
989 		if(!(r->refbehind.b[z] & bb))
990 			break;
991 		r1 = r->p1;
992 		if(r1 == R)
993 			break;
994 		if(!(r1->refahead.b[z] & bb))
995 			break;
996 		if(!(r1->act.b[z] & bb))
997 			break;
998 		r = r1;
999 	}
1000 	for(;;) {
1001 		r->act.b[z] &= ~bb;
1002 
1003 		vreg |= r->regu;
1004 
1005 		if(r->refbehind.b[z] & bb)
1006 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1007 				if(r1->refahead.b[z] & bb)
1008 					vreg |= paint2(r1, bn);
1009 
1010 		if(!(r->refahead.b[z] & bb))
1011 			break;
1012 		r1 = r->s2;
1013 		if(r1 != R)
1014 			if(r1->refbehind.b[z] & bb)
1015 				vreg |= paint2(r1, bn);
1016 		r = r->s1;
1017 		if(r == R)
1018 			break;
1019 		if(!(r->act.b[z] & bb))
1020 			break;
1021 		if(!(r->refbehind.b[z] & bb))
1022 			break;
1023 	}
1024 	return vreg;
1025 }
1026 
1027 void
paint3(Reg * r,int bn,long rb,int rn)1028 paint3(Reg *r, int bn, long rb, int rn)
1029 {
1030 	Reg *r1;
1031 	Prog *p;
1032 	int z;
1033 	ulong bb;
1034 
1035 	z = bn/32;
1036 	bb = 1L << (bn%32);
1037 	if(r->act.b[z] & bb)
1038 		return;
1039 	for(;;) {
1040 		if(!(r->refbehind.b[z] & bb))
1041 			break;
1042 		r1 = r->p1;
1043 		if(r1 == R)
1044 			break;
1045 		if(!(r1->refahead.b[z] & bb))
1046 			break;
1047 		if(r1->act.b[z] & bb)
1048 			break;
1049 		r = r1;
1050 	}
1051 
1052 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1053 		addmove(r, bn, rn, 0);
1054 	for(;;) {
1055 		r->act.b[z] |= bb;
1056 		p = r->prog;
1057 
1058 		if(r->use1.b[z] & bb) {
1059 			if(debug['R'])
1060 				print("%P", p);
1061 			addreg(&p->from, rn);
1062 			switch(p->as){
1063 			case AMOVB:
1064 			case AMOVBU:
1065 			case AMOVH:
1066 			case AMOVHU:
1067 			case AMOVW:
1068 			case AMOVWU:
1069 				p->as = AMOV;
1070 			}
1071 			if(debug['R'])
1072 				print("\t.c%P\n", p);
1073 		}
1074 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1075 			if(debug['R'])
1076 				print("%P", p);
1077 			addreg(&p->to, rn);
1078 			if(debug['R'])
1079 				print("\t.c%P\n", p);
1080 		}
1081 
1082 		if(STORE(r) & r->regdiff.b[z] & bb)
1083 			addmove(r, bn, rn, 1);
1084 		r->regu |= rb;
1085 
1086 		if(r->refbehind.b[z] & bb)
1087 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1088 				if(r1->refahead.b[z] & bb)
1089 					paint3(r1, bn, rb, rn);
1090 
1091 		if(!(r->refahead.b[z] & bb))
1092 			break;
1093 		r1 = r->s2;
1094 		if(r1 != R)
1095 			if(r1->refbehind.b[z] & bb)
1096 				paint3(r1, bn, rb, rn);
1097 		r = r->s1;
1098 		if(r == R)
1099 			break;
1100 		if(r->act.b[z] & bb)
1101 			break;
1102 		if(!(r->refbehind.b[z] & bb))
1103 			break;
1104 	}
1105 }
1106 
1107 void
addreg(Adr * a,int rn)1108 addreg(Adr *a, int rn)
1109 {
1110 
1111 	a->sym = 0;
1112 	a->name = D_NONE;
1113 	a->type = D_REG;
1114 	a->reg = rn;
1115 	if(rn >= NREG) {
1116 		a->type = D_FREG;
1117 		a->reg = rn-NREG;
1118 	}
1119 }
1120 
1121 /*
1122  *	bit	reg
1123  *	0	R9
1124  *	1	R10
1125  *  ... ...
1126  *  6   R15
1127  */
1128 long
RtoB(int r)1129 RtoB(int r)
1130 {
1131 
1132 	if(r < 9 || r > 15)
1133 		return 0;
1134 	return 1L << (r-9);
1135 }
1136 
1137 int
BtoR(long b)1138 BtoR(long b)
1139 {
1140 
1141 	b &= 0x007fL;
1142 	if(b == 0)
1143 		return 0;
1144 	return bitno(b) + 9;
1145 }
1146 
1147 /*
1148  *	bit	reg
1149  *	7	F1
1150  *	8	F2
1151  *	...	...
1152  *	31	F25
1153  */
1154 long
FtoB(int f)1155 FtoB(int f)
1156 {
1157 
1158 	if(f < 1 || f > 25)
1159 		return 0;
1160 	return 1L << (f + 6);
1161 }
1162 
1163 int
BtoF(long b)1164 BtoF(long b)
1165 {
1166 
1167 	b &= 0x03ffff80L;
1168 	if(b == 0)
1169 		return 0;
1170 	return bitno(b) - 6;
1171 }
1172