xref: /inferno-os/utils/5c/reg.c (revision 2b69dba5038ffd0b59cf30a4c44bce549e5097f8)
1 #include "gc.h"
2 
3 void	addsplits(void);
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		r = alloc(sizeof(*r));
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(const void *a1, const void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = (Rgn*)a1;
27 	p2 = (Rgn*)a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val, npc;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = 0;
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 		case ASIGNAME:
84 			continue;
85 		}
86 		r = rega();
87 		if(firstr == R) {
88 			firstr = r;
89 			lastr = r;
90 		} else {
91 			lastr->link = r;
92 			r->p1 = lastr;
93 			lastr->s1 = r;
94 			lastr = r;
95 		}
96 		r->prog = p;
97 		r->pc = val;
98 		val++;
99 
100 		lp = log5;
101 		for(i=0; i<5; i++) {
102 			lp->c--;
103 			if(lp->c <= 0) {
104 				lp->c = lp->m;
105 				if(lp->p != R)
106 					lp->p->log5 = r;
107 				lp->p = r;
108 				(lp+1)->c = 0;
109 				break;
110 			}
111 			lp++;
112 		}
113 
114 		r1 = r->p1;
115 		if(r1 != R)
116 		switch(r1->prog->as) {
117 		case ARET:
118 		case AB:
119 		case ARFE:
120 			r->p1 = R;
121 			r1->s1 = R;
122 		}
123 
124 		/*
125 		 * left side always read
126 		 */
127 		bit = mkvar(&p->from, p->as==AMOVW);
128 		for(z=0; z<BITS; z++)
129 			r->use1.b[z] |= bit.b[z];
130 
131 		/*
132 		 * right side depends on opcode
133 		 */
134 		bit = mkvar(&p->to, 0);
135 		if(bany(&bit))
136 		switch(p->as) {
137 		default:
138 			diag(Z, "reg: unknown asop: %A", p->as);
139 			break;
140 
141 		/*
142 		 * right side write
143 		 */
144 		case ANOP:
145 		case AMOVB:
146 		case AMOVBU:
147 		case AMOVH:
148 		case AMOVHU:
149 		case AMOVW:
150 		case AMOVF:
151 		case AMOVD:
152 			for(z=0; z<BITS; z++)
153 				r->set.b[z] |= bit.b[z];
154 			break;
155 
156 		/*
157 		 * funny
158 		 */
159 		case ABL:
160 			for(z=0; z<BITS; z++)
161 				addrs.b[z] |= bit.b[z];
162 			break;
163 		}
164 
165 		if(p->as == AMOVM) {
166 			if(p->from.type == D_CONST)
167 				z = p->from.offset;
168 			else
169 				z = p->to.offset;
170 			for(i=0; z; i++) {
171 				if(z&1)
172 					regbits |= RtoB(i);
173 				z >>= 1;
174 			}
175 		}
176 	}
177 	if(firstr == R)
178 		return;
179 	initpc = pc - val;
180 	npc = val;
181 
182 	/*
183 	 * pass 2
184 	 * turn branch references to pointers
185 	 * build back pointers
186 	 */
187 	for(r = firstr; r != R; r = r->link) {
188 		p = r->prog;
189 		if(p->to.type == D_BRANCH) {
190 			val = p->to.offset - initpc;
191 			r1 = firstr;
192 			while(r1 != R) {
193 				r2 = r1->log5;
194 				if(r2 != R && val >= r2->pc) {
195 					r1 = r2;
196 					continue;
197 				}
198 				if(r1->pc == val)
199 					break;
200 				r1 = r1->link;
201 			}
202 			if(r1 == R) {
203 				nearln = p->lineno;
204 				diag(Z, "ref not found\n%P", p);
205 				continue;
206 			}
207 			if(r1 == r) {
208 				nearln = p->lineno;
209 				diag(Z, "ref to self\n%P", p);
210 				continue;
211 			}
212 			r->s2 = r1;
213 			r->p2link = r1->p2;
214 			r1->p2 = r;
215 		}
216 	}
217 	if(debug['R']) {
218 		p = firstr->prog;
219 		print("\n%L %D\n", p->lineno, &p->from);
220 	}
221 
222 	/*
223 	 * pass 2.5
224 	 * find looping structure
225 	 */
226 	for(r = firstr; r != R; r = r->link)
227 		r->active = 0;
228 	change = 0;
229 	loopit(firstr, npc);
230 
231 	/*
232 	 * pass 3
233 	 * iterate propagating usage
234 	 * 	back until flow graph is complete
235 	 */
236 loop1:
237 	change = 0;
238 	for(r = firstr; r != R; r = r->link)
239 		r->active = 0;
240 	for(r = firstr; r != R; r = r->link)
241 		if(r->prog->as == ARET)
242 			prop(r, zbits, zbits);
243 loop11:
244 	/* pick up unreachable code */
245 	i = 0;
246 	for(r = firstr; r != R; r = r1) {
247 		r1 = r->link;
248 		if(r1 && r1->active && !r->active) {
249 			prop(r, zbits, zbits);
250 			i = 1;
251 		}
252 	}
253 	if(i)
254 		goto loop11;
255 	if(change)
256 		goto loop1;
257 
258 
259 	/*
260 	 * pass 4
261 	 * iterate propagating register/variable synchrony
262 	 * 	forward until graph is complete
263 	 */
264 loop2:
265 	change = 0;
266 	for(r = firstr; r != R; r = r->link)
267 		r->active = 0;
268 	synch(firstr, zbits);
269 	if(change)
270 		goto loop2;
271 
272 	addsplits();
273 
274 	if(debug['R'] && debug['v']) {
275 		print("\nprop structure:\n");
276 		for(r = firstr; r != R; r = r->link) {
277 			print("%ld:%P", r->loop, r->prog);
278 			for(z=0; z<BITS; z++)
279 				bit.b[z] = r->set.b[z] |
280 					r->refahead.b[z] | r->calahead.b[z] |
281 					r->refbehind.b[z] | r->calbehind.b[z] |
282 					r->use1.b[z] | r->use2.b[z];
283 			if(bany(&bit)) {
284 				print("\t");
285 				if(bany(&r->use1))
286 					print(" u1=%B", r->use1);
287 				if(bany(&r->use2))
288 					print(" u2=%B", r->use2);
289 				if(bany(&r->set))
290 					print(" st=%B", r->set);
291 				if(bany(&r->refahead))
292 					print(" ra=%B", r->refahead);
293 				if(bany(&r->calahead))
294 					print(" ca=%B", r->calahead);
295 				if(bany(&r->refbehind))
296 					print(" rb=%B", r->refbehind);
297 				if(bany(&r->calbehind))
298 					print(" cb=%B", r->calbehind);
299 			}
300 			print("\n");
301 		}
302 	}
303 
304 	/*
305 	 * pass 5
306 	 * isolate regions
307 	 * calculate costs (paint1)
308 	 */
309 	r = firstr;
310 	if(r) {
311 		for(z=0; z<BITS; z++)
312 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
313 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
314 		if(bany(&bit)) {
315 			nearln = r->prog->lineno;
316 			warn(Z, "used and not set: %B", bit);
317 			if(debug['R'] && !debug['w'])
318 				print("used and not set: %B\n", bit);
319 		}
320 	}
321 
322 	for(r = firstr; r != R; r = r->link)
323 		r->act = zbits;
324 	rgp = region;
325 	nregion = 0;
326 	for(r = firstr; r != R; r = r->link) {
327 		for(z=0; z<BITS; z++)
328 			bit.b[z] = r->set.b[z] &
329 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
330 		if(bany(&bit)) {
331 			nearln = r->prog->lineno;
332 			warn(Z, "set and not used: %B", bit);
333 			if(debug['R'])
334 				print("set and not used: %B\n", bit);
335 			excise(r);
336 		}
337 		for(z=0; z<BITS; z++)
338 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
339 		while(bany(&bit)) {
340 			i = bnum(bit);
341 			rgp->enter = r;
342 			rgp->varno = i;
343 			change = 0;
344 			if(debug['R'] && debug['v'])
345 				print("\n");
346 			paint1(r, i);
347 			bit.b[i/32] &= ~(1L<<(i%32));
348 			if(change <= 0) {
349 				if(debug['R'])
350 					print("%L $%d: %B\n",
351 						r->prog->lineno, change, blsh(i));
352 				continue;
353 			}
354 			rgp->cost = change;
355 			nregion++;
356 			if(nregion >= NRGN) {
357 				warn(Z, "too many regions");
358 				goto brk;
359 			}
360 			rgp++;
361 		}
362 	}
363 brk:
364 	qsort(region, nregion, sizeof(region[0]), rcmp);
365 
366 	/*
367 	 * pass 6
368 	 * determine used registers (paint2)
369 	 * replace code (paint3)
370 	 */
371 	rgp = region;
372 	for(i=0; i<nregion; i++) {
373 		bit = blsh(rgp->varno);
374 		vreg = paint2(rgp->enter, rgp->varno);
375 		vreg = allreg(vreg, rgp);
376 		if(debug['R']) {
377 			if(rgp->regno >= NREG)
378 				print("%L $%d F%d: %B\n",
379 					rgp->enter->prog->lineno,
380 					rgp->cost,
381 					rgp->regno-NREG,
382 					bit);
383 			else
384 				print("%L $%d R%d: %B\n",
385 					rgp->enter->prog->lineno,
386 					rgp->cost,
387 					rgp->regno,
388 					bit);
389 		}
390 		if(rgp->regno != 0)
391 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
392 		rgp++;
393 	}
394 	/*
395 	 * pass 7
396 	 * peep-hole on basic block
397 	 */
398 	if(!debug['R'] || debug['P'])
399 		peep();
400 
401 	/*
402 	 * pass 8
403 	 * recalculate pc
404 	 */
405 	val = initpc;
406 	for(r = firstr; r != R; r = r1) {
407 		r->pc = val;
408 		p = r->prog;
409 		p1 = P;
410 		r1 = r->link;
411 		if(r1 != R)
412 			p1 = r1->prog;
413 		for(; p != p1; p = p->link) {
414 			switch(p->as) {
415 			default:
416 				val++;
417 				break;
418 
419 			case ANOP:
420 			case ADATA:
421 			case AGLOBL:
422 			case ANAME:
423 			case ASIGNAME:
424 				break;
425 			}
426 		}
427 	}
428 	pc = val;
429 
430 	/*
431 	 * fix up branches
432 	 */
433 	if(debug['R'])
434 		if(bany(&addrs))
435 			print("addrs: %B\n", addrs);
436 
437 	r1 = 0; /* set */
438 	for(r = firstr; r != R; r = r->link) {
439 		p = r->prog;
440 		if(p->to.type == D_BRANCH)
441 			p->to.offset = r->s2->pc;
442 		r1 = r;
443 	}
444 
445 	/*
446 	 * last pass
447 	 * eliminate nops
448 	 * free aux structures
449 	 */
450 	for(p = firstr->prog; p != P; p = p->link){
451 		while(p->link && p->link->as == ANOP)
452 			p->link = p->link->link;
453 	}
454 	if(r1 != R) {
455 		r1->link = freer;
456 		freer = firstr;
457 	}
458 }
459 
460 void
461 addsplits(void)
462 {
463 	Reg *r, *r1;
464 	int z, i;
465 	Bits bit;
466 
467 	for(r = firstr; r != R; r = r->link) {
468 		if(r->loop > 1)
469 			continue;
470 		if(r->prog->as == ABL)
471 			continue;
472 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
473 			if(r1->loop <= 1)
474 				continue;
475 			for(z=0; z<BITS; z++)
476 				bit.b[z] = r1->calbehind.b[z] &
477 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
478 					~(r->calahead.b[z] & addrs.b[z]);
479 			while(bany(&bit)) {
480 				i = bnum(bit);
481 				bit.b[i/32] &= ~(1L << (i%32));
482 			}
483 		}
484 	}
485 }
486 
487 /*
488  * add mov b,rn
489  * just after r
490  */
491 void
492 addmove(Reg *r, int bn, int rn, int f)
493 {
494 	Prog *p, *p1;
495 	Adr *a;
496 	Var *v;
497 
498 	p1 = alloc(sizeof(*p1));
499 	*p1 = zprog;
500 	p = r->prog;
501 
502 	p1->link = p->link;
503 	p->link = p1;
504 	p1->lineno = p->lineno;
505 
506 	v = var + bn;
507 
508 	a = &p1->to;
509 	a->sym = v->sym;
510 	a->name = v->name;
511 	a->offset = v->offset;
512 	a->etype = v->etype;
513 	a->type = D_OREG;
514 	if(a->etype == TARRAY || a->sym == S)
515 		a->type = D_CONST;
516 
517 	p1->as = AMOVW;
518 	if(v->etype == TCHAR || v->etype == TUCHAR)
519 		p1->as = AMOVB;
520 	if(v->etype == TSHORT || v->etype == TUSHORT)
521 		p1->as = AMOVH;
522 	if(v->etype == TFLOAT)
523 		p1->as = AMOVF;
524 	if(v->etype == TDOUBLE)
525 		p1->as = AMOVD;
526 
527 	p1->from.type = D_REG;
528 	p1->from.reg = rn;
529 	if(rn >= NREG) {
530 		p1->from.type = D_FREG;
531 		p1->from.reg = rn-NREG;
532 	}
533 	if(!f) {
534 		p1->from = *a;
535 		*a = zprog.from;
536 		a->type = D_REG;
537 		a->reg = rn;
538 		if(rn >= NREG) {
539 			a->type = D_FREG;
540 			a->reg = rn-NREG;
541 		}
542 		if(v->etype == TUCHAR)
543 			p1->as = AMOVBU;
544 		if(v->etype == TUSHORT)
545 			p1->as = AMOVHU;
546 	}
547 	if(debug['R'])
548 		print("%P\t.a%P\n", p, p1);
549 }
550 
551 Bits
552 mkvar(Adr *a, int docon)
553 {
554 	Var *v;
555 	int i, t, n, et, z;
556 	long o;
557 	Bits bit;
558 	Sym *s;
559 
560 	t = a->type;
561 	if(t == D_REG && a->reg != NREG)
562 		regbits |= RtoB(a->reg);
563 	if(t == D_FREG && a->reg != NREG)
564 		regbits |= FtoB(a->reg);
565 	s = a->sym;
566 	o = a->offset;
567 	et = a->etype;
568 	if(s == S) {
569 		if(t != D_CONST || !docon || a->reg != NREG)
570 			goto none;
571 		et = TLONG;
572 	}
573 	if(t == D_CONST) {
574 		if(s == S && sval(o))
575 			goto none;
576 	}
577 
578 	n = a->name;
579 	v = var;
580 	for(i=0; i<nvar; i++) {
581 		if(s == v->sym)
582 		if(n == v->name)
583 		if(o == v->offset)
584 			goto out;
585 		v++;
586 	}
587 	if(s)
588 		if(s->name[0] == '.')
589 			goto none;
590 	if(nvar >= NVAR) {
591 		if(debug['w'] > 1 && s)
592 			warn(Z, "variable not optimized: %s", s->name);
593 		goto none;
594 	}
595 	i = nvar;
596 	nvar++;
597 	v = &var[i];
598 	v->sym = s;
599 	v->offset = o;
600 	v->etype = et;
601 	v->name = n;
602 	if(debug['R'])
603 		print("bit=%2d et=%2d %D\n", i, et, a);
604 out:
605 	bit = blsh(i);
606 	if(n == D_EXTERN || n == D_STATIC)
607 		for(z=0; z<BITS; z++)
608 			externs.b[z] |= bit.b[z];
609 	if(n == D_PARAM)
610 		for(z=0; z<BITS; z++)
611 			params.b[z] |= bit.b[z];
612 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
613 		for(z=0; z<BITS; z++)
614 			addrs.b[z] |= bit.b[z];
615 	if(t == D_CONST) {
616 		if(s == S) {
617 			for(z=0; z<BITS; z++)
618 				consts.b[z] |= bit.b[z];
619 			return bit;
620 		}
621 		if(et != TARRAY)
622 			for(z=0; z<BITS; z++)
623 				addrs.b[z] |= bit.b[z];
624 		for(z=0; z<BITS; z++)
625 			params.b[z] |= bit.b[z];
626 		return bit;
627 	}
628 	if(t == D_OREG)
629 		return bit;
630 
631 none:
632 	return zbits;
633 }
634 
635 void
636 prop(Reg *r, Bits ref, Bits cal)
637 {
638 	Reg *r1, *r2;
639 	int z;
640 
641 	for(r1 = r; r1 != R; r1 = r1->p1) {
642 		for(z=0; z<BITS; z++) {
643 			ref.b[z] |= r1->refahead.b[z];
644 			if(ref.b[z] != r1->refahead.b[z]) {
645 				r1->refahead.b[z] = ref.b[z];
646 				change++;
647 			}
648 			cal.b[z] |= r1->calahead.b[z];
649 			if(cal.b[z] != r1->calahead.b[z]) {
650 				r1->calahead.b[z] = cal.b[z];
651 				change++;
652 			}
653 		}
654 		switch(r1->prog->as) {
655 		case ABL:
656 			for(z=0; z<BITS; z++) {
657 				cal.b[z] |= ref.b[z] | externs.b[z];
658 				ref.b[z] = 0;
659 			}
660 			break;
661 
662 		case ATEXT:
663 			for(z=0; z<BITS; z++) {
664 				cal.b[z] = 0;
665 				ref.b[z] = 0;
666 			}
667 			break;
668 
669 		case ARET:
670 			for(z=0; z<BITS; z++) {
671 				cal.b[z] = externs.b[z];
672 				ref.b[z] = 0;
673 			}
674 		}
675 		for(z=0; z<BITS; z++) {
676 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
677 				r1->use1.b[z] | r1->use2.b[z];
678 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
679 			r1->refbehind.b[z] = ref.b[z];
680 			r1->calbehind.b[z] = cal.b[z];
681 		}
682 		if(r1->active)
683 			break;
684 		r1->active = 1;
685 	}
686 	for(; r != r1; r = r->p1)
687 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
688 			prop(r2, r->refbehind, r->calbehind);
689 }
690 
691 /*
692  * find looping structure
693  *
694  * 1) find reverse postordering
695  * 2) find approximate dominators,
696  *	the actual dominators if the flow graph is reducible
697  *	otherwise, dominators plus some other non-dominators.
698  *	See Matthew S. Hecht and Jeffrey D. Ullman,
699  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
700  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
701  *	Oct. 1-3, 1973, pp.  207-217.
702  * 3) find all nodes with a predecessor dominated by the current node.
703  *	such a node is a loop head.
704  *	recursively, all preds with a greater rpo number are in the loop
705  */
706 long
707 postorder(Reg *r, Reg **rpo2r, long n)
708 {
709 	Reg *r1;
710 
711 	r->rpo = 1;
712 	r1 = r->s1;
713 	if(r1 && !r1->rpo)
714 		n = postorder(r1, rpo2r, n);
715 	r1 = r->s2;
716 	if(r1 && !r1->rpo)
717 		n = postorder(r1, rpo2r, n);
718 	rpo2r[n] = r;
719 	n++;
720 	return n;
721 }
722 
723 long
724 rpolca(long *idom, long rpo1, long rpo2)
725 {
726 	long t;
727 
728 	if(rpo1 == -1)
729 		return rpo2;
730 	while(rpo1 != rpo2){
731 		if(rpo1 > rpo2){
732 			t = rpo2;
733 			rpo2 = rpo1;
734 			rpo1 = t;
735 		}
736 		while(rpo1 < rpo2){
737 			t = idom[rpo2];
738 			if(t >= rpo2)
739 				fatal(Z, "bad idom");
740 			rpo2 = t;
741 		}
742 	}
743 	return rpo1;
744 }
745 
746 int
747 doms(long *idom, long r, long s)
748 {
749 	while(s > r)
750 		s = idom[s];
751 	return s == r;
752 }
753 
754 int
755 loophead(long *idom, Reg *r)
756 {
757 	long src;
758 
759 	src = r->rpo;
760 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
761 		return 1;
762 	for(r = r->p2; r != R; r = r->p2link)
763 		if(doms(idom, src, r->rpo))
764 			return 1;
765 	return 0;
766 }
767 
768 void
769 loopmark(Reg **rpo2r, long head, Reg *r)
770 {
771 	if(r->rpo < head || r->active == head)
772 		return;
773 	r->active = head;
774 	r->loop += LOOP;
775 	if(r->p1 != R)
776 		loopmark(rpo2r, head, r->p1);
777 	for(r = r->p2; r != R; r = r->p2link)
778 		loopmark(rpo2r, head, r);
779 }
780 
781 void
782 loopit(Reg *r, long nr)
783 {
784 	Reg *r1;
785 	long i, d, me;
786 
787 	if(nr > maxnr) {
788 		rpo2r = alloc(nr * sizeof(Reg*));
789 		idom = alloc(nr * sizeof(long));
790 		maxnr = nr;
791 	}
792 
793 	d = postorder(r, rpo2r, 0);
794 	if(d > nr)
795 		fatal(Z, "too many reg nodes");
796 	nr = d;
797 	for(i = 0; i < nr / 2; i++){
798 		r1 = rpo2r[i];
799 		rpo2r[i] = rpo2r[nr - 1 - i];
800 		rpo2r[nr - 1 - i] = r1;
801 	}
802 	for(i = 0; i < nr; i++)
803 		rpo2r[i]->rpo = i;
804 
805 	idom[0] = 0;
806 	for(i = 0; i < nr; i++){
807 		r1 = rpo2r[i];
808 		me = r1->rpo;
809 		d = -1;
810 		if(r1->p1 != R && r1->p1->rpo < me)
811 			d = r1->p1->rpo;
812 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
813 			if(r1->rpo < me)
814 				d = rpolca(idom, d, r1->rpo);
815 		idom[i] = d;
816 	}
817 
818 	for(i = 0; i < nr; i++){
819 		r1 = rpo2r[i];
820 		r1->loop++;
821 		if(r1->p2 != R && loophead(idom, r1))
822 			loopmark(rpo2r, i, r1);
823 	}
824 }
825 
826 void
827 synch(Reg *r, Bits dif)
828 {
829 	Reg *r1;
830 	int z;
831 
832 	for(r1 = r; r1 != R; r1 = r1->s1) {
833 		for(z=0; z<BITS; z++) {
834 			dif.b[z] = (dif.b[z] &
835 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
836 					r1->set.b[z] | r1->regdiff.b[z];
837 			if(dif.b[z] != r1->regdiff.b[z]) {
838 				r1->regdiff.b[z] = dif.b[z];
839 				change++;
840 			}
841 		}
842 		if(r1->active)
843 			break;
844 		r1->active = 1;
845 		for(z=0; z<BITS; z++)
846 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
847 		if(r1->s2 != R)
848 			synch(r1->s2, dif);
849 	}
850 }
851 
852 ulong
853 allreg(ulong b, Rgn *r)
854 {
855 	Var *v;
856 	int i;
857 
858 	v = var + r->varno;
859 	r->regno = 0;
860 	switch(v->etype) {
861 
862 	default:
863 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
864 		break;
865 
866 	case TCHAR:
867 	case TUCHAR:
868 	case TSHORT:
869 	case TUSHORT:
870 	case TINT:
871 	case TUINT:
872 	case TLONG:
873 	case TULONG:
874 	case TIND:
875 	case TARRAY:
876 		i = BtoR(~b);
877 		if(i && r->cost >= 0) {
878 			r->regno = i;
879 			return RtoB(i);
880 		}
881 		break;
882 
883 	case TVLONG:
884 	case TDOUBLE:
885 	case TFLOAT:
886 		i = BtoF(~b);
887 		if(i && r->cost >= 0) {
888 			r->regno = i+NREG;
889 			return FtoB(i);
890 		}
891 		break;
892 	}
893 	return 0;
894 }
895 
896 void
897 paint1(Reg *r, int bn)
898 {
899 	Reg *r1;
900 	Prog *p;
901 	int z;
902 	ulong bb;
903 
904 	z = bn/32;
905 	bb = 1L<<(bn%32);
906 	if(r->act.b[z] & bb)
907 		return;
908 	for(;;) {
909 		if(!(r->refbehind.b[z] & bb))
910 			break;
911 		r1 = r->p1;
912 		if(r1 == R)
913 			break;
914 		if(!(r1->refahead.b[z] & bb))
915 			break;
916 		if(r1->act.b[z] & bb)
917 			break;
918 		r = r1;
919 	}
920 
921 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
922 		change -= CLOAD * r->loop;
923 		if(debug['R'] && debug['v'])
924 			print("%ld%P\tld %B $%d\n", r->loop,
925 				r->prog, blsh(bn), change);
926 	}
927 	for(;;) {
928 		r->act.b[z] |= bb;
929 		p = r->prog;
930 
931 		if(r->use1.b[z] & bb) {
932 			change += CREF * r->loop;
933 			if(debug['R'] && debug['v'])
934 				print("%ld%P\tu1 %B $%d\n", r->loop,
935 					p, blsh(bn), change);
936 		}
937 
938 		if((r->use2.b[z]|r->set.b[z]) & bb) {
939 			change += CREF * r->loop;
940 			if(debug['R'] && debug['v'])
941 				print("%ld%P\tu2 %B $%d\n", r->loop,
942 					p, blsh(bn), change);
943 		}
944 
945 		if(STORE(r) & r->regdiff.b[z] & bb) {
946 			change -= CLOAD * r->loop;
947 			if(debug['R'] && debug['v'])
948 				print("%ld%P\tst %B $%d\n", r->loop,
949 					p, blsh(bn), change);
950 		}
951 
952 		if(r->refbehind.b[z] & bb)
953 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
954 				if(r1->refahead.b[z] & bb)
955 					paint1(r1, bn);
956 
957 		if(!(r->refahead.b[z] & bb))
958 			break;
959 		r1 = r->s2;
960 		if(r1 != R)
961 			if(r1->refbehind.b[z] & bb)
962 				paint1(r1, bn);
963 		r = r->s1;
964 		if(r == R)
965 			break;
966 		if(r->act.b[z] & bb)
967 			break;
968 		if(!(r->refbehind.b[z] & bb))
969 			break;
970 	}
971 }
972 
973 ulong
974 paint2(Reg *r, int bn)
975 {
976 	Reg *r1;
977 	int z;
978 	ulong bb, vreg;
979 
980 	z = bn/32;
981 	bb = 1L << (bn%32);
982 	vreg = regbits;
983 	if(!(r->act.b[z] & bb))
984 		return vreg;
985 	for(;;) {
986 		if(!(r->refbehind.b[z] & bb))
987 			break;
988 		r1 = r->p1;
989 		if(r1 == R)
990 			break;
991 		if(!(r1->refahead.b[z] & bb))
992 			break;
993 		if(!(r1->act.b[z] & bb))
994 			break;
995 		r = r1;
996 	}
997 	for(;;) {
998 		r->act.b[z] &= ~bb;
999 
1000 		vreg |= r->regu;
1001 
1002 		if(r->refbehind.b[z] & bb)
1003 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1004 				if(r1->refahead.b[z] & bb)
1005 					vreg |= paint2(r1, bn);
1006 
1007 		if(!(r->refahead.b[z] & bb))
1008 			break;
1009 		r1 = r->s2;
1010 		if(r1 != R)
1011 			if(r1->refbehind.b[z] & bb)
1012 				vreg |= paint2(r1, bn);
1013 		r = r->s1;
1014 		if(r == R)
1015 			break;
1016 		if(!(r->act.b[z] & bb))
1017 			break;
1018 		if(!(r->refbehind.b[z] & bb))
1019 			break;
1020 	}
1021 	return vreg;
1022 }
1023 
1024 void
1025 paint3(Reg *r, int bn, long rb, int rn)
1026 {
1027 	Reg *r1;
1028 	Prog *p;
1029 	int z;
1030 	ulong bb;
1031 
1032 	z = bn/32;
1033 	bb = 1L << (bn%32);
1034 	if(r->act.b[z] & bb)
1035 		return;
1036 	for(;;) {
1037 		if(!(r->refbehind.b[z] & bb))
1038 			break;
1039 		r1 = r->p1;
1040 		if(r1 == R)
1041 			break;
1042 		if(!(r1->refahead.b[z] & bb))
1043 			break;
1044 		if(r1->act.b[z] & bb)
1045 			break;
1046 		r = r1;
1047 	}
1048 
1049 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1050 		addmove(r, bn, rn, 0);
1051 	for(;;) {
1052 		r->act.b[z] |= bb;
1053 		p = r->prog;
1054 
1055 		if(r->use1.b[z] & bb) {
1056 			if(debug['R'])
1057 				print("%P", p);
1058 			addreg(&p->from, rn);
1059 			if(debug['R'])
1060 				print("\t.c%P\n", p);
1061 		}
1062 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1063 			if(debug['R'])
1064 				print("%P", p);
1065 			addreg(&p->to, rn);
1066 			if(debug['R'])
1067 				print("\t.c%P\n", p);
1068 		}
1069 
1070 		if(STORE(r) & r->regdiff.b[z] & bb)
1071 			addmove(r, bn, rn, 1);
1072 		r->regu |= rb;
1073 
1074 		if(r->refbehind.b[z] & bb)
1075 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1076 				if(r1->refahead.b[z] & bb)
1077 					paint3(r1, bn, rb, rn);
1078 
1079 		if(!(r->refahead.b[z] & bb))
1080 			break;
1081 		r1 = r->s2;
1082 		if(r1 != R)
1083 			if(r1->refbehind.b[z] & bb)
1084 				paint3(r1, bn, rb, rn);
1085 		r = r->s1;
1086 		if(r == R)
1087 			break;
1088 		if(r->act.b[z] & bb)
1089 			break;
1090 		if(!(r->refbehind.b[z] & bb))
1091 			break;
1092 	}
1093 }
1094 
1095 void
1096 addreg(Adr *a, int rn)
1097 {
1098 
1099 	a->sym = 0;
1100 	a->name = D_NONE;
1101 	a->type = D_REG;
1102 	a->reg = rn;
1103 	if(rn >= NREG) {
1104 		a->type = D_FREG;
1105 		a->reg = rn-NREG;
1106 	}
1107 }
1108 
1109 /*
1110  *	bit	reg
1111  *	0	R0
1112  *	1	R1
1113  *	...	...
1114  *	10	R10
1115  */
1116 long
1117 RtoB(int r)
1118 {
1119 
1120 	if(r >= REGMIN && r <= REGMAX)
1121 		return 1L << r;
1122 	return 0;
1123 }
1124 
1125 int
1126 BtoR(long b)
1127 {
1128 	b &= 0x01fcL;
1129 	if(b == 0)
1130 		return 0;
1131 	return bitno(b);
1132 }
1133 
1134 /*
1135  *	bit	reg
1136  *	18	F2
1137  *	19	F3
1138  *	...	...
1139  *	23	F7
1140  */
1141 long
1142 FtoB(int f)
1143 {
1144 
1145 	if(f < 2 || f > NFREG-1)
1146 		return 0;
1147 	return 1L << (f + 16);
1148 }
1149 
1150 int
1151 BtoF(long b)
1152 {
1153 
1154 	b &= 0xfc0000L;
1155 	if(b == 0)
1156 		return 0;
1157 	return bitno(b) - 16;
1158 }
1159