xref: /plan9-contrib/sys/src/cmd/6c/reg.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include "gc.h"
2 
3 #define	COL1	40
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		ALLOC(r, Reg);
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(void *a1, void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = a1;
27 	p2 = a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = RtoB(REGSP) |
55 		RtoB(REGSB) |
56 		RtoB(REGZERO) |
57 		RtoB(REGRET) |
58 		RtoB(REGTMP) |
59 		RtoB(REGLINK) |
60 		RtoB(REGBAD1) |
61 		RtoB(REGBAD2) |
62 		RtoB(REGBAD3) |
63 		RtoB(REGBAD4) |
64 		0;
65 	for(z=0; z<BITS; z++) {
66 		externs.b[z] = 0;
67 		params.b[z] = 0;
68 		consts.b[z] = 0;
69 		addrs.b[z] = 0;
70 	}
71 
72 	/*
73 	 * pass 1
74 	 * build aux data structure
75 	 * allocate pcs
76 	 * find use and set of variables
77 	 */
78 	val = 5L * 5L * 5L * 5L * 5L;
79 	lp = log5;
80 	for(i=0; i<5; i++) {
81 		lp->m = val;
82 		lp->c = 0;
83 		lp->p = R;
84 		val /= 5L;
85 		lp++;
86 	}
87 	val = 0;
88 	for(; p != P; p = p->link) {
89 		switch(p->as) {
90 		case ADATA:
91 		case AGLOBL:
92 		case ANAME:
93 			continue;
94 		}
95 
96 		r = rega();
97 		if(firstr == R) {
98 			firstr = r;
99 			lastr = r;
100 		} else {
101 			lastr->link = r;
102 			r->p1 = lastr;
103 			lastr->s1 = r;
104 			lastr = r;
105 		}
106 		r->prog = p;
107 		r->pc = val;
108 		val++;
109 
110 		lp = log5;
111 		for(i=0; i<5; i++) {
112 			lp->c--;
113 			if(lp->c <= 0) {
114 				lp->c = lp->m;
115 				if(lp->p != R)
116 					lp->p->log5 = r;
117 				lp->p = r;
118 				(lp+1)->c = 0;
119 				break;
120 			}
121 			lp++;
122 		}
123 
124 		r1 = r->p1;
125 		if(r1 != R)
126 		switch(r1->prog->as) {
127 		case ARTS:
128 		case AB:
129 			r->p1 = R;
130 			r1->s1 = R;
131 		}
132 
133 		bit = mkvar(r, &p->from);
134 		if(bany(&bit))
135 		switch(p->as) {
136 		/*
137 		 * left side read
138 		 */
139 		default:
140 print("default -- lhs read %P\n", p);
141 		case AMOV:
142 		case AMOVIB:
143 		case AMOVOB:
144 		case AMOVIS:
145 		case AMOVOS:
146 		case ATEXT:
147 			for(z=0; z<BITS; z++)
148 				r->use1.b[z] |= bit.b[z];
149 			break;
150 
151 		/*
152 		 * funny
153 		 */
154 		case AMOVA:
155 			for(z=0; z<BITS; z++)
156 				addrs.b[z] |= bit.b[z];
157 			break;
158 		}
159 
160 		bit = mkvar(r, &p->to);
161 		if(bany(&bit))
162 		switch(p->as) {
163 		default:
164 			diag(Z, "reg: unknown op: %P", p);
165 			break;
166 
167 		/*
168 		 * right side read
169 		 */
170 		case ACMPI:
171 		case ACMPO:
172 			for(z=0; z<BITS; z++)
173 				r->use2.b[z] |= bit.b[z];
174 			break;
175 
176 		/*
177 		 * right side write
178 		 */
179 		case AMOV:
180 		case AMOVIB:
181 		case AMOVOB:
182 		case AMOVIS:
183 		case AMOVOS:
184 			for(z=0; z<BITS; z++)
185 				r->set.b[z] |= bit.b[z];
186 			break;
187 
188 		/*
189 		 * right side read+write
190 		 */
191 		case AADDI:
192 		case AADDO:
193 		case AAND:
194 		case ASUBI:
195 		case ASUBO:
196 		case AOR:
197 		case AXOR:
198 			for(z=0; z<BITS; z++) {
199 				r->set.b[z] |= bit.b[z];
200 				r->use2.b[z] |= bit.b[z];
201 			}
202 			break;
203 
204 		/*
205 		 * funny
206 		 */
207 		case ABAL:
208 			for(z=0; z<BITS; z++)
209 				addrs.b[z] |= bit.b[z];
210 			break;
211 		}
212 	}
213 	if(firstr == R)
214 		return;
215 	initpc = pc - val;
216 
217 	/*
218 	 * pass 2
219 	 * turn branch references to pointers
220 	 * build back pointers
221 	 */
222 	for(r = firstr; r != R; r = r->link) {
223 		p = r->prog;
224 		if(p->to.type == D_BRANCH) {
225 			val = p->to.offset - initpc;
226 			r1 = firstr;
227 			while(r1 != R) {
228 				r2 = r1->log5;
229 				if(r2 != R && val >= r2->pc) {
230 					r1 = r2;
231 					continue;
232 				}
233 				if(r1->pc == val)
234 					break;
235 				r1 = r1->link;
236 			}
237 			if(r1 == R) {
238 				nearln = p->lineno;
239 				diag(Z, "ref not found\n%P", p);
240 				continue;
241 			}
242 			if(r1 == r) {
243 				nearln = p->lineno;
244 				diag(Z, "ref to self\n%P", p);
245 				continue;
246 			}
247 			r->s2 = r1;
248 			r->p2link = r1->p2;
249 			r1->p2 = r;
250 		}
251 	}
252 	if(debug['R']) {
253 		p = firstr->prog;
254 		print("\n%L %D\n", p->lineno, &p->from);
255 	}
256 
257 	/*
258 	 * pass 2.5
259 	 * find looping structure
260 	 */
261 	for(r = firstr; r != R; r = r->link)
262 		r->active = 0;
263 	change = 0;
264 	loopit(firstr);
265 	if(debug['R'] && debug['v']) {
266 		print("\nlooping structure:\n");
267 		for(r = firstr; r != R; r = r->link) {
268 			print("%d:%P", r->loop, r->prog);
269 			for(z=0; z<BITS; z++)
270 				bit.b[z] = r->use1.b[z] |
271 					   r->use2.b[z] |
272 					   r->set.b[z];
273 			if(bany(&bit)) {
274 				print("%|", COL1);
275 				if(bany(&r->use1))
276 					print(" u1=%B", r->use1);
277 				if(bany(&r->use2))
278 					print(" u2=%B", r->use2);
279 				if(bany(&r->set))
280 					print(" st=%B", r->set);
281 			}
282 			print("\n");
283 		}
284 	}
285 
286 	/*
287 	 * pass 3
288 	 * iterate propagating usage
289 	 * 	back until flow graph is complete
290 	 */
291 loop1:
292 	change = 0;
293 	for(r = firstr; r != R; r = r->link)
294 		r->active = 0;
295 	for(r = firstr; r != R; r = r->link)
296 		if(r->prog->as == ARTS)
297 			prop(r, zbits, zbits);
298 loop11:
299 	/* pick up unreachable code */
300 	i = 0;
301 	for(r = firstr; r != R; r = r1) {
302 		r1 = r->link;
303 		if(r1 && r1->active && !r->active) {
304 			prop(r, zbits, zbits);
305 			i = 1;
306 		}
307 	}
308 	if(i)
309 		goto loop11;
310 	if(change)
311 		goto loop1;
312 
313 
314 	/*
315 	 * pass 4
316 	 * iterate propagating register/variable synchrony
317 	 * 	forward until graph is complete
318 	 */
319 loop2:
320 	change = 0;
321 	for(r = firstr; r != R; r = r->link)
322 		r->active = 0;
323 	synch(firstr, zbits);
324 	if(change)
325 		goto loop2;
326 
327 
328 	/*
329 	 * pass 5
330 	 * isolate regions
331 	 * calculate costs (paint1)
332 	 */
333 	r = firstr;
334 	if(r) {
335 		for(z=0; z<BITS; z++)
336 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
337 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
338 		if(bany(&bit)) {
339 			nearln = r->prog->lineno;
340 			warn(Z, "used and not set: %B", bit);
341 			if(debug['R'] && !debug['w'])
342 				print("used and not set: %B\n", bit);
343 		}
344 	}
345 	if(debug['R'] && debug['v'])
346 		print("\nprop structure:\n");
347 	for(r = firstr; r != R; r = r->link)
348 		r->act = zbits;
349 	rgp = region;
350 	nregion = 0;
351 	for(r = firstr; r != R; r = r->link) {
352 		if(debug['R'] && debug['v']) {
353 			print("%P%|", r->prog, COL1);
354 			if(bany(&r->set))
355 				print("s:%B ", r->set);
356 			if(bany(&r->refahead))
357 				print("ra:%B ", r->refahead);
358 			if(bany(&r->calahead))
359 				print("ca:%B ", r->calahead);
360 			print("\n");
361 		}
362 		for(z=0; z<BITS; z++)
363 			bit.b[z] = r->set.b[z] &
364 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
365 		if(bany(&bit)) {
366 			nearln = r->prog->lineno;
367 			warn(Z, "set and not used: %B", bit);
368 			if(debug['R'])
369 				print("set an not used: %B\n", bit);
370 			excise(r);
371 		}
372 		for(z=0; z<BITS; z++)
373 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
374 		while(bany(&bit)) {
375 			i = bnum(bit);
376 			rgp->enter = r;
377 			rgp->varno = i;
378 			change = 0;
379 			if(debug['R'] && debug['v'])
380 				print("\n");
381 			paint1(r, i);
382 			bit.b[i/32] &= ~(1L<<(i%32));
383 			if(change <= 0) {
384 				if(debug['R'])
385 					print("%L$%d: %B\n",
386 						r->prog->lineno, change, blsh(i));
387 				continue;
388 			}
389 			rgp->cost = change;
390 			nregion++;
391 			if(nregion >= NRGN) {
392 				warn(Z, "too many regions");
393 				goto brk;
394 			}
395 			rgp++;
396 		}
397 	}
398 brk:
399 	qsort(region, nregion, sizeof(region[0]), rcmp);
400 
401 	/*
402 	 * pass 6
403 	 * determine used registers (paint2)
404 	 * replace code (paint3)
405 	 */
406 	rgp = region;
407 	for(i=0; i<nregion; i++) {
408 		bit = blsh(rgp->varno);
409 		vreg = paint2(rgp->enter, rgp->varno);
410 		vreg = allreg(vreg, rgp);
411 		if(debug['R']) {
412 			print("%L$%d %R: %B\n",
413 				rgp->enter->prog->lineno,
414 				rgp->cost,
415 				rgp->regno,
416 				bit);
417 		}
418 		if(rgp->regno != 0)
419 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
420 		rgp++;
421 	}
422 	/*
423 	 * pass 7
424 	 * peep-hole on basic block
425 	 */
426 	if(!debug['R'] || debug['P'])
427 		peep();
428 
429 	/*
430 	 * pass 8
431 	 * recalculate pc
432 	 */
433 	val = initpc;
434 	for(r = firstr; r != R; r = r1) {
435 		r->pc = val;
436 		p = r->prog;
437 		p1 = P;
438 		r1 = r->link;
439 		if(r1 != R)
440 			p1 = r1->prog;
441 		for(; p != p1; p = p->link) {
442 			switch(p->as) {
443 			default:
444 				val++;
445 				break;
446 
447 			case ANOP:
448 			case ADATA:
449 			case AGLOBL:
450 			case ANAME:
451 				break;
452 			}
453 		}
454 	}
455 	pc = val;
456 
457 	/*
458 	 * fix up branches
459 	 */
460 	if(debug['R'])
461 		if(bany(&addrs))
462 			print("addrs: %B\n", addrs);
463 
464 	r1 = 0; /* set */
465 	for(r = firstr; r != R; r = r->link) {
466 		p = r->prog;
467 		if(p->to.type == D_BRANCH)
468 			p->to.offset = r->s2->pc;
469 		r1 = r;
470 	}
471 
472 	/*
473 	 * last pass
474 	 * eliminate nops
475 	 * free aux structures
476 	 */
477 	for(p = firstr->prog; p != P; p = p->link){
478 		while(p->link && p->link->as == ANOP)
479 			p->link = p->link->link;
480 	}
481 	if(r1 != R) {
482 		r1->link = freer;
483 		freer = firstr;
484 	}
485 }
486 
487 /*
488  * add mov b,rn
489  * just after r
490  */
491 void
492 addmove(Reg *r, int bn, int rn, int f)
493 {
494 	Prog *p, *p1;
495 	Adr *a;
496 	Var *v;
497 
498 	ALLOC(p1,Prog);
499 	*p1 = zprog;
500 	p = r->prog;
501 
502 	p1->link = p->link;
503 	p->link = p1;
504 	p1->lineno = p->lineno;
505 
506 	v = var + bn;
507 
508 	a = &p1->to;
509 	a->sym = v->sym;
510 	a->offset = v->offset;
511 	a->etype = v->etype;
512 	a->type = v->name;
513 
514 	switch(v->etype) {
515 	default:
516 		p1->as = AMOV;
517 		break;
518 
519 	case TCHAR:
520 		p1->as = AMOVIB;
521 		break;
522 
523 	case TUCHAR:
524 		p1->as = AMOVOB;
525 		break;
526 
527 	case TSHORT:
528 		p1->as = AMOVIS;
529 		break;
530 
531 	case TUSHORT:
532 		p1->as = AMOVOS;
533 		break;
534 	}
535 
536 	p1->from.type = rn;
537 	if(!f) {
538 		p1->from = *a;
539 		*a = zprog.from;
540 		a->type = rn;
541 	}
542 	if(debug['R'])
543 		print("%P%|.a%P\n", p, COL1, p1);
544 }
545 
546 ulong
547 doregbits(int r)
548 {
549 	ulong b;
550 
551 	b = 0;
552 	if(r >= D_INDIR)
553 		r -= D_INDIR;
554 	if(r >= D_R0 && r <= D_R0+31)
555 		b |= RtoB(r);
556 	return b;
557 }
558 
559 Bits
560 mkvar(Reg *r, Adr *a)
561 {
562 	Var *v;
563 	int i, t, n, et, z;
564 	long o;
565 	Bits bit;
566 	Sym *s;
567 
568 	/*
569 	 * mark registers used
570 	 */
571 	t = a->type;
572 	r->regu |= doregbits(t);
573 	r->regu |= doregbits(a->index);
574 
575 	switch(t) {
576 	default:
577 		goto none;
578 	case D_ADDR:
579 		a->type = a->index;
580 		bit = mkvar(r, a);
581 		for(z=0; z<BITS; z++)
582 			addrs.b[z] |= bit.b[z];
583 		a->type = t;
584 		goto none;
585 	case D_EXTERN:
586 	case D_STATIC:
587 	case D_PARAM:
588 	case D_AUTO:
589 		n = t;
590 		break;
591 	}
592 	s = a->sym;
593 	if(s == S)
594 		goto none;
595 	if(s->name[0] == '.')
596 		goto none;
597 	et = a->etype;
598 	o = a->offset;
599 	v = var;
600 	for(i=0; i<nvar; i++) {
601 		if(s == v->sym)
602 		if(n == v->name)
603 		if(o == v->offset)
604 			goto out;
605 		v++;
606 	}
607 	if(nvar >= NVAR) {
608 		warn(Z, "variable not optimized: %s", s->name);
609 		goto none;
610 	}
611 	i = nvar;
612 	nvar++;
613 	v = &var[i];
614 	v->sym = s;
615 	v->offset = o;
616 	v->name = n;
617 	v->etype = et;
618 	if(debug['R'])
619 		print("bit=%2d et=%2d %D\n", i, et, a);
620 
621 out:
622 	bit = blsh(i);
623 	if(n == D_EXTERN || n == D_STATIC)
624 		for(z=0; z<BITS; z++)
625 			externs.b[z] |= bit.b[z];
626 	if(n == D_PARAM)
627 		for(z=0; z<BITS; z++)
628 			params.b[z] |= bit.b[z];
629 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
630 		for(z=0; z<BITS; z++)
631 			addrs.b[z] |= bit.b[z];
632 	return bit;
633 
634 none:
635 	return zbits;
636 }
637 
638 void
639 prop(Reg *r, Bits ref, Bits cal)
640 {
641 	Reg *r1, *r2;
642 	int z;
643 
644 	for(r1 = r; r1 != R; r1 = r1->p1) {
645 		for(z=0; z<BITS; z++) {
646 			ref.b[z] |= r1->refahead.b[z];
647 			if(ref.b[z] != r1->refahead.b[z]) {
648 				r1->refahead.b[z] = ref.b[z];
649 				change++;
650 			}
651 			cal.b[z] |= r1->calahead.b[z];
652 			if(cal.b[z] != r1->calahead.b[z]) {
653 				r1->calahead.b[z] = cal.b[z];
654 				change++;
655 			}
656 		}
657 		switch(r1->prog->as) {
658 		case ABAL:
659 			for(z=0; z<BITS; z++) {
660 				cal.b[z] |= ref.b[z] | externs.b[z];
661 				ref.b[z] = 0;
662 			}
663 			break;
664 
665 		case ATEXT:
666 			for(z=0; z<BITS; z++) {
667 				cal.b[z] = 0;
668 				ref.b[z] = 0;
669 			}
670 			break;
671 
672 		case ARTS:
673 			for(z=0; z<BITS; z++) {
674 				cal.b[z] = externs.b[z];
675 				ref.b[z] = 0;
676 			}
677 		}
678 		for(z=0; z<BITS; z++) {
679 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
680 				r1->use1.b[z] | r1->use2.b[z];
681 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
682 			r1->refbehind.b[z] = ref.b[z];
683 			r1->calbehind.b[z] = cal.b[z];
684 		}
685 		if(r1->active)
686 			break;
687 		r1->active = 1;
688 	}
689 	for(; r != r1; r = r->p1)
690 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
691 			prop(r2, r->refbehind, r->calbehind);
692 }
693 
694 int
695 loopit(Reg *r)
696 {
697 	Reg *r1;
698 	int l, m;
699 
700 	l = 0;
701 	r->active = 1;
702 	r->loop = 0;
703 	if(r1 = r->s1)
704 	switch(r1->active) {
705 	case 0:
706 		l += loopit(r1);
707 		break;
708 	case 1:
709 		l += LOOP;
710 		r1->loop += LOOP;
711 	}
712 	if(r1 = r->s2)
713 	switch(r1->active) {
714 	case 0:
715 		l += loopit(r1);
716 		break;
717 	case 1:
718 		l += LOOP;
719 		r1->loop += LOOP;
720 	}
721 	r->active = 2;
722 	m = r->loop;
723 	r->loop = l + 1;
724 	return l - m;
725 }
726 
727 void
728 synch(Reg *r, Bits dif)
729 {
730 	Reg *r1;
731 	int z;
732 
733 	for(r1 = r; r1 != R; r1 = r1->s1) {
734 		for(z=0; z<BITS; z++) {
735 			dif.b[z] = (dif.b[z] &
736 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
737 					r1->set.b[z] | r1->regdiff.b[z];
738 			if(dif.b[z] != r1->regdiff.b[z]) {
739 				r1->regdiff.b[z] = dif.b[z];
740 				change++;
741 			}
742 		}
743 		if(r1->active)
744 			break;
745 		r1->active = 1;
746 		for(z=0; z<BITS; z++)
747 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
748 		if(r1->s2 != R)
749 			synch(r1->s2, dif);
750 	}
751 }
752 
753 ulong
754 allreg(ulong b, Rgn *r)
755 {
756 	Var *v;
757 	int i;
758 
759 	v = var + r->varno;
760 	r->regno = 0;
761 	switch(v->etype) {
762 
763 	default:
764 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
765 		break;
766 
767 	case TCHAR:
768 	case TUCHAR:
769 	case TSHORT:
770 	case TUSHORT:
771 	case TLONG:
772 	case TULONG:
773 	case TIND:
774 	case TARRAY:
775 		i = BtoR(~b);
776 		if(i && r->cost > 0) {
777 			r->regno = i;
778 			return RtoB(i);
779 		}
780 		break;
781 
782 	case TVLONG:
783 	case TDOUBLE:
784 	case TFLOAT:
785 		break;
786 	}
787 	return 0;
788 }
789 
790 void
791 paint1(Reg *r, int bn)
792 {
793 	Reg *r1;
794 	Prog *p;
795 	int z;
796 	ulong bb;
797 
798 	z = bn/32;
799 	bb = 1L<<(bn%32);
800 	if(r->act.b[z] & bb)
801 		return;
802 	for(;;) {
803 		if(!(r->refbehind.b[z] & bb))
804 			break;
805 		r1 = r->p1;
806 		if(r1 == R)
807 			break;
808 		if(!(r1->refahead.b[z] & bb))
809 			break;
810 		if(r1->act.b[z] & bb)
811 			break;
812 		r = r1;
813 	}
814 
815 	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
816 		change -= CLOAD * r->loop;
817 		if(debug['R'] && debug['v'])
818 			print("%d%P%|ld %B $%d\n", r->loop,
819 				r->prog, COL1, blsh(bn), change);
820 	}
821 	for(;;) {
822 		r->act.b[z] |= bb;
823 		p = r->prog;
824 
825 		if(r->use1.b[z] & bb) {
826 			change += CREF * r->loop;
827 			if(debug['R'] && debug['v'])
828 				print("%d%P%|u1 %B $%d\n", r->loop,
829 					p, COL1, blsh(bn), change);
830 		}
831 
832 		if((r->use2.b[z]|r->set.b[z]) & bb) {
833 			change += CREF * r->loop;
834 			if(debug['R'] && debug['v'])
835 				print("%d%P%|u2 %B $%d\n", r->loop,
836 					p, COL1, blsh(bn), change);
837 		}
838 
839 		if(STORE(r) & r->regdiff.b[z] & bb) {
840 			change -= CLOAD * r->loop;
841 			if(debug['R'] && debug['v'])
842 				print("%d%P%|st %B $%d\n", r->loop,
843 					p, COL1, blsh(bn), change);
844 		}
845 
846 		if(r->refbehind.b[z] & bb)
847 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
848 				if(r1->refahead.b[z] & bb)
849 					paint1(r1, bn);
850 
851 		if(!(r->refahead.b[z] & bb))
852 			break;
853 		r1 = r->s2;
854 		if(r1 != R)
855 			if(r1->refbehind.b[z] & bb)
856 				paint1(r1, bn);
857 		r = r->s1;
858 		if(r == R)
859 			break;
860 		if(r->act.b[z] & bb)
861 			break;
862 		if(!(r->refbehind.b[z] & bb))
863 			break;
864 	}
865 }
866 
867 ulong
868 paint2(Reg *r, int bn)
869 {
870 	Reg *r1;
871 	int z;
872 	ulong bb, vreg;
873 
874 	z = bn/32;
875 	bb = 1L << (bn%32);
876 	vreg = regbits;
877 	if(!(r->act.b[z] & bb))
878 		return vreg;
879 	for(;;) {
880 		if(!(r->refbehind.b[z] & bb))
881 			break;
882 		r1 = r->p1;
883 		if(r1 == R)
884 			break;
885 		if(!(r1->refahead.b[z] & bb))
886 			break;
887 		if(!(r1->act.b[z] & bb))
888 			break;
889 		r = r1;
890 	}
891 	for(;;) {
892 		r->act.b[z] &= ~bb;
893 
894 		vreg |= r->regu;
895 
896 		if(r->refbehind.b[z] & bb)
897 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
898 				if(r1->refahead.b[z] & bb)
899 					vreg |= paint2(r1, bn);
900 
901 		if(!(r->refahead.b[z] & bb))
902 			break;
903 		r1 = r->s2;
904 		if(r1 != R)
905 			if(r1->refbehind.b[z] & bb)
906 				vreg |= paint2(r1, bn);
907 		r = r->s1;
908 		if(r == R)
909 			break;
910 		if(!(r->act.b[z] & bb))
911 			break;
912 		if(!(r->refbehind.b[z] & bb))
913 			break;
914 	}
915 	return vreg;
916 }
917 
918 void
919 paint3(Reg *r, int bn, long rb, int rn)
920 {
921 	Reg *r1;
922 	Prog *p;
923 	int z;
924 	ulong bb;
925 
926 	z = bn/32;
927 	bb = 1L << (bn%32);
928 	if(r->act.b[z] & bb)
929 		return;
930 	for(;;) {
931 		if(!(r->refbehind.b[z] & bb))
932 			break;
933 		r1 = r->p1;
934 		if(r1 == R)
935 			break;
936 		if(!(r1->refahead.b[z] & bb))
937 			break;
938 		if(r1->act.b[z] & bb)
939 			break;
940 		r = r1;
941 	}
942 
943 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
944 		addmove(r, bn, rn, 0);
945 	for(;;) {
946 		r->act.b[z] |= bb;
947 		p = r->prog;
948 
949 		if(r->use1.b[z] & bb) {
950 			if(debug['R'])
951 				print("%P", p);
952 			addreg(&p->from, rn);
953 			if(debug['R'])
954 				print("%|.c%P\n", COL1, p);
955 		}
956 		if((r->use2.b[z]|r->set.b[z]) & bb) {
957 			if(debug['R'])
958 				print("%P", p);
959 			addreg(&p->to, rn);
960 			if(debug['R'])
961 				print("%|.c%P\n", COL1, p);
962 		}
963 
964 		if(STORE(r) & r->regdiff.b[z] & bb)
965 			addmove(r, bn, rn, 1);
966 		r->regu |= rb;
967 
968 		if(r->refbehind.b[z] & bb)
969 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
970 				if(r1->refahead.b[z] & bb)
971 					paint3(r1, bn, rb, rn);
972 
973 		if(!(r->refahead.b[z] & bb))
974 			break;
975 		r1 = r->s2;
976 		if(r1 != R)
977 			if(r1->refbehind.b[z] & bb)
978 				paint3(r1, bn, rb, rn);
979 		r = r->s1;
980 		if(r == R)
981 			break;
982 		if(r->act.b[z] & bb)
983 			break;
984 		if(!(r->refbehind.b[z] & bb))
985 			break;
986 	}
987 }
988 
989 void
990 addreg(Adr *a, int rn)
991 {
992 
993 	a->sym = 0;
994 	a->offset = 0;
995 	a->type = rn;
996 }
997 
998 long
999 RtoB(int r)
1000 {
1001 
1002 	if(r >= D_R0 && r <= D_R0+31)
1003 		return 1L << r-D_R0;
1004 	return 0;
1005 }
1006 
1007 BtoR(long b)
1008 {
1009 
1010 	if(b == 0)
1011 		return 0;
1012 	return bitno(b) + D_R0;
1013 }
1014