xref: /plan9-contrib/sys/src/cmd/vc/reg.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include "gc.h"
2 
3 #define	COL1	32
4 
5 void	addsplits(void);
6 
7 Reg*
8 rega(void)
9 {
10 	Reg *r;
11 
12 	r = freer;
13 	if(r == R) {
14 		ALLOC(r, Reg);
15 	} else
16 		freer = r->link;
17 
18 	*r = zreg;
19 	return r;
20 }
21 
22 int
23 rcmp(void *a1, void *a2)
24 {
25 	Rgn *p1, *p2;
26 	int c1, c2;
27 
28 	p1 = a1;
29 	p2 = a2;
30 	c1 = p2->cost;
31 	c2 = p1->cost;
32 	if(c1 -= c2)
33 		return c1;
34 	return p2->varno - p1->varno;
35 }
36 
37 void
38 regopt(Prog *p)
39 {
40 	Reg *r, *r1, *r2;
41 	Prog *p1;
42 	int i, z;
43 	long initpc, val;
44 	ulong vreg;
45 	Bits bit;
46 	struct
47 	{
48 		long	m;
49 		long	c;
50 		Reg*	p;
51 	} log5[6], *lp;
52 
53 	firstr = R;
54 	lastr = R;
55 	nvar = 0;
56 	regbits = 0;
57 	for(z=0; z<BITS; z++) {
58 		externs.b[z] = 0;
59 		params.b[z] = 0;
60 		consts.b[z] = 0;
61 		addrs.b[z] = 0;
62 	}
63 
64 	/*
65 	 * pass 1
66 	 * build aux data structure
67 	 * allocate pcs
68 	 * find use and set of variables
69 	 */
70 	val = 5L * 5L * 5L * 5L * 5L;
71 	lp = log5;
72 	for(i=0; i<5; i++) {
73 		lp->m = val;
74 		lp->c = 0;
75 		lp->p = R;
76 		val /= 5L;
77 		lp++;
78 	}
79 	val = 0;
80 	for(; p != P; p = p->link) {
81 		switch(p->as) {
82 		case ADATA:
83 		case AGLOBL:
84 		case ANAME:
85 			continue;
86 		}
87 		r = rega();
88 		if(firstr == R) {
89 			firstr = r;
90 			lastr = r;
91 		} else {
92 			lastr->link = r;
93 			r->p1 = lastr;
94 			lastr->s1 = r;
95 			lastr = r;
96 		}
97 		r->prog = p;
98 		r->pc = val;
99 		val++;
100 
101 		lp = log5;
102 		for(i=0; i<5; i++) {
103 			lp->c--;
104 			if(lp->c <= 0) {
105 				lp->c = lp->m;
106 				if(lp->p != R)
107 					lp->p->log5 = r;
108 				lp->p = r;
109 				(lp+1)->c = 0;
110 				break;
111 			}
112 			lp++;
113 		}
114 
115 		r1 = r->p1;
116 		if(r1 != R)
117 		switch(r1->prog->as) {
118 		case ARET:
119 		case AJMP:
120 		case ARFE:
121 			r->p1 = R;
122 			r1->s1 = R;
123 		}
124 
125 		/*
126 		 * left side always read
127 		 */
128 		bit = mkvar(&p->from, p->as==AMOVW);
129 		for(z=0; z<BITS; z++)
130 			r->use1.b[z] |= bit.b[z];
131 
132 		/*
133 		 * right side depends on opcode
134 		 */
135 		bit = mkvar(&p->to, 0);
136 		if(bany(&bit))
137 		switch(p->as) {
138 		default:
139 			diag(Z, "reg: unknown asop: %A", p->as);
140 			break;
141 
142 		/*
143 		 * right side write
144 		 */
145 		case ANOP:
146 		case AMOVB:
147 		case AMOVBU:
148 		case AMOVH:
149 		case AMOVHU:
150 		case AMOVW:
151 		case AMOVF:
152 		case AMOVD:
153 			for(z=0; z<BITS; z++)
154 				r->set.b[z] |= bit.b[z];
155 			break;
156 
157 		/*
158 		 * funny
159 		 */
160 		case AJAL:
161 			for(z=0; z<BITS; z++)
162 				addrs.b[z] |= bit.b[z];
163 			break;
164 		}
165 	}
166 	if(firstr == R)
167 		return;
168 	initpc = pc - val;
169 
170 	/*
171 	 * pass 2
172 	 * turn branch references to pointers
173 	 * build back pointers
174 	 */
175 	for(r = firstr; r != R; r = r->link) {
176 		p = r->prog;
177 		if(p->to.type == D_BRANCH) {
178 			val = p->to.offset - initpc;
179 			r1 = firstr;
180 			while(r1 != R) {
181 				r2 = r1->log5;
182 				if(r2 != R && val >= r2->pc) {
183 					r1 = r2;
184 					continue;
185 				}
186 				if(r1->pc == val)
187 					break;
188 				r1 = r1->link;
189 			}
190 			if(r1 == R) {
191 				nearln = p->lineno;
192 				diag(Z, "ref not found\n%P", p);
193 				continue;
194 			}
195 			if(r1 == r) {
196 				nearln = p->lineno;
197 				diag(Z, "ref to self\n%P", p);
198 				continue;
199 			}
200 			r->s2 = r1;
201 			r->p2link = r1->p2;
202 			r1->p2 = r;
203 		}
204 	}
205 	if(debug['R']) {
206 		p = firstr->prog;
207 		print("\n%L %D\n", p->lineno, &p->from);
208 	}
209 
210 	/*
211 	 * pass 2.5
212 	 * find looping structure
213 	 */
214 	for(r = firstr; r != R; r = r->link)
215 		r->active = 0;
216 	change = 0;
217 	loopit(firstr);
218 
219 	/*
220 	 * pass 3
221 	 * iterate propagating usage
222 	 * 	back until flow graph is complete
223 	 */
224 loop1:
225 	change = 0;
226 	for(r = firstr; r != R; r = r->link)
227 		r->active = 0;
228 	for(r = firstr; r != R; r = r->link)
229 		if(r->prog->as == ARET)
230 			prop(r, zbits, zbits);
231 loop11:
232 	/* pick up unreachable code */
233 	i = 0;
234 	for(r = firstr; r != R; r = r1) {
235 		r1 = r->link;
236 		if(r1 && r1->active && !r->active) {
237 			prop(r, zbits, zbits);
238 			i = 1;
239 		}
240 	}
241 	if(i)
242 		goto loop11;
243 	if(change)
244 		goto loop1;
245 
246 
247 	/*
248 	 * pass 4
249 	 * iterate propagating register/variable synchrony
250 	 * 	forward until graph is complete
251 	 */
252 loop2:
253 	change = 0;
254 	for(r = firstr; r != R; r = r->link)
255 		r->active = 0;
256 	synch(firstr, zbits);
257 	if(change)
258 		goto loop2;
259 
260 	addsplits();
261 
262 	if(debug['R'] && debug['v']) {
263 		print("\nprop structure:\n");
264 		for(r = firstr; r != R; r = r->link) {
265 			print("%d:%P", r->loop, r->prog);
266 			for(z=0; z<BITS; z++)
267 				bit.b[z] = r->set.b[z] |
268 					r->refahead.b[z] | r->calahead.b[z] |
269 					r->refbehind.b[z] | r->calbehind.b[z] |
270 					r->use1.b[z] | r->use2.b[z];
271 			if(bany(&bit)) {
272 				print("%|", COL1);
273 				if(bany(&r->use1))
274 					print(" u1=%B", r->use1);
275 				if(bany(&r->use2))
276 					print(" u2=%B", r->use2);
277 				if(bany(&r->set))
278 					print(" st=%B", r->set);
279 				if(bany(&r->refahead))
280 					print(" ra=%B", r->refahead);
281 				if(bany(&r->calahead))
282 					print(" ca=%B", r->calahead);
283 				if(bany(&r->refbehind))
284 					print(" rb=%B", r->refbehind);
285 				if(bany(&r->calbehind))
286 					print(" cb=%B", r->calbehind);
287 			}
288 			print("\n");
289 		}
290 	}
291 
292 	/*
293 	 * pass 5
294 	 * isolate regions
295 	 * calculate costs (paint1)
296 	 */
297 	r = firstr;
298 	if(r) {
299 		for(z=0; z<BITS; z++)
300 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
301 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
302 		if(bany(&bit)) {
303 			nearln = r->prog->lineno;
304 			warn(Z, "used and not set: %B", bit);
305 			if(debug['R'] && !debug['w'])
306 				print("used and not set: %B\n", bit);
307 		}
308 	}
309 
310 	for(r = firstr; r != R; r = r->link)
311 		r->act = zbits;
312 	rgp = region;
313 	nregion = 0;
314 	for(r = firstr; r != R; r = r->link) {
315 		for(z=0; z<BITS; z++)
316 			bit.b[z] = r->set.b[z] &
317 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
318 		if(bany(&bit)) {
319 			nearln = r->prog->lineno;
320 			warn(Z, "set and not used: %B", bit);
321 			if(debug['R'])
322 				print("set and not used: %B\n", bit);
323 			excise(r);
324 		}
325 		for(z=0; z<BITS; z++)
326 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
327 		while(bany(&bit)) {
328 			i = bnum(bit);
329 			rgp->enter = r;
330 			rgp->varno = i;
331 			change = 0;
332 			if(debug['R'] && debug['v'])
333 				print("\n");
334 			paint1(r, i);
335 			bit.b[i/32] &= ~(1L<<(i%32));
336 			if(change <= 0) {
337 				if(debug['R'])
338 					print("%L $%d: %B\n",
339 						r->prog->lineno, change, blsh(i));
340 				continue;
341 			}
342 			rgp->cost = change;
343 			nregion++;
344 			if(nregion >= NRGN) {
345 				warn(Z, "too many regions");
346 				goto brk;
347 			}
348 			rgp++;
349 		}
350 	}
351 brk:
352 	qsort(region, nregion, sizeof(region[0]), rcmp);
353 
354 	/*
355 	 * pass 6
356 	 * determine used registers (paint2)
357 	 * replace code (paint3)
358 	 */
359 	rgp = region;
360 	for(i=0; i<nregion; i++) {
361 		bit = blsh(rgp->varno);
362 		vreg = paint2(rgp->enter, rgp->varno);
363 		vreg = allreg(vreg, rgp);
364 		if(debug['R']) {
365 			if(rgp->regno >= NREG)
366 				print("%L $%d F%d: %B\n",
367 					rgp->enter->prog->lineno,
368 					rgp->cost,
369 					rgp->regno-NREG,
370 					bit);
371 			else
372 				print("%L $%d R%d: %B\n",
373 					rgp->enter->prog->lineno,
374 					rgp->cost,
375 					rgp->regno,
376 					bit);
377 		}
378 		if(rgp->regno != 0)
379 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
380 		rgp++;
381 	}
382 	/*
383 	 * pass 7
384 	 * peep-hole on basic block
385 	 */
386 	if(!debug['R'] || debug['P'])
387 		peep();
388 
389 	/*
390 	 * pass 8
391 	 * recalculate pc
392 	 */
393 	val = initpc;
394 	for(r = firstr; r != R; r = r1) {
395 		r->pc = val;
396 		p = r->prog;
397 		p1 = P;
398 		r1 = r->link;
399 		if(r1 != R)
400 			p1 = r1->prog;
401 		for(; p != p1; p = p->link) {
402 			switch(p->as) {
403 			default:
404 				val++;
405 				break;
406 
407 			case ANOP:
408 			case ADATA:
409 			case AGLOBL:
410 			case ANAME:
411 				break;
412 			}
413 		}
414 	}
415 	pc = val;
416 
417 	/*
418 	 * fix up branches
419 	 */
420 	if(debug['R'])
421 		if(bany(&addrs))
422 			print("addrs: %B\n", addrs);
423 
424 	r1 = 0; /* set */
425 	for(r = firstr; r != R; r = r->link) {
426 		p = r->prog;
427 		if(p->to.type == D_BRANCH)
428 			p->to.offset = r->s2->pc;
429 		r1 = r;
430 	}
431 
432 	/*
433 	 * last pass
434 	 * eliminate nops
435 	 * free aux structures
436 	 */
437 	for(p = firstr->prog; p != P; p = p->link){
438 		while(p->link && p->link->as == ANOP)
439 			p->link = p->link->link;
440 	}
441 	if(r1 != R) {
442 		r1->link = freer;
443 		freer = firstr;
444 	}
445 }
446 
447 void
448 addsplits(void)
449 {
450 	Reg *r, *r1;
451 	int z, i;
452 	Bits bit;
453 
454 	for(r = firstr; r != R; r = r->link) {
455 		if(r->loop > 1)
456 			continue;
457 		if(r->prog->as == AJAL)
458 			continue;
459 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
460 			if(r1->loop <= 1)
461 				continue;
462 			for(z=0; z<BITS; z++)
463 				bit.b[z] = r1->calbehind.b[z] &
464 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
465 					~(r->calahead.b[z] & addrs.b[z]);
466 			while(bany(&bit)) {
467 				i = bnum(bit);
468 				bit.b[i/32] &= ~(1L << (i%32));
469 			}
470 		}
471 	}
472 }
473 
474 /*
475  * add mov b,rn
476  * just after r
477  */
478 void
479 addmove(Reg *r, int bn, int rn, int f)
480 {
481 	Prog *p, *p1;
482 	Adr *a;
483 	Var *v;
484 
485 	ALLOC(p1,Prog);
486 	*p1 = zprog;
487 	p = r->prog;
488 
489 	p1->link = p->link;
490 	p->link = p1;
491 	p1->lineno = p->lineno;
492 
493 	v = var + bn;
494 
495 	a = &p1->to;
496 	a->sym = v->sym;
497 	a->name = v->name;
498 	a->offset = v->offset;
499 	a->etype = v->etype;
500 	a->type = D_OREG;
501 	if(a->etype == TARRAY || a->sym == S)
502 		a->type = D_CONST;
503 
504 	p1->as = AMOVW;
505 	if(v->etype == TCHAR || v->etype == TUCHAR)
506 		p1->as = AMOVB;
507 	if(v->etype == TSHORT || v->etype == TUSHORT)
508 		p1->as = AMOVH;
509 	if(v->etype == TFLOAT)
510 		p1->as = AMOVF;
511 	if(v->etype == TDOUBLE)
512 		p1->as = AMOVD;
513 
514 	p1->from.type = D_REG;
515 	p1->from.reg = rn;
516 	if(rn >= NREG) {
517 		p1->from.type = D_FREG;
518 		p1->from.reg = rn-NREG;
519 	}
520 	if(!f) {
521 		p1->from = *a;
522 		*a = zprog.from;
523 		a->type = D_REG;
524 		a->reg = rn;
525 		if(rn >= NREG) {
526 			a->type = D_FREG;
527 			a->reg = rn-NREG;
528 		}
529 		if(v->etype == TUCHAR)
530 			p1->as = AMOVBU;
531 		if(v->etype == TUSHORT)
532 			p1->as = AMOVHU;
533 	}
534 	if(debug['R'])
535 		print("%P%|.a%P\n", p, COL1, p1);
536 }
537 
538 Bits
539 mkvar(Adr *a, int docon)
540 {
541 	Var *v;
542 	int i, t, n, et, z;
543 	long o;
544 	Bits bit;
545 	Sym *s;
546 
547 	t = a->type;
548 	if(t == D_REG && a->reg != NREG)
549 		regbits |= RtoB(a->reg);
550 	if(t == D_FREG && a->reg != NREG)
551 		regbits |= FtoB(a->reg);
552 	s = a->sym;
553 	o = a->offset;
554 	et = a->etype;
555 	if(s == S) {
556 		if(t != D_CONST || !docon || a->reg != NREG)
557 			goto none;
558 		et = TLONG;
559 	}
560 	if(t == D_CONST) {
561 		if(s == S && sval(o))
562 			goto none;
563 	}
564 
565 	n = a->name;
566 	v = var;
567 	for(i=0; i<nvar; i++) {
568 		if(s == v->sym)
569 		if(n == v->name)
570 		if(o == v->offset)
571 			goto out;
572 		v++;
573 	}
574 	if(s)
575 		if(s->name[0] == '.')
576 			goto none;
577 	if(nvar >= NVAR) {
578 		if(s)
579 			warn(Z, "variable not optimized: %s", s->name);
580 		goto none;
581 	}
582 	i = nvar;
583 	nvar++;
584 	v = &var[i];
585 	v->sym = s;
586 	v->offset = o;
587 	v->etype = et;
588 	v->name = n;
589 	if(debug['R'])
590 		print("bit=%2d et=%2d %D\n", i, et, a);
591 out:
592 	bit = blsh(i);
593 	if(n == D_EXTERN || n == D_STATIC)
594 		for(z=0; z<BITS; z++)
595 			externs.b[z] |= bit.b[z];
596 	if(n == D_PARAM)
597 		for(z=0; z<BITS; z++)
598 			params.b[z] |= bit.b[z];
599 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
600 		for(z=0; z<BITS; z++)
601 			addrs.b[z] |= bit.b[z];
602 	if(t == D_CONST) {
603 		if(s == S) {
604 			for(z=0; z<BITS; z++)
605 				consts.b[z] |= bit.b[z];
606 			return bit;
607 		}
608 		if(et != TARRAY)
609 			for(z=0; z<BITS; z++)
610 				addrs.b[z] |= bit.b[z];
611 		for(z=0; z<BITS; z++)
612 			params.b[z] |= bit.b[z];
613 		return bit;
614 	}
615 	if(t == D_OREG)
616 		return bit;
617 
618 none:
619 	return zbits;
620 }
621 
622 void
623 prop(Reg *r, Bits ref, Bits cal)
624 {
625 	Reg *r1, *r2;
626 	int z;
627 
628 	for(r1 = r; r1 != R; r1 = r1->p1) {
629 		for(z=0; z<BITS; z++) {
630 			ref.b[z] |= r1->refahead.b[z];
631 			if(ref.b[z] != r1->refahead.b[z]) {
632 				r1->refahead.b[z] = ref.b[z];
633 				change++;
634 			}
635 			cal.b[z] |= r1->calahead.b[z];
636 			if(cal.b[z] != r1->calahead.b[z]) {
637 				r1->calahead.b[z] = cal.b[z];
638 				change++;
639 			}
640 		}
641 		switch(r1->prog->as) {
642 		case AJAL:
643 			for(z=0; z<BITS; z++) {
644 				cal.b[z] |= ref.b[z] | externs.b[z];
645 				ref.b[z] = 0;
646 			}
647 			break;
648 
649 		case ATEXT:
650 			for(z=0; z<BITS; z++) {
651 				cal.b[z] = 0;
652 				ref.b[z] = 0;
653 			}
654 			break;
655 
656 		case ARET:
657 			for(z=0; z<BITS; z++) {
658 				cal.b[z] = externs.b[z];
659 				ref.b[z] = 0;
660 			}
661 		}
662 		for(z=0; z<BITS; z++) {
663 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
664 				r1->use1.b[z] | r1->use2.b[z];
665 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
666 			r1->refbehind.b[z] = ref.b[z];
667 			r1->calbehind.b[z] = cal.b[z];
668 		}
669 		if(r1->active)
670 			break;
671 		r1->active = 1;
672 	}
673 	for(; r != r1; r = r->p1)
674 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
675 			prop(r2, r->refbehind, r->calbehind);
676 }
677 
678 int
679 loopit(Reg *r)
680 {
681 	Reg *r1;
682 	int l, m;
683 
684 	l = 0;
685 	r->active = 1;
686 	r->loop = 0;
687 	if(r1 = r->s1)
688 	switch(r1->active) {
689 	case 0:
690 		l += loopit(r1);
691 		break;
692 	case 1:
693 		l += LOOP;
694 		r1->loop += LOOP;
695 	}
696 	if(r1 = r->s2)
697 	switch(r1->active) {
698 	case 0:
699 		l += loopit(r1);
700 		break;
701 	case 1:
702 		l += LOOP;
703 		r1->loop += LOOP;
704 	}
705 	r->active = 2;
706 	m = r->loop;
707 	r->loop = l + 1;
708 	return l - m;
709 }
710 
711 void
712 synch(Reg *r, Bits dif)
713 {
714 	Reg *r1;
715 	int z;
716 
717 	for(r1 = r; r1 != R; r1 = r1->s1) {
718 		for(z=0; z<BITS; z++) {
719 			dif.b[z] = (dif.b[z] &
720 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
721 					r1->set.b[z] | r1->regdiff.b[z];
722 			if(dif.b[z] != r1->regdiff.b[z]) {
723 				r1->regdiff.b[z] = dif.b[z];
724 				change++;
725 			}
726 		}
727 		if(r1->active)
728 			break;
729 		r1->active = 1;
730 		for(z=0; z<BITS; z++)
731 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
732 		if(r1->s2 != R)
733 			synch(r1->s2, dif);
734 	}
735 }
736 
737 ulong
738 allreg(ulong b, Rgn *r)
739 {
740 	Var *v;
741 	int i;
742 
743 	v = var + r->varno;
744 	r->regno = 0;
745 	switch(v->etype) {
746 
747 	default:
748 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
749 		break;
750 
751 	case TCHAR:
752 	case TUCHAR:
753 	case TSHORT:
754 	case TUSHORT:
755 	case TLONG:
756 	case TULONG:
757 	case TIND:
758 	case TARRAY:
759 		i = BtoR(~b);
760 		if(i && r->cost >= 0) {
761 			r->regno = i;
762 			return RtoB(i);
763 		}
764 		break;
765 
766 	case TVLONG:
767 	case TDOUBLE:
768 	case TFLOAT:
769 		i = BtoF(~b);
770 		if(i && r->cost >= 0) {
771 			r->regno = i+NREG;
772 			return FtoB(i);
773 		}
774 		break;
775 	}
776 	return 0;
777 }
778 
779 void
780 paint1(Reg *r, int bn)
781 {
782 	Reg *r1;
783 	Prog *p;
784 	int z;
785 	ulong bb;
786 
787 	z = bn/32;
788 	bb = 1L<<(bn%32);
789 	if(r->act.b[z] & bb)
790 		return;
791 	for(;;) {
792 		if(!(r->refbehind.b[z] & bb))
793 			break;
794 		r1 = r->p1;
795 		if(r1 == R)
796 			break;
797 		if(!(r1->refahead.b[z] & bb))
798 			break;
799 		if(r1->act.b[z] & bb)
800 			break;
801 		r = r1;
802 	}
803 
804 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
805 		change -= CLOAD * r->loop;
806 		if(debug['R'] && debug['v'])
807 			print("%d%P%|ld %B $%d\n", r->loop,
808 				r->prog, COL1, blsh(bn), change);
809 	}
810 	for(;;) {
811 		r->act.b[z] |= bb;
812 		p = r->prog;
813 
814 		if(r->use1.b[z] & bb) {
815 			change += CREF * r->loop;
816 			if(debug['R'] && debug['v'])
817 				print("%d%P%|u1 %B $%d\n", r->loop,
818 					p, COL1, blsh(bn), change);
819 		}
820 
821 		if((r->use2.b[z]|r->set.b[z]) & bb) {
822 			change += CREF * r->loop;
823 			if(debug['R'] && debug['v'])
824 				print("%d%P%|u2 %B $%d\n", r->loop,
825 					p, COL1, blsh(bn), change);
826 		}
827 
828 		if(STORE(r) & r->regdiff.b[z] & bb) {
829 			change -= CLOAD * r->loop;
830 			if(debug['R'] && debug['v'])
831 				print("%d%P%|st %B $%d\n", r->loop,
832 					p, COL1, blsh(bn), change);
833 		}
834 
835 		if(r->refbehind.b[z] & bb)
836 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
837 				if(r1->refahead.b[z] & bb)
838 					paint1(r1, bn);
839 
840 		if(!(r->refahead.b[z] & bb))
841 			break;
842 		r1 = r->s2;
843 		if(r1 != R)
844 			if(r1->refbehind.b[z] & bb)
845 				paint1(r1, bn);
846 		r = r->s1;
847 		if(r == R)
848 			break;
849 		if(r->act.b[z] & bb)
850 			break;
851 		if(!(r->refbehind.b[z] & bb))
852 			break;
853 	}
854 }
855 
856 ulong
857 paint2(Reg *r, int bn)
858 {
859 	Reg *r1;
860 	int z;
861 	ulong bb, vreg;
862 
863 	z = bn/32;
864 	bb = 1L << (bn%32);
865 	vreg = regbits;
866 	if(!(r->act.b[z] & bb))
867 		return vreg;
868 	for(;;) {
869 		if(!(r->refbehind.b[z] & bb))
870 			break;
871 		r1 = r->p1;
872 		if(r1 == R)
873 			break;
874 		if(!(r1->refahead.b[z] & bb))
875 			break;
876 		if(!(r1->act.b[z] & bb))
877 			break;
878 		r = r1;
879 	}
880 	for(;;) {
881 		r->act.b[z] &= ~bb;
882 
883 		vreg |= r->regu;
884 
885 		if(r->refbehind.b[z] & bb)
886 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
887 				if(r1->refahead.b[z] & bb)
888 					vreg |= paint2(r1, bn);
889 
890 		if(!(r->refahead.b[z] & bb))
891 			break;
892 		r1 = r->s2;
893 		if(r1 != R)
894 			if(r1->refbehind.b[z] & bb)
895 				vreg |= paint2(r1, bn);
896 		r = r->s1;
897 		if(r == R)
898 			break;
899 		if(!(r->act.b[z] & bb))
900 			break;
901 		if(!(r->refbehind.b[z] & bb))
902 			break;
903 	}
904 	return vreg;
905 }
906 
907 void
908 paint3(Reg *r, int bn, long rb, int rn)
909 {
910 	Reg *r1;
911 	Prog *p;
912 	int z;
913 	ulong bb;
914 
915 	z = bn/32;
916 	bb = 1L << (bn%32);
917 	if(r->act.b[z] & bb)
918 		return;
919 	for(;;) {
920 		if(!(r->refbehind.b[z] & bb))
921 			break;
922 		r1 = r->p1;
923 		if(r1 == R)
924 			break;
925 		if(!(r1->refahead.b[z] & bb))
926 			break;
927 		if(r1->act.b[z] & bb)
928 			break;
929 		r = r1;
930 	}
931 
932 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
933 		addmove(r, bn, rn, 0);
934 	for(;;) {
935 		r->act.b[z] |= bb;
936 		p = r->prog;
937 
938 		if(r->use1.b[z] & bb) {
939 			if(debug['R'])
940 				print("%P", p);
941 			addreg(&p->from, rn);
942 			if(debug['R'])
943 				print("%|.c%P\n", COL1, p);
944 		}
945 		if((r->use2.b[z]|r->set.b[z]) & bb) {
946 			if(debug['R'])
947 				print("%P", p);
948 			addreg(&p->to, rn);
949 			if(debug['R'])
950 				print("%|.c%P\n", COL1, p);
951 		}
952 
953 		if(STORE(r) & r->regdiff.b[z] & bb)
954 			addmove(r, bn, rn, 1);
955 		r->regu |= rb;
956 
957 		if(r->refbehind.b[z] & bb)
958 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
959 				if(r1->refahead.b[z] & bb)
960 					paint3(r1, bn, rb, rn);
961 
962 		if(!(r->refahead.b[z] & bb))
963 			break;
964 		r1 = r->s2;
965 		if(r1 != R)
966 			if(r1->refbehind.b[z] & bb)
967 				paint3(r1, bn, rb, rn);
968 		r = r->s1;
969 		if(r == R)
970 			break;
971 		if(r->act.b[z] & bb)
972 			break;
973 		if(!(r->refbehind.b[z] & bb))
974 			break;
975 	}
976 }
977 
978 void
979 addreg(Adr *a, int rn)
980 {
981 
982 	a->sym = 0;
983 	a->name = D_NONE;
984 	a->type = D_REG;
985 	a->reg = rn;
986 	if(rn >= NREG) {
987 		a->type = D_FREG;
988 		a->reg = rn-NREG;
989 	}
990 }
991 
992 /*
993  *	bit	reg
994  *	0	R3
995  *	1	R4
996  *	...	...
997  *	19	R22
998  *	20	R23
999  */
1000 long
1001 RtoB(int r)
1002 {
1003 
1004 	if(r < 3 || r > 23)
1005 		return 0;
1006 	return 1L << (r-3);
1007 }
1008 
1009 BtoR(long b)
1010 {
1011 
1012 	b &= 0x001fffffL;
1013 	if(b == 0)
1014 		return 0;
1015 	return bitno(b) + 3;
1016 }
1017 
1018 /*
1019  *	bit	reg
1020  *	22	F4
1021  *	23	F6
1022  *	...	...
1023  *	31	F22
1024  */
1025 long
1026 FtoB(int f)
1027 {
1028 
1029 	if(f < 4 || f > 22 || (f&1))
1030 		return 0;
1031 	return 1L << (f/2 + 20);
1032 }
1033 
1034 int
1035 BtoF(long b)
1036 {
1037 
1038 	b &= 0xffc00000L;
1039 	if(b == 0)
1040 		return 0;
1041 	return bitno(b)*2 - 40;
1042 }
1043