xref: /plan9-contrib/sys/src/cmd/8c/reg.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include "gc.h"
2 
3 #define	COL1	40
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		ALLOC(r, Reg);
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(void *a1, void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = a1;
27 	p2 = a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = RtoB(D_SP) | RtoB(D_AX);
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 			continue;
84 		}
85 		r = rega();
86 		if(firstr == R) {
87 			firstr = r;
88 			lastr = r;
89 		} else {
90 			lastr->link = r;
91 			r->p1 = lastr;
92 			lastr->s1 = r;
93 			lastr = r;
94 		}
95 		r->prog = p;
96 		r->pc = val;
97 		val++;
98 
99 		lp = log5;
100 		for(i=0; i<5; i++) {
101 			lp->c--;
102 			if(lp->c <= 0) {
103 				lp->c = lp->m;
104 				if(lp->p != R)
105 					lp->p->log5 = r;
106 				lp->p = r;
107 				(lp+1)->c = 0;
108 				break;
109 			}
110 			lp++;
111 		}
112 
113 		r1 = r->p1;
114 		if(r1 != R)
115 		switch(r1->prog->as) {
116 		case ARET:
117 		case AJMP:
118 		case AIRETL:
119 			r->p1 = R;
120 			r1->s1 = R;
121 		}
122 
123 		bit = mkvar(r, &p->from);
124 		if(bany(&bit))
125 		switch(p->as) {
126 		/*
127 		 * funny
128 		 */
129 		case ALEAL:
130 			for(z=0; z<BITS; z++)
131 				addrs.b[z] |= bit.b[z];
132 			break;
133 
134 		/*
135 		 * left side read
136 		 */
137 		default:
138 			for(z=0; z<BITS; z++)
139 				r->use1.b[z] |= bit.b[z];
140 			break;
141 		}
142 
143 		bit = mkvar(r, &p->to);
144 		if(bany(&bit))
145 		switch(p->as) {
146 		default:
147 			diag(Z, "reg: unknown op: %A", p->as);
148 			break;
149 
150 		/*
151 		 * right side read
152 		 */
153 		case ACMPB:
154 		case ACMPL:
155 		case ACMPW:
156 			for(z=0; z<BITS; z++)
157 				r->use2.b[z] |= bit.b[z];
158 			break;
159 
160 		/*
161 		 * right side write
162 		 */
163 		case ANOP:
164 		case AMOVL:
165 		case AMOVB:
166 		case AMOVW:
167 		case AMOVBLSX:
168 		case AMOVBLZX:
169 		case AMOVWLSX:
170 		case AMOVWLZX:
171 			for(z=0; z<BITS; z++)
172 				r->set.b[z] |= bit.b[z];
173 			break;
174 
175 		/*
176 		 * right side read+write
177 		 */
178 		case AADDB:
179 		case AADDL:
180 		case AADDW:
181 		case AANDB:
182 		case AANDL:
183 		case AANDW:
184 		case ASUBB:
185 		case ASUBL:
186 		case ASUBW:
187 		case AORB:
188 		case AORL:
189 		case AORW:
190 		case AXORB:
191 		case AXORL:
192 		case AXORW:
193 		case ASALB:
194 		case ASALL:
195 		case ASALW:
196 		case ASARB:
197 		case ASARL:
198 		case ASARW:
199 		case AROLB:
200 		case AROLL:
201 		case AROLW:
202 		case ARORB:
203 		case ARORL:
204 		case ARORW:
205 		case ASHLB:
206 		case ASHLL:
207 		case ASHLW:
208 		case ASHRB:
209 		case ASHRL:
210 		case ASHRW:
211 			for(z=0; z<BITS; z++) {
212 				r->set.b[z] |= bit.b[z];
213 				r->use2.b[z] |= bit.b[z];
214 			}
215 			break;
216 
217 		/*
218 		 * funny
219 		 */
220 		case AFMOVDP:
221 		case AFMOVFP:
222 		case ACALL:
223 			for(z=0; z<BITS; z++)
224 				addrs.b[z] |= bit.b[z];
225 			break;
226 		}
227 
228 		switch(p->as) {
229 		case AIDIVB:
230 		case AIDIVL:
231 		case AIDIVW:
232 		case AIMULB:
233 		case AIMULL:
234 		case AIMULW:
235 		case ADIVB:
236 		case ADIVL:
237 		case ADIVW:
238 		case AMULB:
239 		case AMULL:
240 		case AMULW:
241 
242 		case ACWD:
243 		case ACDQ:
244 			r->regu |= RtoB(D_AX) | RtoB(D_DX);
245 			break;
246 
247 		case AREP:
248 		case AREPN:
249 		case ALOOP:
250 		case ALOOPEQ:
251 		case ALOOPNE:
252 			r->regu |= RtoB(D_CX);
253 			break;
254 
255 		case AMOVSB:
256 		case AMOVSL:
257 		case AMOVSW:
258 		case ACMPSB:
259 		case ACMPSL:
260 		case ACMPSW:
261 			r->regu |= RtoB(D_SI) | RtoB(D_DI);
262 			break;
263 
264 		case ASTOSB:
265 		case ASTOSL:
266 		case ASTOSW:
267 		case ASCASB:
268 		case ASCASL:
269 		case ASCASW:
270 			r->regu |= RtoB(D_AX) | RtoB(D_DI);
271 			break;
272 
273 		case AINSB:
274 		case AINSL:
275 		case AINSW:
276 		case AOUTSB:
277 		case AOUTSL:
278 		case AOUTSW:
279 			r->regu |= RtoB(D_DI) | RtoB(D_DX);
280 			break;
281 
282 		case AFSTSW:
283 		case ASAHF:
284 			r->regu |= RtoB(D_AX);
285 			break;
286 		}
287 	}
288 	if(firstr == R)
289 		return;
290 	initpc = pc - val;
291 
292 	/*
293 	 * pass 2
294 	 * turn branch references to pointers
295 	 * build back pointers
296 	 */
297 	for(r = firstr; r != R; r = r->link) {
298 		p = r->prog;
299 		if(p->to.type == D_BRANCH) {
300 			val = p->to.offset - initpc;
301 			r1 = firstr;
302 			while(r1 != R) {
303 				r2 = r1->log5;
304 				if(r2 != R && val >= r2->pc) {
305 					r1 = r2;
306 					continue;
307 				}
308 				if(r1->pc == val)
309 					break;
310 				r1 = r1->link;
311 			}
312 			if(r1 == R) {
313 				nearln = p->lineno;
314 				diag(Z, "ref not found\n%P", p);
315 				continue;
316 			}
317 			if(r1 == r) {
318 				nearln = p->lineno;
319 				diag(Z, "ref to self\n%P", p);
320 				continue;
321 			}
322 			r->s2 = r1;
323 			r->p2link = r1->p2;
324 			r1->p2 = r;
325 		}
326 	}
327 	if(debug['R']) {
328 		p = firstr->prog;
329 		print("\n%L %D\n", p->lineno, &p->from);
330 	}
331 
332 	/*
333 	 * pass 2.5
334 	 * find looping structure
335 	 */
336 	for(r = firstr; r != R; r = r->link)
337 		r->active = 0;
338 	change = 0;
339 	loopit(firstr);
340 	if(debug['R'] && debug['v']) {
341 		print("\nlooping structure:\n");
342 		for(r = firstr; r != R; r = r->link) {
343 			print("%d:%P", r->loop, r->prog);
344 			for(z=0; z<BITS; z++)
345 				bit.b[z] = r->use1.b[z] |
346 					   r->use2.b[z] |
347 					   r->set.b[z];
348 			if(bany(&bit)) {
349 				print("%|", COL1);
350 				if(bany(&r->use1))
351 					print(" u1=%B", r->use1);
352 				if(bany(&r->use2))
353 					print(" u2=%B", r->use2);
354 				if(bany(&r->set))
355 					print(" st=%B", r->set);
356 			}
357 			print("\n");
358 		}
359 	}
360 
361 	/*
362 	 * pass 3
363 	 * iterate propagating usage
364 	 * 	back until flow graph is complete
365 	 */
366 loop1:
367 	change = 0;
368 	for(r = firstr; r != R; r = r->link)
369 		r->active = 0;
370 	for(r = firstr; r != R; r = r->link)
371 		if(r->prog->as == ARET)
372 			prop(r, zbits, zbits);
373 loop11:
374 	/* pick up unreachable code */
375 	i = 0;
376 	for(r = firstr; r != R; r = r1) {
377 		r1 = r->link;
378 		if(r1 && r1->active && !r->active) {
379 			prop(r, zbits, zbits);
380 			i = 1;
381 		}
382 	}
383 	if(i)
384 		goto loop11;
385 	if(change)
386 		goto loop1;
387 
388 
389 	/*
390 	 * pass 4
391 	 * iterate propagating register/variable synchrony
392 	 * 	forward until graph is complete
393 	 */
394 loop2:
395 	change = 0;
396 	for(r = firstr; r != R; r = r->link)
397 		r->active = 0;
398 	synch(firstr, zbits);
399 	if(change)
400 		goto loop2;
401 
402 
403 	/*
404 	 * pass 5
405 	 * isolate regions
406 	 * calculate costs (paint1)
407 	 */
408 	r = firstr;
409 	if(r) {
410 		for(z=0; z<BITS; z++)
411 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
412 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
413 		if(bany(&bit)) {
414 			nearln = r->prog->lineno;
415 			warn(Z, "used and not set: %B", bit);
416 			if(debug['R'] && !debug['w'])
417 				print("used and not set: %B\n", bit);
418 		}
419 	}
420 	if(debug['R'] && debug['v'])
421 		print("\nprop structure:\n");
422 	for(r = firstr; r != R; r = r->link)
423 		r->act = zbits;
424 	rgp = region;
425 	nregion = 0;
426 	for(r = firstr; r != R; r = r->link) {
427 		if(debug['R'] && debug['v']) {
428 			print("%P%|", r->prog, COL1);
429 			if(bany(&r->set))
430 				print("s:%B ", r->set);
431 			if(bany(&r->refahead))
432 				print("ra:%B ", r->refahead);
433 			if(bany(&r->calahead))
434 				print("ca:%B ", r->calahead);
435 			print("\n");
436 		}
437 		for(z=0; z<BITS; z++)
438 			bit.b[z] = r->set.b[z] &
439 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
440 		if(bany(&bit)) {
441 			nearln = r->prog->lineno;
442 			warn(Z, "set and not used: %B", bit);
443 			if(debug['R'])
444 				print("set an not used: %B\n", bit);
445 			excise(r);
446 		}
447 		for(z=0; z<BITS; z++)
448 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
449 		while(bany(&bit)) {
450 			i = bnum(bit);
451 			rgp->enter = r;
452 			rgp->varno = i;
453 			change = 0;
454 			if(debug['R'] && debug['v'])
455 				print("\n");
456 			paint1(r, i);
457 			bit.b[i/32] &= ~(1L<<(i%32));
458 			if(change <= 0) {
459 				if(debug['R'])
460 					print("%L$%d: %B\n",
461 						r->prog->lineno, change, blsh(i));
462 				continue;
463 			}
464 			rgp->cost = change;
465 			nregion++;
466 			if(nregion >= NRGN) {
467 				warn(Z, "too many regions");
468 				goto brk;
469 			}
470 			rgp++;
471 		}
472 	}
473 brk:
474 	qsort(region, nregion, sizeof(region[0]), rcmp);
475 
476 	/*
477 	 * pass 6
478 	 * determine used registers (paint2)
479 	 * replace code (paint3)
480 	 */
481 	rgp = region;
482 	for(i=0; i<nregion; i++) {
483 		bit = blsh(rgp->varno);
484 		vreg = paint2(rgp->enter, rgp->varno);
485 		vreg = allreg(vreg, rgp);
486 		if(debug['R']) {
487 			print("%L$%d %R: %B\n",
488 				rgp->enter->prog->lineno,
489 				rgp->cost,
490 				rgp->regno,
491 				bit);
492 		}
493 		if(rgp->regno != 0)
494 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
495 		rgp++;
496 	}
497 	/*
498 	 * pass 7
499 	 * peep-hole on basic block
500 	 */
501 	if(!debug['R'] || debug['P'])
502 		peep();
503 
504 	/*
505 	 * pass 8
506 	 * recalculate pc
507 	 */
508 	val = initpc;
509 	for(r = firstr; r != R; r = r1) {
510 		r->pc = val;
511 		p = r->prog;
512 		p1 = P;
513 		r1 = r->link;
514 		if(r1 != R)
515 			p1 = r1->prog;
516 		for(; p != p1; p = p->link) {
517 			switch(p->as) {
518 			default:
519 				val++;
520 				break;
521 
522 			case ANOP:
523 			case ADATA:
524 			case AGLOBL:
525 			case ANAME:
526 				break;
527 			}
528 		}
529 	}
530 	pc = val;
531 
532 	/*
533 	 * fix up branches
534 	 */
535 	if(debug['R'])
536 		if(bany(&addrs))
537 			print("addrs: %B\n", addrs);
538 
539 	r1 = 0; /* set */
540 	for(r = firstr; r != R; r = r->link) {
541 		p = r->prog;
542 		if(p->to.type == D_BRANCH)
543 			p->to.offset = r->s2->pc;
544 		r1 = r;
545 	}
546 
547 	/*
548 	 * last pass
549 	 * eliminate nops
550 	 * free aux structures
551 	 */
552 	for(p = firstr->prog; p != P; p = p->link){
553 		while(p->link && p->link->as == ANOP)
554 			p->link = p->link->link;
555 	}
556 	if(r1 != R) {
557 		r1->link = freer;
558 		freer = firstr;
559 	}
560 }
561 
562 /*
563  * add mov b,rn
564  * just after r
565  */
566 void
567 addmove(Reg *r, int bn, int rn, int f)
568 {
569 	Prog *p, *p1;
570 	Adr *a;
571 	Var *v;
572 
573 	ALLOC(p1,Prog);
574 	*p1 = zprog;
575 	p = r->prog;
576 
577 	p1->link = p->link;
578 	p->link = p1;
579 	p1->lineno = p->lineno;
580 
581 	v = var + bn;
582 
583 	a = &p1->to;
584 	a->sym = v->sym;
585 	a->offset = v->offset;
586 	a->etype = v->etype;
587 	a->type = v->name;
588 
589 	p1->as = AMOVL;
590 	if(v->etype == TCHAR || v->etype == TUCHAR)
591 		p1->as = AMOVB;
592 	if(v->etype == TSHORT || v->etype == TUSHORT)
593 		p1->as = AMOVW;
594 
595 	p1->from.type = rn;
596 	if(!f) {
597 		p1->from = *a;
598 		*a = zprog.from;
599 		a->type = rn;
600 		if(v->etype == TUCHAR)
601 			p1->as = AMOVB;
602 		if(v->etype == TUSHORT)
603 			p1->as = AMOVW;
604 	}
605 	if(debug['R'])
606 		print("%P%|.a%P\n", p, COL1, p1);
607 }
608 
609 ulong
610 doregbits(int r)
611 {
612 	ulong b;
613 
614 	b = 0;
615 	if(r >= D_INDIR)
616 		r -= D_INDIR;
617 	if(r >= D_AX && r <= D_DI)
618 		b |= RtoB(r);
619 	else
620 	if(r >= D_AL && r <= D_BL)
621 		b |= RtoB(r-D_AL+D_AX);
622 	else
623 	if(r >= D_AH && r <= D_BH)
624 		b |= RtoB(r-D_AH+D_AX);
625 	return b;
626 }
627 
628 Bits
629 mkvar(Reg *r, Adr *a)
630 {
631 	Var *v;
632 	int i, t, n, et, z;
633 	long o;
634 	Bits bit;
635 	Sym *s;
636 
637 	/*
638 	 * mark registers used
639 	 */
640 	t = a->type;
641 	r->regu |= doregbits(t);
642 	r->regu |= doregbits(a->index);
643 
644 	switch(t) {
645 	default:
646 		goto none;
647 	case D_ADDR:
648 		a->type = a->index;
649 		bit = mkvar(r, a);
650 		for(z=0; z<BITS; z++)
651 			addrs.b[z] |= bit.b[z];
652 		a->type = t;
653 		goto none;
654 	case D_EXTERN:
655 	case D_STATIC:
656 	case D_PARAM:
657 	case D_AUTO:
658 		n = t;
659 		break;
660 	}
661 	s = a->sym;
662 	if(s == S)
663 		goto none;
664 	if(s->name[0] == '.')
665 		goto none;
666 	et = a->etype;
667 	o = a->offset;
668 	v = var;
669 	for(i=0; i<nvar; i++) {
670 		if(s == v->sym)
671 		if(n == v->name)
672 		if(o == v->offset)
673 			goto out;
674 		v++;
675 	}
676 	if(nvar >= NVAR) {
677 		warn(Z, "variable not optimized: %s", s->name);
678 		goto none;
679 	}
680 	i = nvar;
681 	nvar++;
682 	v = &var[i];
683 	v->sym = s;
684 	v->offset = o;
685 	v->name = n;
686 	v->etype = et;
687 	if(debug['R'])
688 		print("bit=%2d et=%2d %D\n", i, et, a);
689 
690 out:
691 	bit = blsh(i);
692 	if(n == D_EXTERN || n == D_STATIC)
693 		for(z=0; z<BITS; z++)
694 			externs.b[z] |= bit.b[z];
695 	if(n == D_PARAM)
696 		for(z=0; z<BITS; z++)
697 			params.b[z] |= bit.b[z];
698 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
699 		for(z=0; z<BITS; z++)
700 			addrs.b[z] |= bit.b[z];
701 	return bit;
702 
703 none:
704 	return zbits;
705 }
706 
707 void
708 prop(Reg *r, Bits ref, Bits cal)
709 {
710 	Reg *r1, *r2;
711 	int z;
712 
713 	for(r1 = r; r1 != R; r1 = r1->p1) {
714 		for(z=0; z<BITS; z++) {
715 			ref.b[z] |= r1->refahead.b[z];
716 			if(ref.b[z] != r1->refahead.b[z]) {
717 				r1->refahead.b[z] = ref.b[z];
718 				change++;
719 			}
720 			cal.b[z] |= r1->calahead.b[z];
721 			if(cal.b[z] != r1->calahead.b[z]) {
722 				r1->calahead.b[z] = cal.b[z];
723 				change++;
724 			}
725 		}
726 		switch(r1->prog->as) {
727 		case ACALL:
728 			for(z=0; z<BITS; z++) {
729 				cal.b[z] |= ref.b[z] | externs.b[z];
730 				ref.b[z] = 0;
731 			}
732 			break;
733 
734 		case ATEXT:
735 			for(z=0; z<BITS; z++) {
736 				cal.b[z] = 0;
737 				ref.b[z] = 0;
738 			}
739 			break;
740 
741 		case ARET:
742 			for(z=0; z<BITS; z++) {
743 				cal.b[z] = externs.b[z];
744 				ref.b[z] = 0;
745 			}
746 		}
747 		for(z=0; z<BITS; z++) {
748 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
749 				r1->use1.b[z] | r1->use2.b[z];
750 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
751 			r1->refbehind.b[z] = ref.b[z];
752 			r1->calbehind.b[z] = cal.b[z];
753 		}
754 		if(r1->active)
755 			break;
756 		r1->active = 1;
757 	}
758 	for(; r != r1; r = r->p1)
759 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
760 			prop(r2, r->refbehind, r->calbehind);
761 }
762 
763 int
764 loopit(Reg *r)
765 {
766 	Reg *r1;
767 	int l, m;
768 
769 	l = 0;
770 	r->active = 1;
771 	r->loop = 0;
772 	if(r1 = r->s1)
773 	switch(r1->active) {
774 	case 0:
775 		l += loopit(r1);
776 		break;
777 	case 1:
778 		l += LOOP;
779 		r1->loop += LOOP;
780 	}
781 	if(r1 = r->s2)
782 	switch(r1->active) {
783 	case 0:
784 		l += loopit(r1);
785 		break;
786 	case 1:
787 		l += LOOP;
788 		r1->loop += LOOP;
789 	}
790 	r->active = 2;
791 	m = r->loop;
792 	r->loop = l + 1;
793 	return l - m;
794 }
795 
796 void
797 synch(Reg *r, Bits dif)
798 {
799 	Reg *r1;
800 	int z;
801 
802 	for(r1 = r; r1 != R; r1 = r1->s1) {
803 		for(z=0; z<BITS; z++) {
804 			dif.b[z] = (dif.b[z] &
805 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
806 					r1->set.b[z] | r1->regdiff.b[z];
807 			if(dif.b[z] != r1->regdiff.b[z]) {
808 				r1->regdiff.b[z] = dif.b[z];
809 				change++;
810 			}
811 		}
812 		if(r1->active)
813 			break;
814 		r1->active = 1;
815 		for(z=0; z<BITS; z++)
816 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
817 		if(r1->s2 != R)
818 			synch(r1->s2, dif);
819 	}
820 }
821 
822 ulong
823 allreg(ulong b, Rgn *r)
824 {
825 	Var *v;
826 	int i;
827 
828 	v = var + r->varno;
829 	r->regno = 0;
830 	switch(v->etype) {
831 
832 	default:
833 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
834 		break;
835 
836 	case TCHAR:
837 	case TUCHAR:
838 	case TSHORT:
839 	case TUSHORT:
840 	case TLONG:
841 	case TULONG:
842 	case TIND:
843 	case TARRAY:
844 		i = BtoR(~b);
845 		if(i && r->cost > 0) {
846 			r->regno = i;
847 			return RtoB(i);
848 		}
849 		break;
850 
851 	case TVLONG:
852 	case TDOUBLE:
853 	case TFLOAT:
854 		break;
855 	}
856 	return 0;
857 }
858 
859 void
860 paint1(Reg *r, int bn)
861 {
862 	Reg *r1;
863 	Prog *p;
864 	int z;
865 	ulong bb;
866 
867 	z = bn/32;
868 	bb = 1L<<(bn%32);
869 	if(r->act.b[z] & bb)
870 		return;
871 	for(;;) {
872 		if(!(r->refbehind.b[z] & bb))
873 			break;
874 		r1 = r->p1;
875 		if(r1 == R)
876 			break;
877 		if(!(r1->refahead.b[z] & bb))
878 			break;
879 		if(r1->act.b[z] & bb)
880 			break;
881 		r = r1;
882 	}
883 
884 	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
885 		change -= CLOAD * r->loop;
886 		if(debug['R'] && debug['v'])
887 			print("%d%P%|ld %B $%d\n", r->loop,
888 				r->prog, COL1, blsh(bn), change);
889 	}
890 	for(;;) {
891 		r->act.b[z] |= bb;
892 		p = r->prog;
893 
894 		if(r->use1.b[z] & bb) {
895 			change += CREF * r->loop;
896 			if(p->as == AFMOVL)
897 				if(BtoR(bb) != D_F0)
898 					change = -CINF;
899 			if(debug['R'] && debug['v'])
900 				print("%d%P%|u1 %B $%d\n", r->loop,
901 					p, COL1, blsh(bn), change);
902 		}
903 
904 		if((r->use2.b[z]|r->set.b[z]) & bb) {
905 			change += CREF * r->loop;
906 			if(p->as == AFMOVL)
907 				if(BtoR(bb) != D_F0)
908 					change = -CINF;
909 			if(debug['R'] && debug['v'])
910 				print("%d%P%|u2 %B $%d\n", r->loop,
911 					p, COL1, blsh(bn), change);
912 		}
913 
914 		if(STORE(r) & r->regdiff.b[z] & bb) {
915 			change -= CLOAD * r->loop;
916 			if(p->as == AFMOVL)
917 				if(BtoR(bb) != D_F0)
918 					change = -CINF;
919 			if(debug['R'] && debug['v'])
920 				print("%d%P%|st %B $%d\n", r->loop,
921 					p, COL1, blsh(bn), change);
922 		}
923 
924 		if(r->refbehind.b[z] & bb)
925 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
926 				if(r1->refahead.b[z] & bb)
927 					paint1(r1, bn);
928 
929 		if(!(r->refahead.b[z] & bb))
930 			break;
931 		r1 = r->s2;
932 		if(r1 != R)
933 			if(r1->refbehind.b[z] & bb)
934 				paint1(r1, bn);
935 		r = r->s1;
936 		if(r == R)
937 			break;
938 		if(r->act.b[z] & bb)
939 			break;
940 		if(!(r->refbehind.b[z] & bb))
941 			break;
942 	}
943 }
944 
945 ulong
946 regset(Reg *r, ulong bb)
947 {
948 	ulong b, set;
949 	Adr v;
950 	int c;
951 
952 	set = 0;
953 	v = zprog.from;
954 	while(b = bb & ~(bb-1)) {
955 		v.type = BtoR(b);
956 		c = copyu(r->prog, &v, A);
957 		if(c == 3)
958 			set |= b;
959 		bb &= ~b;
960 	}
961 	return set;
962 }
963 
964 ulong
965 reguse(Reg *r, ulong bb)
966 {
967 	ulong b, set;
968 	Adr v;
969 	int c;
970 
971 	set = 0;
972 	v = zprog.from;
973 	while(b = bb & ~(bb-1)) {
974 		v.type = BtoR(b);
975 		c = copyu(r->prog, &v, A);
976 		if(c == 1 || c == 2 || c == 4)
977 			set |= b;
978 		bb &= ~b;
979 	}
980 	return set;
981 }
982 
983 ulong
984 paint2(Reg *r, int bn)
985 {
986 	Reg *r1;
987 	int z;
988 	ulong bb, vreg, x;
989 
990 	z = bn/32;
991 	bb = 1L << (bn%32);
992 	vreg = regbits;
993 	if(!(r->act.b[z] & bb))
994 		return vreg;
995 	for(;;) {
996 		if(!(r->refbehind.b[z] & bb))
997 			break;
998 		r1 = r->p1;
999 		if(r1 == R)
1000 			break;
1001 		if(!(r1->refahead.b[z] & bb))
1002 			break;
1003 		if(!(r1->act.b[z] & bb))
1004 			break;
1005 		r = r1;
1006 	}
1007 	for(;;) {
1008 		r->act.b[z] &= ~bb;
1009 
1010 		vreg |= r->regu;
1011 
1012 		if(r->refbehind.b[z] & bb)
1013 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1014 				if(r1->refahead.b[z] & bb)
1015 					vreg |= paint2(r1, bn);
1016 
1017 		if(!(r->refahead.b[z] & bb))
1018 			break;
1019 		r1 = r->s2;
1020 		if(r1 != R)
1021 			if(r1->refbehind.b[z] & bb)
1022 				vreg |= paint2(r1, bn);
1023 		r = r->s1;
1024 		if(r == R)
1025 			break;
1026 		if(!(r->act.b[z] & bb))
1027 			break;
1028 		if(!(r->refbehind.b[z] & bb))
1029 			break;
1030 	}
1031 
1032 	bb = vreg;
1033 	for(; r; r=r->s1) {
1034 		x = r->regu & ~bb;
1035 		if(x) {
1036 			vreg |= reguse(r, x);
1037 			bb |= regset(r, x);
1038 		}
1039 	}
1040 	return vreg;
1041 }
1042 
1043 void
1044 paint3(Reg *r, int bn, long rb, int rn)
1045 {
1046 	Reg *r1;
1047 	Prog *p;
1048 	int z;
1049 	ulong bb;
1050 
1051 	z = bn/32;
1052 	bb = 1L << (bn%32);
1053 	if(r->act.b[z] & bb)
1054 		return;
1055 	for(;;) {
1056 		if(!(r->refbehind.b[z] & bb))
1057 			break;
1058 		r1 = r->p1;
1059 		if(r1 == R)
1060 			break;
1061 		if(!(r1->refahead.b[z] & bb))
1062 			break;
1063 		if(r1->act.b[z] & bb)
1064 			break;
1065 		r = r1;
1066 	}
1067 
1068 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1069 		addmove(r, bn, rn, 0);
1070 	for(;;) {
1071 		r->act.b[z] |= bb;
1072 		p = r->prog;
1073 
1074 		if(r->use1.b[z] & bb) {
1075 			if(debug['R'])
1076 				print("%P", p);
1077 			addreg(&p->from, rn);
1078 			if(debug['R'])
1079 				print("%|.c%P\n", COL1, p);
1080 		}
1081 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1082 			if(debug['R'])
1083 				print("%P", p);
1084 			addreg(&p->to, rn);
1085 			if(debug['R'])
1086 				print("%|.c%P\n", COL1, p);
1087 		}
1088 
1089 		if(STORE(r) & r->regdiff.b[z] & bb)
1090 			addmove(r, bn, rn, 1);
1091 		r->regu |= rb;
1092 
1093 		if(r->refbehind.b[z] & bb)
1094 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1095 				if(r1->refahead.b[z] & bb)
1096 					paint3(r1, bn, rb, rn);
1097 
1098 		if(!(r->refahead.b[z] & bb))
1099 			break;
1100 		r1 = r->s2;
1101 		if(r1 != R)
1102 			if(r1->refbehind.b[z] & bb)
1103 				paint3(r1, bn, rb, rn);
1104 		r = r->s1;
1105 		if(r == R)
1106 			break;
1107 		if(r->act.b[z] & bb)
1108 			break;
1109 		if(!(r->refbehind.b[z] & bb))
1110 			break;
1111 	}
1112 }
1113 
1114 void
1115 addreg(Adr *a, int rn)
1116 {
1117 
1118 	a->sym = 0;
1119 	a->offset = 0;
1120 	a->type = rn;
1121 }
1122 
1123 long
1124 RtoB(int r)
1125 {
1126 
1127 	switch(r < D_AX || r > D_DI || r == D_SP)
1128 		return 0;
1129 	return 1L << (r-D_AX);
1130 }
1131 
1132 int
1133 BtoR(long b)
1134 {
1135 
1136 	b &= 0xffL;
1137 	if(b == 0)
1138 		return 0;
1139 	return bitno(b) + D_AX;
1140 }
1141