xref: /inferno-os/utils/vc/reg.c (revision 4eb166cf184c1f102fb79e31b1465ea3e2021c39)
1 #include "gc.h"
2 
3 void	addsplits(void);
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		r = alloc(sizeof(*r));
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(const void *a1, const void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = (Rgn*)a1;
27 	p2 = (Rgn*)a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val, npc;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = 0;
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 		case ASIGNAME:
84 			continue;
85 		}
86 		r = rega();
87 		if(firstr == R) {
88 			firstr = r;
89 			lastr = r;
90 		} else {
91 			lastr->link = r;
92 			r->p1 = lastr;
93 			lastr->s1 = r;
94 			lastr = r;
95 		}
96 		r->prog = p;
97 		r->pc = val;
98 		val++;
99 
100 		lp = log5;
101 		for(i=0; i<5; i++) {
102 			lp->c--;
103 			if(lp->c <= 0) {
104 				lp->c = lp->m;
105 				if(lp->p != R)
106 					lp->p->log5 = r;
107 				lp->p = r;
108 				(lp+1)->c = 0;
109 				break;
110 			}
111 			lp++;
112 		}
113 
114 		r1 = r->p1;
115 		if(r1 != R)
116 		switch(r1->prog->as) {
117 		case ARET:
118 		case AJMP:
119 		case ARFE:
120 			r->p1 = R;
121 			r1->s1 = R;
122 		}
123 
124 		/*
125 		 * left side always read
126 		 */
127 		bit = mkvar(&p->from, p->as==AMOVW);
128 		for(z=0; z<BITS; z++)
129 			r->use1.b[z] |= bit.b[z];
130 
131 		/*
132 		 * right side depends on opcode
133 		 */
134 		bit = mkvar(&p->to, 0);
135 		if(bany(&bit))
136 		switch(p->as) {
137 		default:
138 			diag(Z, "reg: unknown asop: %A", p->as);
139 			break;
140 
141 		/*
142 		 * right side write
143 		 */
144 		case ANOP:
145 		case AMOVB:
146 		case AMOVBU:
147 		case AMOVH:
148 		case AMOVHU:
149 		case AMOVW:
150 		case AMOVF:
151 		case AMOVD:
152 			for(z=0; z<BITS; z++)
153 				r->set.b[z] |= bit.b[z];
154 			break;
155 
156 		/*
157 		 * funny
158 		 */
159 		case AJAL:
160 			for(z=0; z<BITS; z++)
161 				addrs.b[z] |= bit.b[z];
162 			break;
163 		}
164 	}
165 	if(firstr == R)
166 		return;
167 	initpc = pc - val;
168 	npc = val;
169 
170 	/*
171 	 * pass 2
172 	 * turn branch references to pointers
173 	 * build back pointers
174 	 */
175 	for(r = firstr; r != R; r = r->link) {
176 		p = r->prog;
177 		if(p->to.type == D_BRANCH) {
178 			val = p->to.offset - initpc;
179 			r1 = firstr;
180 			while(r1 != R) {
181 				r2 = r1->log5;
182 				if(r2 != R && val >= r2->pc) {
183 					r1 = r2;
184 					continue;
185 				}
186 				if(r1->pc == val)
187 					break;
188 				r1 = r1->link;
189 			}
190 			if(r1 == R) {
191 				nearln = p->lineno;
192 				diag(Z, "ref not found\n%P", p);
193 				continue;
194 			}
195 			if(r1 == r) {
196 				nearln = p->lineno;
197 				diag(Z, "ref to self\n%P", p);
198 				continue;
199 			}
200 			r->s2 = r1;
201 			r->p2link = r1->p2;
202 			r1->p2 = r;
203 		}
204 	}
205 	if(debug['R']) {
206 		p = firstr->prog;
207 		print("\n%L %D\n", p->lineno, &p->from);
208 	}
209 
210 	/*
211 	 * pass 2.5
212 	 * find looping structure
213 	 */
214 	for(r = firstr; r != R; r = r->link)
215 		r->active = 0;
216 	change = 0;
217 	loopit(firstr, npc);
218 
219 	/*
220 	 * pass 3
221 	 * iterate propagating usage
222 	 * 	back until flow graph is complete
223 	 */
224 loop1:
225 	change = 0;
226 	for(r = firstr; r != R; r = r->link)
227 		r->active = 0;
228 	for(r = firstr; r != R; r = r->link)
229 		if(r->prog->as == ARET)
230 			prop(r, zbits, zbits);
231 loop11:
232 	/* pick up unreachable code */
233 	i = 0;
234 	for(r = firstr; r != R; r = r1) {
235 		r1 = r->link;
236 		if(r1 && r1->active && !r->active) {
237 			prop(r, zbits, zbits);
238 			i = 1;
239 		}
240 	}
241 	if(i)
242 		goto loop11;
243 	if(change)
244 		goto loop1;
245 
246 
247 	/*
248 	 * pass 4
249 	 * iterate propagating register/variable synchrony
250 	 * 	forward until graph is complete
251 	 */
252 loop2:
253 	change = 0;
254 	for(r = firstr; r != R; r = r->link)
255 		r->active = 0;
256 	synch(firstr, zbits);
257 	if(change)
258 		goto loop2;
259 
260 	addsplits();
261 
262 	if(debug['R'] && debug['v']) {
263 		print("\nprop structure:\n");
264 		for(r = firstr; r != R; r = r->link) {
265 			print("%ld:%P", r->loop, r->prog);
266 			for(z=0; z<BITS; z++)
267 				bit.b[z] = r->set.b[z] |
268 					r->refahead.b[z] | r->calahead.b[z] |
269 					r->refbehind.b[z] | r->calbehind.b[z] |
270 					r->use1.b[z] | r->use2.b[z];
271 			if(bany(&bit)) {
272 				print("\t");
273 				if(bany(&r->use1))
274 					print(" u1=%B", r->use1);
275 				if(bany(&r->use2))
276 					print(" u2=%B", r->use2);
277 				if(bany(&r->set))
278 					print(" st=%B", r->set);
279 				if(bany(&r->refahead))
280 					print(" ra=%B", r->refahead);
281 				if(bany(&r->calahead))
282 					print(" ca=%B", r->calahead);
283 				if(bany(&r->refbehind))
284 					print(" rb=%B", r->refbehind);
285 				if(bany(&r->calbehind))
286 					print(" cb=%B", r->calbehind);
287 				if(bany(&r->regdiff))
288 					print(" rd=%B", r->regdiff);
289 			}
290 			print("\n");
291 		}
292 	}
293 
294 	/*
295 	 * pass 5
296 	 * isolate regions
297 	 * calculate costs (paint1)
298 	 */
299 	r = firstr;
300 	if(r) {
301 		for(z=0; z<BITS; z++)
302 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
303 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
304 		if(bany(&bit)) {
305 			nearln = r->prog->lineno;
306 			warn(Z, "used and not set: %B", bit);
307 			if(debug['R'] && !debug['w'])
308 				print("used and not set: %B\n", bit);
309 		}
310 	}
311 
312 	for(r = firstr; r != R; r = r->link)
313 		r->act = zbits;
314 	rgp = region;
315 	nregion = 0;
316 	for(r = firstr; r != R; r = r->link) {
317 		for(z=0; z<BITS; z++)
318 			bit.b[z] = r->set.b[z] &
319 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
320 		if(bany(&bit)) {
321 			nearln = r->prog->lineno;
322 			warn(Z, "set and not used: %B", bit);
323 			if(debug['R'])
324 				print("set and not used: %B\n", bit);
325 			excise(r);
326 		}
327 		for(z=0; z<BITS; z++)
328 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
329 		while(bany(&bit)) {
330 			i = bnum(bit);
331 			rgp->enter = r;
332 			rgp->varno = i;
333 			change = 0;
334 			if(debug['R'] && debug['v'])
335 				print("\n");
336 			paint1(r, i);
337 			bit.b[i/32] &= ~(1L<<(i%32));
338 			if(change <= 0) {
339 				if(debug['R'])
340 					print("%L $%d: %B\n",
341 						r->prog->lineno, change, blsh(i));
342 				continue;
343 			}
344 			rgp->cost = change;
345 			nregion++;
346 			if(nregion >= NRGN) {
347 				warn(Z, "too many regions");
348 				goto brk;
349 			}
350 			rgp++;
351 		}
352 	}
353 brk:
354 	qsort(region, nregion, sizeof(region[0]), rcmp);
355 
356 	/*
357 	 * pass 6
358 	 * determine used registers (paint2)
359 	 * replace code (paint3)
360 	 */
361 	rgp = region;
362 	for(i=0; i<nregion; i++) {
363 		bit = blsh(rgp->varno);
364 		vreg = paint2(rgp->enter, rgp->varno);
365 		vreg = allreg(vreg, rgp);
366 		if(debug['R']) {
367 			if(rgp->regno >= NREG)
368 				print("%L $%d F%d: %B\n",
369 					rgp->enter->prog->lineno,
370 					rgp->cost,
371 					rgp->regno-NREG,
372 					bit);
373 			else
374 				print("%L $%d R%d: %B\n",
375 					rgp->enter->prog->lineno,
376 					rgp->cost,
377 					rgp->regno,
378 					bit);
379 		}
380 		if(rgp->regno != 0)
381 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
382 		rgp++;
383 	}
384 	/*
385 	 * pass 7
386 	 * peep-hole on basic block
387 	 */
388 	if(!debug['R'] || debug['P'])
389 		peep();
390 
391 	/*
392 	 * pass 8
393 	 * recalculate pc
394 	 */
395 	val = initpc;
396 	for(r = firstr; r != R; r = r1) {
397 		r->pc = val;
398 		p = r->prog;
399 		p1 = P;
400 		r1 = r->link;
401 		if(r1 != R)
402 			p1 = r1->prog;
403 		for(; p != p1; p = p->link) {
404 			switch(p->as) {
405 			default:
406 				val++;
407 				break;
408 
409 			case ANOP:
410 			case ADATA:
411 			case AGLOBL:
412 			case ANAME:
413 			case ASIGNAME:
414 				break;
415 			}
416 		}
417 	}
418 	pc = val;
419 
420 	/*
421 	 * fix up branches
422 	 */
423 	if(debug['R'])
424 		if(bany(&addrs))
425 			print("addrs: %B\n", addrs);
426 
427 	r1 = 0; /* set */
428 	for(r = firstr; r != R; r = r->link) {
429 		p = r->prog;
430 		if(p->to.type == D_BRANCH)
431 			p->to.offset = r->s2->pc;
432 		r1 = r;
433 	}
434 
435 	/*
436 	 * last pass
437 	 * eliminate nops
438 	 * free aux structures
439 	 */
440 	for(p = firstr->prog; p != P; p = p->link){
441 		while(p->link && p->link->as == ANOP)
442 			p->link = p->link->link;
443 	}
444 	if(r1 != R) {
445 		r1->link = freer;
446 		freer = firstr;
447 	}
448 }
449 
450 void
451 addsplits(void)
452 {
453 	Reg *r, *r1;
454 	int z, i;
455 	Bits bit;
456 
457 	for(r = firstr; r != R; r = r->link) {
458 		if(r->loop > 1)
459 			continue;
460 		if(r->prog->as == AJAL)
461 			continue;
462 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
463 			if(r1->loop <= 1)
464 				continue;
465 			for(z=0; z<BITS; z++)
466 				bit.b[z] = r1->calbehind.b[z] &
467 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
468 					~(r->calahead.b[z] & addrs.b[z]);
469 			while(bany(&bit)) {
470 				i = bnum(bit);
471 				bit.b[i/32] &= ~(1L << (i%32));
472 			}
473 		}
474 	}
475 }
476 
477 /*
478  * add mov b,rn
479  * just after r
480  */
481 void
482 addmove(Reg *r, int bn, int rn, int f)
483 {
484 	Prog *p, *p1;
485 	Adr *a;
486 	Var *v;
487 
488 	p1 = alloc(sizeof(*p1));
489 	*p1 = zprog;
490 	p = r->prog;
491 
492 	p1->link = p->link;
493 	p->link = p1;
494 	p1->lineno = p->lineno;
495 
496 	v = var + bn;
497 
498 	a = &p1->to;
499 	a->sym = v->sym;
500 	a->name = v->name;
501 	a->offset = v->offset;
502 	a->etype = v->etype;
503 	a->type = D_OREG;
504 	if(a->etype == TARRAY || a->sym == S)
505 		a->type = D_CONST;
506 
507 	p1->as = AMOVW;
508 	if(v->etype == TCHAR || v->etype == TUCHAR)
509 		p1->as = AMOVB;
510 	if(v->etype == TSHORT || v->etype == TUSHORT)
511 		p1->as = AMOVH;
512 	if(v->etype == TFLOAT)
513 		p1->as = AMOVF;
514 	if(v->etype == TDOUBLE)
515 		p1->as = AMOVD;
516 
517 	p1->from.type = D_REG;
518 	p1->from.reg = rn;
519 	if(rn >= NREG) {
520 		p1->from.type = D_FREG;
521 		p1->from.reg = rn-NREG;
522 	}
523 	if(!f) {
524 		p1->from = *a;
525 		*a = zprog.from;
526 		a->type = D_REG;
527 		a->reg = rn;
528 		if(rn >= NREG) {
529 			a->type = D_FREG;
530 			a->reg = rn-NREG;
531 		}
532 		if(v->etype == TUCHAR)
533 			p1->as = AMOVBU;
534 		if(v->etype == TUSHORT)
535 			p1->as = AMOVHU;
536 	}
537 	if(debug['R'])
538 		print("%P\t.a%P\n", p, p1);
539 }
540 
541 Bits
542 mkvar(Adr *a, int docon)
543 {
544 	Var *v;
545 	int i, t, n, et, z;
546 	long o;
547 	Bits bit;
548 	Sym *s;
549 
550 	t = a->type;
551 	if(t == D_REG && a->reg != NREG)
552 		regbits |= RtoB(a->reg);
553 	if(t == D_FREG && a->reg != NREG)
554 		regbits |= FtoB(a->reg);
555 	s = a->sym;
556 	o = a->offset;
557 	et = a->etype;
558 	if(s == S) {
559 		if(t != D_CONST || !docon || a->reg != NREG)
560 			goto none;
561 		et = TLONG;
562 	}
563 	if(t == D_CONST) {
564 		if(s == S && sval(o))
565 			goto none;
566 	}
567 
568 	n = a->name;
569 	v = var;
570 	for(i=0; i<nvar; i++) {
571 		if(s == v->sym)
572 		if(n == v->name)
573 		if(o == v->offset)
574 			goto out;
575 		v++;
576 	}
577 	if(s)
578 		if(s->name[0] == '.')
579 			goto none;
580 	if(nvar >= NVAR) {
581 		if(debug['w'] > 1 && s)
582 			warn(Z, "variable not optimized: %s", s->name);
583 		goto none;
584 	}
585 	i = nvar;
586 	nvar++;
587 	v = &var[i];
588 	v->sym = s;
589 	v->offset = o;
590 	v->etype = et;
591 	v->name = n;
592 	if(debug['R'])
593 		print("bit=%2d et=%2d %D\n", i, et, a);
594 out:
595 	bit = blsh(i);
596 	if(n == D_EXTERN || n == D_STATIC)
597 		for(z=0; z<BITS; z++)
598 			externs.b[z] |= bit.b[z];
599 	if(n == D_PARAM)
600 		for(z=0; z<BITS; z++)
601 			params.b[z] |= bit.b[z];
602 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
603 		for(z=0; z<BITS; z++)
604 			addrs.b[z] |= bit.b[z];
605 	if(t == D_CONST) {
606 		if(s == S) {
607 			for(z=0; z<BITS; z++)
608 				consts.b[z] |= bit.b[z];
609 			return bit;
610 		}
611 		if(et != TARRAY)
612 			for(z=0; z<BITS; z++)
613 				addrs.b[z] |= bit.b[z];
614 		for(z=0; z<BITS; z++)
615 			params.b[z] |= bit.b[z];
616 		return bit;
617 	}
618 	if(t == D_OREG)
619 		return bit;
620 
621 none:
622 	return zbits;
623 }
624 
625 void
626 prop(Reg *r, Bits ref, Bits cal)
627 {
628 	Reg *r1, *r2;
629 	int z;
630 
631 	for(r1 = r; r1 != R; r1 = r1->p1) {
632 		for(z=0; z<BITS; z++) {
633 			ref.b[z] |= r1->refahead.b[z];
634 			if(ref.b[z] != r1->refahead.b[z]) {
635 				r1->refahead.b[z] = ref.b[z];
636 				change++;
637 			}
638 			cal.b[z] |= r1->calahead.b[z];
639 			if(cal.b[z] != r1->calahead.b[z]) {
640 				r1->calahead.b[z] = cal.b[z];
641 				change++;
642 			}
643 		}
644 		switch(r1->prog->as) {
645 		case AJAL:
646 			for(z=0; z<BITS; z++) {
647 				cal.b[z] |= ref.b[z] | externs.b[z];
648 				ref.b[z] = 0;
649 			}
650 			break;
651 
652 		case ATEXT:
653 			for(z=0; z<BITS; z++) {
654 				cal.b[z] = 0;
655 				ref.b[z] = 0;
656 			}
657 			break;
658 
659 		case ARET:
660 			for(z=0; z<BITS; z++) {
661 				cal.b[z] = externs.b[z];
662 				ref.b[z] = 0;
663 			}
664 		}
665 		for(z=0; z<BITS; z++) {
666 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
667 				r1->use1.b[z] | r1->use2.b[z];
668 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
669 			r1->refbehind.b[z] = ref.b[z];
670 			r1->calbehind.b[z] = cal.b[z];
671 		}
672 		if(r1->active)
673 			break;
674 		r1->active = 1;
675 	}
676 	for(; r != r1; r = r->p1)
677 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
678 			prop(r2, r->refbehind, r->calbehind);
679 }
680 
681 /*
682  * find looping structure
683  *
684  * 1) find reverse postordering
685  * 2) find approximate dominators,
686  *	the actual dominators if the flow graph is reducible
687  *	otherwise, dominators plus some other non-dominators.
688  *	See Matthew S. Hecht and Jeffrey D. Ullman,
689  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
690  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
691  *	Oct. 1-3, 1973, pp.  207-217.
692  * 3) find all nodes with a predecessor dominated by the current node.
693  *	such a node is a loop head.
694  *	recursively, all preds with a greater rpo number are in the loop
695  */
696 long
697 postorder(Reg *r, Reg **rpo2r, long n)
698 {
699 	Reg *r1;
700 
701 	r->rpo = 1;
702 	r1 = r->s1;
703 	if(r1 && !r1->rpo)
704 		n = postorder(r1, rpo2r, n);
705 	r1 = r->s2;
706 	if(r1 && !r1->rpo)
707 		n = postorder(r1, rpo2r, n);
708 	rpo2r[n] = r;
709 	n++;
710 	return n;
711 }
712 
713 long
714 rpolca(long *idom, long rpo1, long rpo2)
715 {
716 	long t;
717 
718 	if(rpo1 == -1)
719 		return rpo2;
720 	while(rpo1 != rpo2){
721 		if(rpo1 > rpo2){
722 			t = rpo2;
723 			rpo2 = rpo1;
724 			rpo1 = t;
725 		}
726 		while(rpo1 < rpo2){
727 			t = idom[rpo2];
728 			if(t >= rpo2)
729 				fatal(Z, "bad idom");
730 			rpo2 = t;
731 		}
732 	}
733 	return rpo1;
734 }
735 
736 int
737 doms(long *idom, long r, long s)
738 {
739 	while(s > r)
740 		s = idom[s];
741 	return s == r;
742 }
743 
744 int
745 loophead(long *idom, Reg *r)
746 {
747 	long src;
748 
749 	src = r->rpo;
750 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
751 		return 1;
752 	for(r = r->p2; r != R; r = r->p2link)
753 		if(doms(idom, src, r->rpo))
754 			return 1;
755 	return 0;
756 }
757 
758 void
759 loopmark(Reg **rpo2r, long head, Reg *r)
760 {
761 	if(r->rpo < head || r->active == head)
762 		return;
763 	r->active = head;
764 	r->loop += LOOP;
765 	if(r->p1 != R)
766 		loopmark(rpo2r, head, r->p1);
767 	for(r = r->p2; r != R; r = r->p2link)
768 		loopmark(rpo2r, head, r);
769 }
770 
771 void
772 loopit(Reg *r, long nr)
773 {
774 	Reg *r1;
775 	long i, d, me;
776 
777 	if(nr > maxnr) {
778 		rpo2r = alloc(nr * sizeof(Reg*));
779 		idom = alloc(nr * sizeof(long));
780 		maxnr = nr;
781 	}
782 
783 	d = postorder(r, rpo2r, 0);
784 	if(d > nr)
785 		fatal(Z, "too many reg nodes");
786 	nr = d;
787 	for(i = 0; i < nr / 2; i++){
788 		r1 = rpo2r[i];
789 		rpo2r[i] = rpo2r[nr - 1 - i];
790 		rpo2r[nr - 1 - i] = r1;
791 	}
792 	for(i = 0; i < nr; i++)
793 		rpo2r[i]->rpo = i;
794 
795 	idom[0] = 0;
796 	for(i = 0; i < nr; i++){
797 		r1 = rpo2r[i];
798 		me = r1->rpo;
799 		d = -1;
800 		if(r1->p1 != R && r1->p1->rpo < me)
801 			d = r1->p1->rpo;
802 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
803 			if(r1->rpo < me)
804 				d = rpolca(idom, d, r1->rpo);
805 		idom[i] = d;
806 	}
807 
808 	for(i = 0; i < nr; i++){
809 		r1 = rpo2r[i];
810 		r1->loop++;
811 		if(r1->p2 != R && loophead(idom, r1))
812 			loopmark(rpo2r, i, r1);
813 	}
814 }
815 
816 void
817 synch(Reg *r, Bits dif)
818 {
819 	Reg *r1;
820 	int z;
821 
822 	for(r1 = r; r1 != R; r1 = r1->s1) {
823 		for(z=0; z<BITS; z++) {
824 			dif.b[z] = (dif.b[z] &
825 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
826 					r1->set.b[z] | r1->regdiff.b[z];
827 			if(dif.b[z] != r1->regdiff.b[z]) {
828 				r1->regdiff.b[z] = dif.b[z];
829 				change++;
830 			}
831 		}
832 		if(r1->active)
833 			break;
834 		r1->active = 1;
835 		for(z=0; z<BITS; z++)
836 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
837 		if(r1->s2 != R)
838 			synch(r1->s2, dif);
839 	}
840 }
841 
842 ulong
843 allreg(ulong b, Rgn *r)
844 {
845 	Var *v;
846 	int i;
847 
848 	v = var + r->varno;
849 	r->regno = 0;
850 	switch(v->etype) {
851 
852 	default:
853 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
854 		break;
855 
856 	case TCHAR:
857 	case TUCHAR:
858 	case TSHORT:
859 	case TUSHORT:
860 	case TINT:
861 	case TUINT:
862 	case TLONG:
863 	case TULONG:
864 	case TIND:
865 	case TARRAY:
866 		i = BtoR(~b);
867 		if(i && r->cost >= 0) {
868 			r->regno = i;
869 			return RtoB(i);
870 		}
871 		break;
872 
873 	case TDOUBLE:
874 	case TFLOAT:
875 		i = BtoF(~b);
876 		if(i && r->cost >= 0) {
877 			r->regno = i+NREG;
878 			return FtoB(i);
879 		}
880 		break;
881 	}
882 	return 0;
883 }
884 
885 void
886 paint1(Reg *r, int bn)
887 {
888 	Reg *r1;
889 	Prog *p;
890 	int z;
891 	ulong bb;
892 
893 	z = bn/32;
894 	bb = 1L<<(bn%32);
895 	if(r->act.b[z] & bb)
896 		return;
897 	for(;;) {
898 		if(!(r->refbehind.b[z] & bb))
899 			break;
900 		r1 = r->p1;
901 		if(r1 == R)
902 			break;
903 		if(!(r1->refahead.b[z] & bb))
904 			break;
905 		if(r1->act.b[z] & bb)
906 			break;
907 		r = r1;
908 	}
909 
910 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
911 		change -= CLOAD * r->loop;
912 		if(debug['R'] && debug['v'])
913 			print("%ld%P\tld %B $%d\n", r->loop,
914 				r->prog, blsh(bn), change);
915 	}
916 	for(;;) {
917 		r->act.b[z] |= bb;
918 		p = r->prog;
919 
920 		if(r->use1.b[z] & bb) {
921 			change += CREF * r->loop;
922 			if(debug['R'] && debug['v'])
923 				print("%ld%P\tu1 %B $%d\n", r->loop,
924 					p, blsh(bn), change);
925 		}
926 
927 		if((r->use2.b[z]|r->set.b[z]) & bb) {
928 			change += CREF * r->loop;
929 			if(debug['R'] && debug['v'])
930 				print("%ld%P\tu2 %B $%d\n", r->loop,
931 					p, blsh(bn), change);
932 		}
933 
934 		if(STORE(r) & r->regdiff.b[z] & bb) {
935 			change -= CLOAD * r->loop;
936 			if(debug['R'] && debug['v'])
937 				print("%ld%P\tst %B $%d\n", r->loop,
938 					p, blsh(bn), change);
939 		}
940 
941 		if(r->refbehind.b[z] & bb)
942 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
943 				if(r1->refahead.b[z] & bb)
944 					paint1(r1, bn);
945 
946 		if(!(r->refahead.b[z] & bb))
947 			break;
948 		r1 = r->s2;
949 		if(r1 != R)
950 			if(r1->refbehind.b[z] & bb)
951 				paint1(r1, bn);
952 		r = r->s1;
953 		if(r == R)
954 			break;
955 		if(r->act.b[z] & bb)
956 			break;
957 		if(!(r->refbehind.b[z] & bb))
958 			break;
959 	}
960 }
961 
962 ulong
963 paint2(Reg *r, int bn)
964 {
965 	Reg *r1;
966 	int z;
967 	ulong bb, vreg;
968 
969 	z = bn/32;
970 	bb = 1L << (bn%32);
971 	vreg = regbits;
972 	if(!(r->act.b[z] & bb))
973 		return vreg;
974 	for(;;) {
975 		if(!(r->refbehind.b[z] & bb))
976 			break;
977 		r1 = r->p1;
978 		if(r1 == R)
979 			break;
980 		if(!(r1->refahead.b[z] & bb))
981 			break;
982 		if(!(r1->act.b[z] & bb))
983 			break;
984 		r = r1;
985 	}
986 	for(;;) {
987 		r->act.b[z] &= ~bb;
988 
989 		vreg |= r->regu;
990 
991 		if(r->refbehind.b[z] & bb)
992 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
993 				if(r1->refahead.b[z] & bb)
994 					vreg |= paint2(r1, bn);
995 
996 		if(!(r->refahead.b[z] & bb))
997 			break;
998 		r1 = r->s2;
999 		if(r1 != R)
1000 			if(r1->refbehind.b[z] & bb)
1001 				vreg |= paint2(r1, bn);
1002 		r = r->s1;
1003 		if(r == R)
1004 			break;
1005 		if(!(r->act.b[z] & bb))
1006 			break;
1007 		if(!(r->refbehind.b[z] & bb))
1008 			break;
1009 	}
1010 	return vreg;
1011 }
1012 
1013 void
1014 paint3(Reg *r, int bn, long rb, int rn)
1015 {
1016 	Reg *r1;
1017 	Prog *p;
1018 	int z;
1019 	ulong bb;
1020 
1021 	z = bn/32;
1022 	bb = 1L << (bn%32);
1023 	if(r->act.b[z] & bb)
1024 		return;
1025 	for(;;) {
1026 		if(!(r->refbehind.b[z] & bb))
1027 			break;
1028 		r1 = r->p1;
1029 		if(r1 == R)
1030 			break;
1031 		if(!(r1->refahead.b[z] & bb))
1032 			break;
1033 		if(r1->act.b[z] & bb)
1034 			break;
1035 		r = r1;
1036 	}
1037 
1038 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1039 		addmove(r, bn, rn, 0);
1040 	for(;;) {
1041 		r->act.b[z] |= bb;
1042 		p = r->prog;
1043 
1044 		if(r->use1.b[z] & bb) {
1045 			if(debug['R'])
1046 				print("%P", p);
1047 			addreg(&p->from, rn);
1048 			if(debug['R'])
1049 				print("\t.c%P\n", p);
1050 		}
1051 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1052 			if(debug['R'])
1053 				print("%P", p);
1054 			addreg(&p->to, rn);
1055 			if(debug['R'])
1056 				print("\t.c%P\n", p);
1057 		}
1058 
1059 		if(STORE(r) & r->regdiff.b[z] & bb)
1060 			addmove(r, bn, rn, 1);
1061 		r->regu |= rb;
1062 
1063 		if(r->refbehind.b[z] & bb)
1064 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1065 				if(r1->refahead.b[z] & bb)
1066 					paint3(r1, bn, rb, rn);
1067 
1068 		if(!(r->refahead.b[z] & bb))
1069 			break;
1070 		r1 = r->s2;
1071 		if(r1 != R)
1072 			if(r1->refbehind.b[z] & bb)
1073 				paint3(r1, bn, rb, rn);
1074 		r = r->s1;
1075 		if(r == R)
1076 			break;
1077 		if(r->act.b[z] & bb)
1078 			break;
1079 		if(!(r->refbehind.b[z] & bb))
1080 			break;
1081 	}
1082 }
1083 
1084 void
1085 addreg(Adr *a, int rn)
1086 {
1087 
1088 	a->sym = 0;
1089 	a->name = D_NONE;
1090 	a->type = D_REG;
1091 	a->reg = rn;
1092 	if(rn >= NREG) {
1093 		a->type = D_FREG;
1094 		a->reg = rn-NREG;
1095 	}
1096 }
1097 
1098 /*
1099  *	bit	reg
1100  *	0	R3
1101  *	1	R4
1102  *	...	...
1103  *	19	R22
1104  *	20	R23
1105  */
1106 long
1107 RtoB(int r)
1108 {
1109 
1110 	if(r < 3 || r > 23)
1111 		return 0;
1112 	return 1L << (r-3);
1113 }
1114 
1115 int
1116 BtoR(long b)
1117 {
1118 
1119 	b &= 0x001fffffL;
1120 	if(b == 0)
1121 		return 0;
1122 	return bitno(b) + 3;
1123 }
1124 
1125 /*
1126  *	bit	reg
1127  *	22	F4
1128  *	23	F6
1129  *	...	...
1130  *	31	F22
1131  */
1132 long
1133 FtoB(int f)
1134 {
1135 
1136 	if(f < 4 || f > 22 || (f&1))
1137 		return 0;
1138 	return 1L << (f/2 + 20);
1139 }
1140 
1141 int
1142 BtoF(long b)
1143 {
1144 
1145 	b &= 0xffc00000L;
1146 	if(b == 0)
1147 		return 0;
1148 	return bitno(b)*2 - 40;
1149 }
1150