xref: /inferno-os/utils/0c/reg.c (revision 50b0dbb170df61467e42c7ea4deb0b5692d15f4c)
1 #include "gc.h"
2 
3 void	addsplits(void);
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		r = alloc(sizeof(*r));
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(void *a1, void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = (Rgn*)a1;
27 	p2 = (Rgn*)a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val, npc;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = 0;
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 		case ASIGNAME:
84 			continue;
85 		}
86 		r = rega();
87 		if(firstr == R) {
88 			firstr = r;
89 			lastr = r;
90 		} else {
91 			lastr->link = r;
92 			r->p1 = lastr;
93 			lastr->s1 = r;
94 			lastr = r;
95 		}
96 		r->prog = p;
97 		r->pc = val;
98 		val++;
99 
100 		lp = log5;
101 		for(i=0; i<5; i++) {
102 			lp->c--;
103 			if(lp->c <= 0) {
104 				lp->c = lp->m;
105 				if(lp->p != R)
106 					lp->p->log5 = r;
107 				lp->p = r;
108 				(lp+1)->c = 0;
109 				break;
110 			}
111 			lp++;
112 		}
113 
114 		r1 = r->p1;
115 		if(r1 != R)
116 		switch(r1->prog->as) {
117 		case ARET:
118 		case AJMP:
119 		case ARFE:
120 			r->p1 = R;
121 			r1->s1 = R;
122 		}
123 
124 		/*
125 		 * left side always read
126 		 */
127 		bit = mkvar(&p->from, p->as==AMOVW);
128 		for(z=0; z<BITS; z++)
129 			r->use1.b[z] |= bit.b[z];
130 
131 		/*
132 		 * right side depends on opcode
133 		 */
134 		bit = mkvar(&p->to, 0);
135 		if(bany(&bit))
136 		switch(p->as) {
137 		default:
138 			diag(Z, "reg: unknown asop: %A", p->as);
139 			break;
140 
141 		/*
142 		 * right side write
143 		 */
144 		case ANOP:
145 		case AMOVB:
146 		case AMOVBU:
147 		case AMOVH:
148 		case AMOVHU:
149 		case AMOVW:
150 		case AMOVV:
151 		case AMOVF:
152 		case AMOVD:
153 			for(z=0; z<BITS; z++)
154 				r->set.b[z] |= bit.b[z];
155 			break;
156 
157 		/*
158 		 * funny
159 		 */
160 		case AJAL:
161 			for(z=0; z<BITS; z++)
162 				addrs.b[z] |= bit.b[z];
163 			break;
164 		}
165 	}
166 	if(firstr == R)
167 		return;
168 	initpc = pc - val;
169 	npc = val;
170 
171 	/*
172 	 * pass 2
173 	 * turn branch references to pointers
174 	 * build back pointers
175 	 */
176 	for(r = firstr; r != R; r = r->link) {
177 		p = r->prog;
178 		if(p->to.type == D_BRANCH) {
179 			val = p->to.offset - initpc;
180 			r1 = firstr;
181 			while(r1 != R) {
182 				r2 = r1->log5;
183 				if(r2 != R && val >= r2->pc) {
184 					r1 = r2;
185 					continue;
186 				}
187 				if(r1->pc == val)
188 					break;
189 				r1 = r1->link;
190 			}
191 			if(r1 == R) {
192 				nearln = p->lineno;
193 				diag(Z, "ref not found\n%P", p);
194 				continue;
195 			}
196 			if(r1 == r) {
197 				nearln = p->lineno;
198 				diag(Z, "ref to self\n%P", p);
199 				continue;
200 			}
201 			r->s2 = r1;
202 			r->p2link = r1->p2;
203 			r1->p2 = r;
204 		}
205 	}
206 	if(debug['R']) {
207 		p = firstr->prog;
208 		print("\n%L %D\n", p->lineno, &p->from);
209 	}
210 
211 	/*
212 	 * pass 2.5
213 	 * find looping structure
214 	 */
215 	for(r = firstr; r != R; r = r->link)
216 		r->active = 0;
217 	change = 0;
218 	loopit(firstr, npc);
219 
220 	/*
221 	 * pass 3
222 	 * iterate propagating usage
223 	 * 	back until flow graph is complete
224 	 */
225 loop1:
226 	change = 0;
227 	for(r = firstr; r != R; r = r->link)
228 		r->active = 0;
229 	for(r = firstr; r != R; r = r->link)
230 		if(r->prog->as == ARET)
231 			prop(r, zbits, zbits);
232 loop11:
233 	/* pick up unreachable code */
234 	i = 0;
235 	for(r = firstr; r != R; r = r1) {
236 		r1 = r->link;
237 		if(r1 && r1->active && !r->active) {
238 			prop(r, zbits, zbits);
239 			i = 1;
240 		}
241 	}
242 	if(i)
243 		goto loop11;
244 	if(change)
245 		goto loop1;
246 
247 
248 	/*
249 	 * pass 4
250 	 * iterate propagating register/variable synchrony
251 	 * 	forward until graph is complete
252 	 */
253 loop2:
254 	change = 0;
255 	for(r = firstr; r != R; r = r->link)
256 		r->active = 0;
257 	synch(firstr, zbits);
258 	if(change)
259 		goto loop2;
260 
261 	addsplits();
262 
263 	if(debug['R'] && debug['v']) {
264 		print("\nprop structure:\n");
265 		for(r = firstr; r != R; r = r->link) {
266 			print("%ld:%P", r->loop, r->prog);
267 			for(z=0; z<BITS; z++)
268 				bit.b[z] = r->set.b[z] |
269 					r->refahead.b[z] | r->calahead.b[z] |
270 					r->refbehind.b[z] | r->calbehind.b[z] |
271 					r->use1.b[z] | r->use2.b[z];
272 			if(bany(&bit)) {
273 				print("\t");
274 				if(bany(&r->use1))
275 					print(" u1=%B", r->use1);
276 				if(bany(&r->use2))
277 					print(" u2=%B", r->use2);
278 				if(bany(&r->set))
279 					print(" st=%B", r->set);
280 				if(bany(&r->refahead))
281 					print(" ra=%B", r->refahead);
282 				if(bany(&r->calahead))
283 					print(" ca=%B", r->calahead);
284 				if(bany(&r->refbehind))
285 					print(" rb=%B", r->refbehind);
286 				if(bany(&r->calbehind))
287 					print(" cb=%B", r->calbehind);
288 			}
289 			print("\n");
290 		}
291 	}
292 
293 	/*
294 	 * pass 5
295 	 * isolate regions
296 	 * calculate costs (paint1)
297 	 */
298 	r = firstr;
299 	if(r) {
300 		for(z=0; z<BITS; z++)
301 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
302 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
303 		if(bany(&bit)) {
304 			nearln = r->prog->lineno;
305 			warn(Z, "used and not set: %B", bit);
306 			if(debug['R'] && !debug['w'])
307 				print("used and not set: %B\n", bit);
308 		}
309 	}
310 
311 	for(r = firstr; r != R; r = r->link)
312 		r->act = zbits;
313 	rgp = region;
314 	nregion = 0;
315 	for(r = firstr; r != R; r = r->link) {
316 		for(z=0; z<BITS; z++)
317 			bit.b[z] = r->set.b[z] &
318 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
319 		if(bany(&bit)) {
320 			nearln = r->prog->lineno;
321 			warn(Z, "set and not used: %B", bit);
322 			if(debug['R'])
323 				print("set and not used: %B\n", bit);
324 			excise(r);
325 		}
326 		for(z=0; z<BITS; z++)
327 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
328 		while(bany(&bit)) {
329 			i = bnum(bit);
330 			rgp->enter = r;
331 			rgp->varno = i;
332 			change = 0;
333 			if(debug['R'] && debug['v'])
334 				print("\n");
335 			paint1(r, i);
336 			bit.b[i/32] &= ~(1L<<(i%32));
337 			if(change <= 0) {
338 				if(debug['R'])
339 					print("%L $%d: %B\n",
340 						r->prog->lineno, change, blsh(i));
341 				continue;
342 			}
343 			rgp->cost = change;
344 			nregion++;
345 			if(nregion >= NRGN) {
346 				warn(Z, "too many regions");
347 				goto brk;
348 			}
349 			rgp++;
350 		}
351 	}
352 brk:
353 	qsort(region, nregion, sizeof(region[0]), rcmp);
354 
355 	/*
356 	 * pass 6
357 	 * determine used registers (paint2)
358 	 * replace code (paint3)
359 	 */
360 	rgp = region;
361 	for(i=0; i<nregion; i++) {
362 		bit = blsh(rgp->varno);
363 		vreg = paint2(rgp->enter, rgp->varno);
364 		vreg = allreg(vreg, rgp);
365 		if(debug['R']) {
366 			if(rgp->regno >= NREG)
367 				print("%L $%d F%d: %B\n",
368 					rgp->enter->prog->lineno,
369 					rgp->cost,
370 					rgp->regno-NREG,
371 					bit);
372 			else
373 				print("%L $%d R%d: %B\n",
374 					rgp->enter->prog->lineno,
375 					rgp->cost,
376 					rgp->regno,
377 					bit);
378 		}
379 		if(rgp->regno != 0)
380 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
381 		rgp++;
382 	}
383 	/*
384 	 * pass 7
385 	 * peep-hole on basic block
386 	 */
387 	if(!debug['R'] || debug['P'])
388 		peep();
389 
390 	/*
391 	 * pass 8
392 	 * recalculate pc
393 	 */
394 	val = initpc;
395 	for(r = firstr; r != R; r = r1) {
396 		r->pc = val;
397 		p = r->prog;
398 		p1 = P;
399 		r1 = r->link;
400 		if(r1 != R)
401 			p1 = r1->prog;
402 		for(; p != p1; p = p->link) {
403 			switch(p->as) {
404 			default:
405 				val++;
406 				break;
407 
408 			case ANOP:
409 			case ADATA:
410 			case AGLOBL:
411 			case ANAME:
412 			case ASIGNAME:
413 				break;
414 			}
415 		}
416 	}
417 	pc = val;
418 
419 	/*
420 	 * fix up branches
421 	 */
422 	if(debug['R'])
423 		if(bany(&addrs))
424 			print("addrs: %B\n", addrs);
425 
426 	r1 = 0; /* set */
427 	for(r = firstr; r != R; r = r->link) {
428 		p = r->prog;
429 		if(p->to.type == D_BRANCH)
430 			p->to.offset = r->s2->pc;
431 		r1 = r;
432 	}
433 
434 	/*
435 	 * last pass
436 	 * eliminate nops
437 	 * free aux structures
438 	 */
439 	for(p = firstr->prog; p != P; p = p->link){
440 		while(p->link && p->link->as == ANOP)
441 			p->link = p->link->link;
442 	}
443 	if(r1 != R) {
444 		r1->link = freer;
445 		freer = firstr;
446 	}
447 }
448 
449 void
450 addsplits(void)
451 {
452 	Reg *r, *r1;
453 	int z, i;
454 	Bits bit;
455 
456 	for(r = firstr; r != R; r = r->link) {
457 		if(r->loop > 1)
458 			continue;
459 		if(r->prog->as == AJAL)
460 			continue;
461 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
462 			if(r1->loop <= 1)
463 				continue;
464 			for(z=0; z<BITS; z++)
465 				bit.b[z] = r1->calbehind.b[z] &
466 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
467 					~(r->calahead.b[z] & addrs.b[z]);
468 			while(bany(&bit)) {
469 				i = bnum(bit);
470 				bit.b[i/32] &= ~(1L << (i%32));
471 			}
472 		}
473 	}
474 }
475 
476 /*
477  * add mov b,rn
478  * just after r
479  */
480 void
481 addmove(Reg *r, int bn, int rn, int f)
482 {
483 	Prog *p, *p1;
484 	Adr *a;
485 	Var *v;
486 
487 	p1 = alloc(sizeof(*p1));
488 	*p1 = zprog;
489 	p = r->prog;
490 
491 	p1->link = p->link;
492 	p->link = p1;
493 	p1->lineno = p->lineno;
494 
495 	v = var + bn;
496 
497 	a = &p1->to;
498 	a->sym = v->sym;
499 	a->name = v->name;
500 	a->offset = v->offset;
501 	a->etype = v->etype;
502 	a->type = D_OREG;
503 	if(a->etype == TARRAY || a->sym == S)
504 		a->type = D_CONST;
505 
506 	p1->as = AMOVW;
507 	if(v->etype == TCHAR || v->etype == TUCHAR)
508 		p1->as = AMOVB;
509 	if(v->etype == TSHORT || v->etype == TUSHORT)
510 		p1->as = AMOVH;
511 	if(v->etype == TVLONG || v->etype == TUVLONG)
512 		p1->as = AMOVV;
513 	if(v->etype == TFLOAT)
514 		p1->as = AMOVF;
515 	if(v->etype == TDOUBLE)
516 		p1->as = AMOVD;
517 
518 	p1->from.type = D_REG;
519 	p1->from.reg = rn;
520 	if(rn >= NREG) {
521 		p1->from.type = D_FREG;
522 		p1->from.reg = rn-NREG;
523 	}
524 	if(!f) {
525 		p1->from = *a;
526 		*a = zprog.from;
527 		a->type = D_REG;
528 		a->reg = rn;
529 		if(rn >= NREG) {
530 			a->type = D_FREG;
531 			a->reg = rn-NREG;
532 		}
533 		if(v->etype == TUCHAR)
534 			p1->as = AMOVBU;
535 		if(v->etype == TUSHORT)
536 			p1->as = AMOVHU;
537 	}
538 	if(debug['R'])
539 		print("%P\t.a%P\n", p, p1);
540 }
541 
542 Bits
543 mkvar(Adr *a, int docon)
544 {
545 	Var *v;
546 	int i, t, n, et, z;
547 	long o;
548 	Bits bit;
549 	Sym *s;
550 
551 	t = a->type;
552 	if(t == D_REG && a->reg != NREG)
553 		regbits |= RtoB(a->reg);
554 	if(t == D_FREG && a->reg != NREG)
555 		regbits |= FtoB(a->reg);
556 	s = a->sym;
557 	o = a->offset;
558 	et = a->etype;
559 	if(s == S) {
560 		if(t != D_CONST || !docon || a->reg != NREG)
561 			goto none;
562 		et = TLONG;
563 	}
564 	if(t == D_CONST) {
565 		if(s == S && sval(o))
566 			goto none;
567 	}
568 
569 	n = a->name;
570 	v = var;
571 	for(i=0; i<nvar; i++) {
572 		if(s == v->sym)
573 		if(n == v->name)
574 		if(o == v->offset)
575 			goto out;
576 		v++;
577 	}
578 	if(s)
579 		if(s->name[0] == '.')
580 			goto none;
581 	if(nvar >= NVAR) {
582 		if(debug['w'] > 1 && s)
583 			warn(Z, "variable not optimized: %s", s->name);
584 		goto none;
585 	}
586 	i = nvar;
587 	nvar++;
588 	v = &var[i];
589 	v->sym = s;
590 	v->offset = o;
591 	v->etype = et;
592 	v->name = n;
593 	if(debug['R'])
594 		print("bit=%2d et=%2d %D\n", i, et, a);
595 out:
596 	bit = blsh(i);
597 	if(n == D_EXTERN || n == D_STATIC)
598 		for(z=0; z<BITS; z++)
599 			externs.b[z] |= bit.b[z];
600 	if(n == D_PARAM)
601 		for(z=0; z<BITS; z++)
602 			params.b[z] |= bit.b[z];
603 	if(v->etype != et || !(typechlpfd[et]/* || typev[et]*/))	/* funny punning */
604 		for(z=0; z<BITS; z++)
605 			addrs.b[z] |= bit.b[z];
606 	if(t == D_CONST) {
607 		if(s == S) {
608 			for(z=0; z<BITS; z++)
609 				consts.b[z] |= bit.b[z];
610 			return bit;
611 		}
612 		if(et != TARRAY)
613 			for(z=0; z<BITS; z++)
614 				addrs.b[z] |= bit.b[z];
615 		for(z=0; z<BITS; z++)
616 			params.b[z] |= bit.b[z];
617 		return bit;
618 	}
619 	if(t == D_OREG)
620 		return bit;
621 
622 none:
623 	return zbits;
624 }
625 
626 void
627 prop(Reg *r, Bits ref, Bits cal)
628 {
629 	Reg *r1, *r2;
630 	int z;
631 
632 	for(r1 = r; r1 != R; r1 = r1->p1) {
633 		for(z=0; z<BITS; z++) {
634 			ref.b[z] |= r1->refahead.b[z];
635 			if(ref.b[z] != r1->refahead.b[z]) {
636 				r1->refahead.b[z] = ref.b[z];
637 				change++;
638 			}
639 			cal.b[z] |= r1->calahead.b[z];
640 			if(cal.b[z] != r1->calahead.b[z]) {
641 				r1->calahead.b[z] = cal.b[z];
642 				change++;
643 			}
644 		}
645 		switch(r1->prog->as) {
646 		case AJAL:
647 			for(z=0; z<BITS; z++) {
648 				cal.b[z] |= ref.b[z] | externs.b[z];
649 				ref.b[z] = 0;
650 			}
651 			break;
652 
653 		case ATEXT:
654 			for(z=0; z<BITS; z++) {
655 				cal.b[z] = 0;
656 				ref.b[z] = 0;
657 			}
658 			break;
659 
660 		case ARET:
661 			for(z=0; z<BITS; z++) {
662 				cal.b[z] = externs.b[z];
663 				ref.b[z] = 0;
664 			}
665 		}
666 		for(z=0; z<BITS; z++) {
667 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
668 				r1->use1.b[z] | r1->use2.b[z];
669 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
670 			r1->refbehind.b[z] = ref.b[z];
671 			r1->calbehind.b[z] = cal.b[z];
672 		}
673 		if(r1->active)
674 			break;
675 		r1->active = 1;
676 	}
677 	for(; r != r1; r = r->p1)
678 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
679 			prop(r2, r->refbehind, r->calbehind);
680 }
681 
682 /*
683  * find looping structure
684  *
685  * 1) find reverse postordering
686  * 2) find approximate dominators,
687  *	the actual dominators if the flow graph is reducible
688  *	otherwise, dominators plus some other non-dominators.
689  *	See Matthew S. Hecht and Jeffrey D. Ullman,
690  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
691  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
692  *	Oct. 1-3, 1973, pp.  207-217.
693  * 3) find all nodes with a predecessor dominated by the current node.
694  *	such a node is a loop head.
695  *	recursively, all preds with a greater rpo number are in the loop
696  */
697 long
698 postorder(Reg *r, Reg **rpo2r, long n)
699 {
700 	Reg *r1;
701 
702 	r->rpo = 1;
703 	r1 = r->s1;
704 	if(r1 && !r1->rpo)
705 		n = postorder(r1, rpo2r, n);
706 	r1 = r->s2;
707 	if(r1 && !r1->rpo)
708 		n = postorder(r1, rpo2r, n);
709 	rpo2r[n] = r;
710 	n++;
711 	return n;
712 }
713 
714 long
715 rpolca(long *idom, long rpo1, long rpo2)
716 {
717 	long t;
718 
719 	if(rpo1 == -1)
720 		return rpo2;
721 	while(rpo1 != rpo2){
722 		if(rpo1 > rpo2){
723 			t = rpo2;
724 			rpo2 = rpo1;
725 			rpo1 = t;
726 		}
727 		while(rpo1 < rpo2){
728 			t = idom[rpo2];
729 			if(t >= rpo2)
730 				fatal(Z, "bad idom");
731 			rpo2 = t;
732 		}
733 	}
734 	return rpo1;
735 }
736 
737 int
738 doms(long *idom, long r, long s)
739 {
740 	while(s > r)
741 		s = idom[s];
742 	return s == r;
743 }
744 
745 int
746 loophead(long *idom, Reg *r)
747 {
748 	long src;
749 
750 	src = r->rpo;
751 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
752 		return 1;
753 	for(r = r->p2; r != R; r = r->p2link)
754 		if(doms(idom, src, r->rpo))
755 			return 1;
756 	return 0;
757 }
758 
759 void
760 loopmark(Reg **rpo2r, long head, Reg *r)
761 {
762 	if(r->rpo < head || r->active == head)
763 		return;
764 	r->active = head;
765 	r->loop += LOOP;
766 	if(r->p1 != R)
767 		loopmark(rpo2r, head, r->p1);
768 	for(r = r->p2; r != R; r = r->p2link)
769 		loopmark(rpo2r, head, r);
770 }
771 
772 void
773 loopit(Reg *r, long nr)
774 {
775 	Reg *r1;
776 	long i, d, me;
777 
778 	if(nr > maxnr) {
779 		rpo2r = alloc(nr * sizeof(Reg*));
780 		idom = alloc(nr * sizeof(long));
781 		maxnr = nr;
782 	}
783 
784 	d = postorder(r, rpo2r, 0);
785 	if(d > nr)
786 		fatal(Z, "too many reg nodes");
787 	nr = d;
788 	for(i = 0; i < nr / 2; i++){
789 		r1 = rpo2r[i];
790 		rpo2r[i] = rpo2r[nr - 1 - i];
791 		rpo2r[nr - 1 - i] = r1;
792 	}
793 	for(i = 0; i < nr; i++)
794 		rpo2r[i]->rpo = i;
795 
796 	idom[0] = 0;
797 	for(i = 0; i < nr; i++){
798 		r1 = rpo2r[i];
799 		me = r1->rpo;
800 		d = -1;
801 		if(r1->p1 != R && r1->p1->rpo < me)
802 			d = r1->p1->rpo;
803 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
804 			if(r1->rpo < me)
805 				d = rpolca(idom, d, r1->rpo);
806 		idom[i] = d;
807 	}
808 
809 	for(i = 0; i < nr; i++){
810 		r1 = rpo2r[i];
811 		r1->loop++;
812 		if(r1->p2 != R && loophead(idom, r1))
813 			loopmark(rpo2r, i, r1);
814 	}
815 }
816 
817 void
818 synch(Reg *r, Bits dif)
819 {
820 	Reg *r1;
821 	int z;
822 
823 	for(r1 = r; r1 != R; r1 = r1->s1) {
824 		for(z=0; z<BITS; z++) {
825 			dif.b[z] = (dif.b[z] &
826 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
827 					r1->set.b[z] | r1->regdiff.b[z];
828 			if(dif.b[z] != r1->regdiff.b[z]) {
829 				r1->regdiff.b[z] = dif.b[z];
830 				change++;
831 			}
832 		}
833 		if(r1->active)
834 			break;
835 		r1->active = 1;
836 		for(z=0; z<BITS; z++)
837 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
838 		if(r1->s2 != R)
839 			synch(r1->s2, dif);
840 	}
841 }
842 
843 ulong
844 allreg(ulong b, Rgn *r)
845 {
846 	Var *v;
847 	int i;
848 
849 	v = var + r->varno;
850 	r->regno = 0;
851 	switch(v->etype) {
852 
853 	default:
854 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
855 		break;
856 
857 	case TCHAR:
858 	case TUCHAR:
859 	case TSHORT:
860 	case TUSHORT:
861 	case TINT:
862 	case TUINT:
863 	case TLONG:
864 	case TULONG:
865 	case TVLONG:
866 	case TUVLONG:
867 	case TIND:
868 	case TARRAY:
869 		i = BtoR(~b);
870 		if(i && r->cost >= 0) {
871 			r->regno = i;
872 			return RtoB(i);
873 		}
874 		break;
875 
876 	case TDOUBLE:
877 	case TFLOAT:
878 		i = BtoF(~b);
879 		if(i && r->cost >= 0) {
880 			r->regno = i+NREG;
881 			return FtoB(i);
882 		}
883 		break;
884 	}
885 	return 0;
886 }
887 
888 void
889 paint1(Reg *r, int bn)
890 {
891 	Reg *r1;
892 	Prog *p;
893 	int z;
894 	ulong bb;
895 
896 	z = bn/32;
897 	bb = 1L<<(bn%32);
898 	if(r->act.b[z] & bb)
899 		return;
900 	for(;;) {
901 		if(!(r->refbehind.b[z] & bb))
902 			break;
903 		r1 = r->p1;
904 		if(r1 == R)
905 			break;
906 		if(!(r1->refahead.b[z] & bb))
907 			break;
908 		if(r1->act.b[z] & bb)
909 			break;
910 		r = r1;
911 	}
912 
913 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
914 		change -= CLOAD * r->loop;
915 		if(debug['R'] && debug['v'])
916 			print("%ld%P\tld %B $%d\n", r->loop,
917 				r->prog, blsh(bn), change);
918 	}
919 	for(;;) {
920 		r->act.b[z] |= bb;
921 		p = r->prog;
922 
923 		if(r->use1.b[z] & bb) {
924 			change += CREF * r->loop;
925 			if(debug['R'] && debug['v'])
926 				print("%ld%P\tu1 %B $%d\n", r->loop,
927 					p, blsh(bn), change);
928 		}
929 
930 		if((r->use2.b[z]|r->set.b[z]) & bb) {
931 			change += CREF * r->loop;
932 			if(debug['R'] && debug['v'])
933 				print("%ld%P\tu2 %B $%d\n", r->loop,
934 					p, blsh(bn), change);
935 		}
936 
937 		if(STORE(r) & r->regdiff.b[z] & bb) {
938 			change -= CLOAD * r->loop;
939 			if(debug['R'] && debug['v'])
940 				print("%ld%P\tst %B $%d\n", r->loop,
941 					p, blsh(bn), change);
942 		}
943 
944 		if(r->refbehind.b[z] & bb)
945 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
946 				if(r1->refahead.b[z] & bb)
947 					paint1(r1, bn);
948 
949 		if(!(r->refahead.b[z] & bb))
950 			break;
951 		r1 = r->s2;
952 		if(r1 != R)
953 			if(r1->refbehind.b[z] & bb)
954 				paint1(r1, bn);
955 		r = r->s1;
956 		if(r == R)
957 			break;
958 		if(r->act.b[z] & bb)
959 			break;
960 		if(!(r->refbehind.b[z] & bb))
961 			break;
962 	}
963 }
964 
965 ulong
966 paint2(Reg *r, int bn)
967 {
968 	Reg *r1;
969 	int z;
970 	ulong bb, vreg;
971 
972 	z = bn/32;
973 	bb = 1L << (bn%32);
974 	vreg = regbits;
975 	if(!(r->act.b[z] & bb))
976 		return vreg;
977 	for(;;) {
978 		if(!(r->refbehind.b[z] & bb))
979 			break;
980 		r1 = r->p1;
981 		if(r1 == R)
982 			break;
983 		if(!(r1->refahead.b[z] & bb))
984 			break;
985 		if(!(r1->act.b[z] & bb))
986 			break;
987 		r = r1;
988 	}
989 	for(;;) {
990 		r->act.b[z] &= ~bb;
991 
992 		vreg |= r->regu;
993 
994 		if(r->refbehind.b[z] & bb)
995 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
996 				if(r1->refahead.b[z] & bb)
997 					vreg |= paint2(r1, bn);
998 
999 		if(!(r->refahead.b[z] & bb))
1000 			break;
1001 		r1 = r->s2;
1002 		if(r1 != R)
1003 			if(r1->refbehind.b[z] & bb)
1004 				vreg |= paint2(r1, bn);
1005 		r = r->s1;
1006 		if(r == R)
1007 			break;
1008 		if(!(r->act.b[z] & bb))
1009 			break;
1010 		if(!(r->refbehind.b[z] & bb))
1011 			break;
1012 	}
1013 	return vreg;
1014 }
1015 
1016 void
1017 paint3(Reg *r, int bn, long rb, int rn)
1018 {
1019 	Reg *r1;
1020 	Prog *p;
1021 	int z;
1022 	ulong bb;
1023 
1024 	z = bn/32;
1025 	bb = 1L << (bn%32);
1026 	if(r->act.b[z] & bb)
1027 		return;
1028 	for(;;) {
1029 		if(!(r->refbehind.b[z] & bb))
1030 			break;
1031 		r1 = r->p1;
1032 		if(r1 == R)
1033 			break;
1034 		if(!(r1->refahead.b[z] & bb))
1035 			break;
1036 		if(r1->act.b[z] & bb)
1037 			break;
1038 		r = r1;
1039 	}
1040 
1041 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1042 		addmove(r, bn, rn, 0);
1043 	for(;;) {
1044 		r->act.b[z] |= bb;
1045 		p = r->prog;
1046 
1047 		if(r->use1.b[z] & bb) {
1048 			if(debug['R'])
1049 				print("%P", p);
1050 			addreg(&p->from, rn);
1051 			if(debug['R'])
1052 				print("\t.c%P\n", p);
1053 		}
1054 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1055 			if(debug['R'])
1056 				print("%P", p);
1057 			addreg(&p->to, rn);
1058 			if(debug['R'])
1059 				print("\t.c%P\n", p);
1060 		}
1061 
1062 		if(STORE(r) & r->regdiff.b[z] & bb)
1063 			addmove(r, bn, rn, 1);
1064 		r->regu |= rb;
1065 
1066 		if(r->refbehind.b[z] & bb)
1067 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1068 				if(r1->refahead.b[z] & bb)
1069 					paint3(r1, bn, rb, rn);
1070 
1071 		if(!(r->refahead.b[z] & bb))
1072 			break;
1073 		r1 = r->s2;
1074 		if(r1 != R)
1075 			if(r1->refbehind.b[z] & bb)
1076 				paint3(r1, bn, rb, rn);
1077 		r = r->s1;
1078 		if(r == R)
1079 			break;
1080 		if(r->act.b[z] & bb)
1081 			break;
1082 		if(!(r->refbehind.b[z] & bb))
1083 			break;
1084 	}
1085 }
1086 
1087 void
1088 addreg(Adr *a, int rn)
1089 {
1090 
1091 	a->sym = 0;
1092 	a->name = D_NONE;
1093 	a->type = D_REG;
1094 	a->reg = rn;
1095 	if(rn >= NREG) {
1096 		a->type = D_FREG;
1097 		a->reg = rn-NREG;
1098 	}
1099 }
1100 
1101 /*
1102  *	bit	reg
1103  *	0	R3
1104  *	1	R4
1105  *	...	...
1106  *	19	R22
1107  *	20	R23
1108  */
1109 long
1110 RtoB(int r)
1111 {
1112 
1113 	if(r < 3 || r > 23)
1114 		return 0;
1115 	return 1L << (r-3);
1116 }
1117 
1118 int
1119 BtoR(long b)
1120 {
1121 
1122 	b &= 0x001fffffL;
1123 	if(b == 0)
1124 		return 0;
1125 	return bitno(b) + 3;
1126 }
1127 
1128 /*
1129  *	bit	reg
1130  *	22	F4
1131  *	23	F6
1132  *	...	...
1133  *	31	F22
1134  */
1135 long
1136 FtoB(int f)
1137 {
1138 
1139 	if(f < 4 || f > 22 || (f&1))
1140 		return 0;
1141 	return 1L << (f/2 + 20);
1142 }
1143 
1144 int
1145 BtoF(long b)
1146 {
1147 
1148 	b &= 0xffc00000L;
1149 	if(b == 0)
1150 		return 0;
1151 	return bitno(b)*2 - 40;
1152 }
1153