xref: /plan9-contrib/sys/src/cmd/vc/reg.c (revision d46c239f8612929b7dbade67d0d071633df3a15d)
1 #include "gc.h"
2 
3 void	addsplits(void);
4 
5 Reg*
6 rega(void)
7 {
8 	Reg *r;
9 
10 	r = freer;
11 	if(r == R) {
12 		r = alloc(sizeof(*r));
13 	} else
14 		freer = r->link;
15 
16 	*r = zreg;
17 	return r;
18 }
19 
20 int
21 rcmp(const void *a1, const void *a2)
22 {
23 	Rgn *p1, *p2;
24 	int c1, c2;
25 
26 	p1 = (Rgn*)a1;
27 	p2 = (Rgn*)a2;
28 	c1 = p2->cost;
29 	c2 = p1->cost;
30 	if(c1 -= c2)
31 		return c1;
32 	return p2->varno - p1->varno;
33 }
34 
35 void
36 regopt(Prog *p)
37 {
38 	Reg *r, *r1, *r2;
39 	Prog *p1;
40 	int i, z;
41 	long initpc, val, npc;
42 	ulong vreg;
43 	Bits bit;
44 	struct
45 	{
46 		long	m;
47 		long	c;
48 		Reg*	p;
49 	} log5[6], *lp;
50 
51 	firstr = R;
52 	lastr = R;
53 	nvar = 0;
54 	regbits = 0;
55 	for(z=0; z<BITS; z++) {
56 		externs.b[z] = 0;
57 		params.b[z] = 0;
58 		consts.b[z] = 0;
59 		addrs.b[z] = 0;
60 	}
61 
62 	/*
63 	 * pass 1
64 	 * build aux data structure
65 	 * allocate pcs
66 	 * find use and set of variables
67 	 */
68 	val = 5L * 5L * 5L * 5L * 5L;
69 	lp = log5;
70 	for(i=0; i<5; i++) {
71 		lp->m = val;
72 		lp->c = 0;
73 		lp->p = R;
74 		val /= 5L;
75 		lp++;
76 	}
77 	val = 0;
78 	for(; p != P; p = p->link) {
79 		switch(p->as) {
80 		case ADATA:
81 		case AGLOBL:
82 		case ANAME:
83 			continue;
84 		}
85 		r = rega();
86 		if(firstr == R) {
87 			firstr = r;
88 			lastr = r;
89 		} else {
90 			lastr->link = r;
91 			r->p1 = lastr;
92 			lastr->s1 = r;
93 			lastr = r;
94 		}
95 		r->prog = p;
96 		r->pc = val;
97 		val++;
98 
99 		lp = log5;
100 		for(i=0; i<5; i++) {
101 			lp->c--;
102 			if(lp->c <= 0) {
103 				lp->c = lp->m;
104 				if(lp->p != R)
105 					lp->p->log5 = r;
106 				lp->p = r;
107 				(lp+1)->c = 0;
108 				break;
109 			}
110 			lp++;
111 		}
112 
113 		r1 = r->p1;
114 		if(r1 != R)
115 		switch(r1->prog->as) {
116 		case ARET:
117 		case AJMP:
118 		case ARFE:
119 			r->p1 = R;
120 			r1->s1 = R;
121 		}
122 
123 		/*
124 		 * left side always read
125 		 */
126 		bit = mkvar(&p->from, p->as==AMOVW);
127 		for(z=0; z<BITS; z++)
128 			r->use1.b[z] |= bit.b[z];
129 
130 		/*
131 		 * right side depends on opcode
132 		 */
133 		bit = mkvar(&p->to, 0);
134 		if(bany(&bit))
135 		switch(p->as) {
136 		default:
137 			diag(Z, "reg: unknown asop: %A", p->as);
138 			break;
139 
140 		/*
141 		 * right side write
142 		 */
143 		case ANOP:
144 		case AMOVB:
145 		case AMOVBU:
146 		case AMOVH:
147 		case AMOVHU:
148 		case AMOVW:
149 		case AMOVF:
150 		case AMOVD:
151 			for(z=0; z<BITS; z++)
152 				r->set.b[z] |= bit.b[z];
153 			break;
154 
155 		/*
156 		 * funny
157 		 */
158 		case AJAL:
159 			for(z=0; z<BITS; z++)
160 				addrs.b[z] |= bit.b[z];
161 			break;
162 		}
163 	}
164 	if(firstr == R)
165 		return;
166 	initpc = pc - val;
167 	npc = val;
168 
169 	/*
170 	 * pass 2
171 	 * turn branch references to pointers
172 	 * build back pointers
173 	 */
174 	for(r = firstr; r != R; r = r->link) {
175 		p = r->prog;
176 		if(p->to.type == D_BRANCH) {
177 			val = p->to.offset - initpc;
178 			r1 = firstr;
179 			while(r1 != R) {
180 				r2 = r1->log5;
181 				if(r2 != R && val >= r2->pc) {
182 					r1 = r2;
183 					continue;
184 				}
185 				if(r1->pc == val)
186 					break;
187 				r1 = r1->link;
188 			}
189 			if(r1 == R) {
190 				nearln = p->lineno;
191 				diag(Z, "ref not found\n%P", p);
192 				continue;
193 			}
194 			if(r1 == r) {
195 				nearln = p->lineno;
196 				diag(Z, "ref to self\n%P", p);
197 				continue;
198 			}
199 			r->s2 = r1;
200 			r->p2link = r1->p2;
201 			r1->p2 = r;
202 		}
203 	}
204 	if(debug['R']) {
205 		p = firstr->prog;
206 		print("\n%L %D\n", p->lineno, &p->from);
207 	}
208 
209 	/*
210 	 * pass 2.5
211 	 * find looping structure
212 	 */
213 	for(r = firstr; r != R; r = r->link)
214 		r->active = 0;
215 	change = 0;
216 	loopit(firstr, npc);
217 
218 	/*
219 	 * pass 3
220 	 * iterate propagating usage
221 	 * 	back until flow graph is complete
222 	 */
223 loop1:
224 	change = 0;
225 	for(r = firstr; r != R; r = r->link)
226 		r->active = 0;
227 	for(r = firstr; r != R; r = r->link)
228 		if(r->prog->as == ARET)
229 			prop(r, zbits, zbits);
230 loop11:
231 	/* pick up unreachable code */
232 	i = 0;
233 	for(r = firstr; r != R; r = r1) {
234 		r1 = r->link;
235 		if(r1 && r1->active && !r->active) {
236 			prop(r, zbits, zbits);
237 			i = 1;
238 		}
239 	}
240 	if(i)
241 		goto loop11;
242 	if(change)
243 		goto loop1;
244 
245 
246 	/*
247 	 * pass 4
248 	 * iterate propagating register/variable synchrony
249 	 * 	forward until graph is complete
250 	 */
251 loop2:
252 	change = 0;
253 	for(r = firstr; r != R; r = r->link)
254 		r->active = 0;
255 	synch(firstr, zbits);
256 	if(change)
257 		goto loop2;
258 
259 	addsplits();
260 
261 	if(debug['R'] && debug['v']) {
262 		print("\nprop structure:\n");
263 		for(r = firstr; r != R; r = r->link) {
264 			print("%ld:%P", r->loop, r->prog);
265 			for(z=0; z<BITS; z++)
266 				bit.b[z] = r->set.b[z] |
267 					r->refahead.b[z] | r->calahead.b[z] |
268 					r->refbehind.b[z] | r->calbehind.b[z] |
269 					r->use1.b[z] | r->use2.b[z];
270 			if(bany(&bit)) {
271 				print("\t");
272 				if(bany(&r->use1))
273 					print(" u1=%B", r->use1);
274 				if(bany(&r->use2))
275 					print(" u2=%B", r->use2);
276 				if(bany(&r->set))
277 					print(" st=%B", r->set);
278 				if(bany(&r->refahead))
279 					print(" ra=%B", r->refahead);
280 				if(bany(&r->calahead))
281 					print(" ca=%B", r->calahead);
282 				if(bany(&r->refbehind))
283 					print(" rb=%B", r->refbehind);
284 				if(bany(&r->calbehind))
285 					print(" cb=%B", r->calbehind);
286 				if(bany(&r->regdiff))
287 					print(" rd=%B", r->regdiff);
288 			}
289 			print("\n");
290 		}
291 	}
292 
293 	/*
294 	 * pass 5
295 	 * isolate regions
296 	 * calculate costs (paint1)
297 	 */
298 	r = firstr;
299 	if(r) {
300 		for(z=0; z<BITS; z++)
301 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
302 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
303 		if(bany(&bit)) {
304 			nearln = r->prog->lineno;
305 			warn(Z, "used and not set: %B", bit);
306 			if(debug['R'] && !debug['w'])
307 				print("used and not set: %B\n", bit);
308 		}
309 	}
310 
311 	for(r = firstr; r != R; r = r->link)
312 		r->act = zbits;
313 	rgp = region;
314 	nregion = 0;
315 	for(r = firstr; r != R; r = r->link) {
316 		for(z=0; z<BITS; z++)
317 			bit.b[z] = r->set.b[z] &
318 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
319 		if(bany(&bit)) {
320 			nearln = r->prog->lineno;
321 			warn(Z, "set and not used: %B", bit);
322 			if(debug['R'])
323 				print("set and not used: %B\n", bit);
324 			excise(r);
325 		}
326 		for(z=0; z<BITS; z++)
327 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
328 		while(bany(&bit)) {
329 			i = bnum(bit);
330 			rgp->enter = r;
331 			rgp->varno = i;
332 			change = 0;
333 			if(debug['R'] && debug['v'])
334 				print("\n");
335 			paint1(r, i);
336 			bit.b[i/32] &= ~(1L<<(i%32));
337 			if(change <= 0) {
338 				if(debug['R'])
339 					print("%L $%d: %B\n",
340 						r->prog->lineno, change, blsh(i));
341 				continue;
342 			}
343 			rgp->cost = change;
344 			nregion++;
345 			if(nregion >= NRGN) {
346 				warn(Z, "too many regions");
347 				goto brk;
348 			}
349 			rgp++;
350 		}
351 	}
352 brk:
353 	qsort(region, nregion, sizeof(region[0]), rcmp);
354 
355 	/*
356 	 * pass 6
357 	 * determine used registers (paint2)
358 	 * replace code (paint3)
359 	 */
360 	rgp = region;
361 	for(i=0; i<nregion; i++) {
362 		bit = blsh(rgp->varno);
363 		vreg = paint2(rgp->enter, rgp->varno);
364 		vreg = allreg(vreg, rgp);
365 		if(debug['R']) {
366 			if(rgp->regno >= NREG)
367 				print("%L $%d F%d: %B\n",
368 					rgp->enter->prog->lineno,
369 					rgp->cost,
370 					rgp->regno-NREG,
371 					bit);
372 			else
373 				print("%L $%d R%d: %B\n",
374 					rgp->enter->prog->lineno,
375 					rgp->cost,
376 					rgp->regno,
377 					bit);
378 		}
379 		if(rgp->regno != 0)
380 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
381 		rgp++;
382 	}
383 	/*
384 	 * pass 7
385 	 * peep-hole on basic block
386 	 */
387 	if(!debug['R'] || debug['P'])
388 		peep();
389 
390 	/*
391 	 * pass 8
392 	 * recalculate pc
393 	 */
394 	val = initpc;
395 	for(r = firstr; r != R; r = r1) {
396 		r->pc = val;
397 		p = r->prog;
398 		p1 = P;
399 		r1 = r->link;
400 		if(r1 != R)
401 			p1 = r1->prog;
402 		for(; p != p1; p = p->link) {
403 			switch(p->as) {
404 			default:
405 				val++;
406 				break;
407 
408 			case ANOP:
409 			case ADATA:
410 			case AGLOBL:
411 			case ANAME:
412 				break;
413 			}
414 		}
415 	}
416 	pc = val;
417 
418 	/*
419 	 * fix up branches
420 	 */
421 	if(debug['R'])
422 		if(bany(&addrs))
423 			print("addrs: %B\n", addrs);
424 
425 	r1 = 0; /* set */
426 	for(r = firstr; r != R; r = r->link) {
427 		p = r->prog;
428 		if(p->to.type == D_BRANCH)
429 			p->to.offset = r->s2->pc;
430 		r1 = r;
431 	}
432 
433 	/*
434 	 * last pass
435 	 * eliminate nops
436 	 * free aux structures
437 	 */
438 	for(p = firstr->prog; p != P; p = p->link){
439 		while(p->link && p->link->as == ANOP)
440 			p->link = p->link->link;
441 	}
442 	if(r1 != R) {
443 		r1->link = freer;
444 		freer = firstr;
445 	}
446 }
447 
448 void
449 addsplits(void)
450 {
451 	Reg *r, *r1;
452 	int z, i;
453 	Bits bit;
454 
455 	for(r = firstr; r != R; r = r->link) {
456 		if(r->loop > 1)
457 			continue;
458 		if(r->prog->as == AJAL)
459 			continue;
460 		for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
461 			if(r1->loop <= 1)
462 				continue;
463 			for(z=0; z<BITS; z++)
464 				bit.b[z] = r1->calbehind.b[z] &
465 					(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
466 					~(r->calahead.b[z] & addrs.b[z]);
467 			while(bany(&bit)) {
468 				i = bnum(bit);
469 				bit.b[i/32] &= ~(1L << (i%32));
470 			}
471 		}
472 	}
473 }
474 
475 /*
476  * add mov b,rn
477  * just after r
478  */
479 void
480 addmove(Reg *r, int bn, int rn, int f)
481 {
482 	Prog *p, *p1;
483 	Adr *a;
484 	Var *v;
485 
486 	p1 = alloc(sizeof(*p1));
487 	*p1 = zprog;
488 	p = r->prog;
489 
490 	p1->link = p->link;
491 	p->link = p1;
492 	p1->lineno = p->lineno;
493 
494 	v = var + bn;
495 
496 	a = &p1->to;
497 	a->sym = v->sym;
498 	a->name = v->name;
499 	a->offset = v->offset;
500 	a->etype = v->etype;
501 	a->type = D_OREG;
502 	if(a->etype == TARRAY || a->sym == S)
503 		a->type = D_CONST;
504 
505 	p1->as = AMOVW;
506 	if(v->etype == TCHAR || v->etype == TUCHAR)
507 		p1->as = AMOVB;
508 	if(v->etype == TSHORT || v->etype == TUSHORT)
509 		p1->as = AMOVH;
510 	if(v->etype == TFLOAT)
511 		p1->as = AMOVF;
512 	if(v->etype == TDOUBLE)
513 		p1->as = AMOVD;
514 
515 	p1->from.type = D_REG;
516 	p1->from.reg = rn;
517 	if(rn >= NREG) {
518 		p1->from.type = D_FREG;
519 		p1->from.reg = rn-NREG;
520 	}
521 	if(!f) {
522 		p1->from = *a;
523 		*a = zprog.from;
524 		a->type = D_REG;
525 		a->reg = rn;
526 		if(rn >= NREG) {
527 			a->type = D_FREG;
528 			a->reg = rn-NREG;
529 		}
530 		if(v->etype == TUCHAR)
531 			p1->as = AMOVBU;
532 		if(v->etype == TUSHORT)
533 			p1->as = AMOVHU;
534 	}
535 	if(debug['R'])
536 		print("%P\t.a%P\n", p, p1);
537 }
538 
539 Bits
540 mkvar(Adr *a, int docon)
541 {
542 	Var *v;
543 	int i, t, n, et, z;
544 	long o;
545 	Bits bit;
546 	Sym *s;
547 
548 	t = a->type;
549 	if(t == D_REG && a->reg != NREG)
550 		regbits |= RtoB(a->reg);
551 	if(t == D_FREG && a->reg != NREG)
552 		regbits |= FtoB(a->reg);
553 	s = a->sym;
554 	o = a->offset;
555 	et = a->etype;
556 	if(s == S) {
557 		if(t != D_CONST || !docon || a->reg != NREG)
558 			goto none;
559 		et = TLONG;
560 	}
561 	if(t == D_CONST) {
562 		if(s == S && sval(o))
563 			goto none;
564 	}
565 
566 	n = a->name;
567 	v = var;
568 	for(i=0; i<nvar; i++) {
569 		if(s == v->sym)
570 		if(n == v->name)
571 		if(o == v->offset)
572 			goto out;
573 		v++;
574 	}
575 	if(s)
576 		if(s->name[0] == '.')
577 			goto none;
578 	if(nvar >= NVAR) {
579 		if(debug['w'] > 1 && s)
580 			warn(Z, "variable not optimized: %s", s->name);
581 		goto none;
582 	}
583 	i = nvar;
584 	nvar++;
585 	v = &var[i];
586 	v->sym = s;
587 	v->offset = o;
588 	v->etype = et;
589 	v->name = n;
590 	if(debug['R'])
591 		print("bit=%2d et=%2d %D\n", i, et, a);
592 out:
593 	bit = blsh(i);
594 	if(n == D_EXTERN || n == D_STATIC)
595 		for(z=0; z<BITS; z++)
596 			externs.b[z] |= bit.b[z];
597 	if(n == D_PARAM)
598 		for(z=0; z<BITS; z++)
599 			params.b[z] |= bit.b[z];
600 	if(v->etype != et || !typechlpfd[et])	/* funny punning */
601 		for(z=0; z<BITS; z++)
602 			addrs.b[z] |= bit.b[z];
603 	if(t == D_CONST) {
604 		if(s == S) {
605 			for(z=0; z<BITS; z++)
606 				consts.b[z] |= bit.b[z];
607 			return bit;
608 		}
609 		if(et != TARRAY)
610 			for(z=0; z<BITS; z++)
611 				addrs.b[z] |= bit.b[z];
612 		for(z=0; z<BITS; z++)
613 			params.b[z] |= bit.b[z];
614 		return bit;
615 	}
616 	if(t == D_OREG)
617 		return bit;
618 
619 none:
620 	return zbits;
621 }
622 
623 void
624 prop(Reg *r, Bits ref, Bits cal)
625 {
626 	Reg *r1, *r2;
627 	int z;
628 
629 	for(r1 = r; r1 != R; r1 = r1->p1) {
630 		for(z=0; z<BITS; z++) {
631 			ref.b[z] |= r1->refahead.b[z];
632 			if(ref.b[z] != r1->refahead.b[z]) {
633 				r1->refahead.b[z] = ref.b[z];
634 				change++;
635 			}
636 			cal.b[z] |= r1->calahead.b[z];
637 			if(cal.b[z] != r1->calahead.b[z]) {
638 				r1->calahead.b[z] = cal.b[z];
639 				change++;
640 			}
641 		}
642 		switch(r1->prog->as) {
643 		case AJAL:
644 			for(z=0; z<BITS; z++) {
645 				cal.b[z] |= ref.b[z] | externs.b[z];
646 				ref.b[z] = 0;
647 			}
648 			break;
649 
650 		case ATEXT:
651 			for(z=0; z<BITS; z++) {
652 				cal.b[z] = 0;
653 				ref.b[z] = 0;
654 			}
655 			break;
656 
657 		case ARET:
658 			for(z=0; z<BITS; z++) {
659 				cal.b[z] = externs.b[z];
660 				ref.b[z] = 0;
661 			}
662 		}
663 		for(z=0; z<BITS; z++) {
664 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
665 				r1->use1.b[z] | r1->use2.b[z];
666 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
667 			r1->refbehind.b[z] = ref.b[z];
668 			r1->calbehind.b[z] = cal.b[z];
669 		}
670 		if(r1->active)
671 			break;
672 		r1->active = 1;
673 	}
674 	for(; r != r1; r = r->p1)
675 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
676 			prop(r2, r->refbehind, r->calbehind);
677 }
678 
679 /*
680  * find looping structure
681  *
682  * 1) find reverse postordering
683  * 2) find approximate dominators,
684  *	the actual dominators if the flow graph is reducible
685  *	otherwise, dominators plus some other non-dominators.
686  *	See Matthew S. Hecht and Jeffrey D. Ullman,
687  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
688  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
689  *	Oct. 1-3, 1973, pp.  207-217.
690  * 3) find all nodes with a predecessor dominated by the current node.
691  *	such a node is a loop head.
692  *	recursively, all preds with a greater rpo number are in the loop
693  */
694 long
695 postorder(Reg *r, Reg **rpo2r, long n)
696 {
697 	Reg *r1;
698 
699 	r->rpo = 1;
700 	r1 = r->s1;
701 	if(r1 && !r1->rpo)
702 		n = postorder(r1, rpo2r, n);
703 	r1 = r->s2;
704 	if(r1 && !r1->rpo)
705 		n = postorder(r1, rpo2r, n);
706 	rpo2r[n] = r;
707 	n++;
708 	return n;
709 }
710 
711 long
712 rpolca(long *idom, long rpo1, long rpo2)
713 {
714 	long t;
715 
716 	if(rpo1 == -1)
717 		return rpo2;
718 	while(rpo1 != rpo2){
719 		if(rpo1 > rpo2){
720 			t = rpo2;
721 			rpo2 = rpo1;
722 			rpo1 = t;
723 		}
724 		while(rpo1 < rpo2){
725 			t = idom[rpo2];
726 			if(t >= rpo2)
727 				sysfatal("bad idom");
728 			rpo2 = t;
729 		}
730 	}
731 	return rpo1;
732 }
733 
734 int
735 doms(long *idom, long r, long s)
736 {
737 	while(s > r)
738 		s = idom[s];
739 	return s == r;
740 }
741 
742 int
743 loophead(long *idom, Reg *r)
744 {
745 	long src;
746 
747 	src = r->rpo;
748 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
749 		return 1;
750 	for(r = r->p2; r != R; r = r->p2link)
751 		if(doms(idom, src, r->rpo))
752 			return 1;
753 	return 0;
754 }
755 
756 void
757 loopmark(Reg **rpo2r, long head, Reg *r)
758 {
759 	if(r->rpo < head || r->active == head)
760 		return;
761 	r->active = head;
762 	r->loop += LOOP;
763 	if(r->p1 != R)
764 		loopmark(rpo2r, head, r->p1);
765 	for(r = r->p2; r != R; r = r->p2link)
766 		loopmark(rpo2r, head, r);
767 }
768 
769 void
770 loopit(Reg *r, long nr)
771 {
772 	Reg *r1;
773 	long i, d, me;
774 
775 	if(nr > maxnr) {
776 		rpo2r = alloc(nr * sizeof(Reg*));
777 		idom = alloc(nr * sizeof(long));
778 		maxnr = nr;
779 	}
780 
781 	d = postorder(r, rpo2r, 0);
782 	if(d > nr)
783 		sysfatal("too many reg nodes");
784 	nr = d;
785 	for(i = 0; i < nr / 2; i++){
786 		r1 = rpo2r[i];
787 		rpo2r[i] = rpo2r[nr - 1 - i];
788 		rpo2r[nr - 1 - i] = r1;
789 	}
790 	for(i = 0; i < nr; i++)
791 		rpo2r[i]->rpo = i;
792 
793 	idom[0] = 0;
794 	for(i = 0; i < nr; i++){
795 		r1 = rpo2r[i];
796 		me = r1->rpo;
797 		d = -1;
798 		if(r1->p1 != R && r1->p1->rpo < me)
799 			d = r1->p1->rpo;
800 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
801 			if(r1->rpo < me)
802 				d = rpolca(idom, d, r1->rpo);
803 		idom[i] = d;
804 	}
805 
806 	for(i = 0; i < nr; i++){
807 		r1 = rpo2r[i];
808 		r1->loop++;
809 		if(r1->p2 != R && loophead(idom, r1))
810 			loopmark(rpo2r, i, r1);
811 	}
812 }
813 
814 void
815 synch(Reg *r, Bits dif)
816 {
817 	Reg *r1;
818 	int z;
819 
820 	for(r1 = r; r1 != R; r1 = r1->s1) {
821 		for(z=0; z<BITS; z++) {
822 			dif.b[z] = (dif.b[z] &
823 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
824 					r1->set.b[z] | r1->regdiff.b[z];
825 			if(dif.b[z] != r1->regdiff.b[z]) {
826 				r1->regdiff.b[z] = dif.b[z];
827 				change++;
828 			}
829 		}
830 		if(r1->active)
831 			break;
832 		r1->active = 1;
833 		for(z=0; z<BITS; z++)
834 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
835 		if(r1->s2 != R)
836 			synch(r1->s2, dif);
837 	}
838 }
839 
840 ulong
841 allreg(ulong b, Rgn *r)
842 {
843 	Var *v;
844 	int i;
845 
846 	v = var + r->varno;
847 	r->regno = 0;
848 	switch(v->etype) {
849 
850 	default:
851 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
852 		break;
853 
854 	case TCHAR:
855 	case TUCHAR:
856 	case TSHORT:
857 	case TUSHORT:
858 	case TINT:
859 	case TUINT:
860 	case TLONG:
861 	case TULONG:
862 	case TIND:
863 	case TARRAY:
864 		i = BtoR(~b);
865 		if(i && r->cost >= 0) {
866 			r->regno = i;
867 			return RtoB(i);
868 		}
869 		break;
870 
871 	case TDOUBLE:
872 	case TFLOAT:
873 		i = BtoF(~b);
874 		if(i && r->cost >= 0) {
875 			r->regno = i+NREG;
876 			return FtoB(i);
877 		}
878 		break;
879 	}
880 	return 0;
881 }
882 
883 void
884 paint1(Reg *r, int bn)
885 {
886 	Reg *r1;
887 	Prog *p;
888 	int z;
889 	ulong bb;
890 
891 	z = bn/32;
892 	bb = 1L<<(bn%32);
893 	if(r->act.b[z] & bb)
894 		return;
895 	for(;;) {
896 		if(!(r->refbehind.b[z] & bb))
897 			break;
898 		r1 = r->p1;
899 		if(r1 == R)
900 			break;
901 		if(!(r1->refahead.b[z] & bb))
902 			break;
903 		if(r1->act.b[z] & bb)
904 			break;
905 		r = r1;
906 	}
907 
908 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
909 		change -= CLOAD * r->loop;
910 		if(debug['R'] && debug['v'])
911 			print("%ld%P\tld %B $%d\n", r->loop,
912 				r->prog, blsh(bn), change);
913 	}
914 	for(;;) {
915 		r->act.b[z] |= bb;
916 		p = r->prog;
917 
918 		if(r->use1.b[z] & bb) {
919 			change += CREF * r->loop;
920 			if(debug['R'] && debug['v'])
921 				print("%ld%P\tu1 %B $%d\n", r->loop,
922 					p, blsh(bn), change);
923 		}
924 
925 		if((r->use2.b[z]|r->set.b[z]) & bb) {
926 			change += CREF * r->loop;
927 			if(debug['R'] && debug['v'])
928 				print("%ld%P\tu2 %B $%d\n", r->loop,
929 					p, blsh(bn), change);
930 		}
931 
932 		if(STORE(r) & r->regdiff.b[z] & bb) {
933 			change -= CLOAD * r->loop;
934 			if(debug['R'] && debug['v'])
935 				print("%ld%P\tst %B $%d\n", r->loop,
936 					p, blsh(bn), change);
937 		}
938 
939 		if(r->refbehind.b[z] & bb)
940 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
941 				if(r1->refahead.b[z] & bb)
942 					paint1(r1, bn);
943 
944 		if(!(r->refahead.b[z] & bb))
945 			break;
946 		r1 = r->s2;
947 		if(r1 != R)
948 			if(r1->refbehind.b[z] & bb)
949 				paint1(r1, bn);
950 		r = r->s1;
951 		if(r == R)
952 			break;
953 		if(r->act.b[z] & bb)
954 			break;
955 		if(!(r->refbehind.b[z] & bb))
956 			break;
957 	}
958 }
959 
960 ulong
961 paint2(Reg *r, int bn)
962 {
963 	Reg *r1;
964 	int z;
965 	ulong bb, vreg;
966 
967 	z = bn/32;
968 	bb = 1L << (bn%32);
969 	vreg = regbits;
970 	if(!(r->act.b[z] & bb))
971 		return vreg;
972 	for(;;) {
973 		if(!(r->refbehind.b[z] & bb))
974 			break;
975 		r1 = r->p1;
976 		if(r1 == R)
977 			break;
978 		if(!(r1->refahead.b[z] & bb))
979 			break;
980 		if(!(r1->act.b[z] & bb))
981 			break;
982 		r = r1;
983 	}
984 	for(;;) {
985 		r->act.b[z] &= ~bb;
986 
987 		vreg |= r->regu;
988 
989 		if(r->refbehind.b[z] & bb)
990 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
991 				if(r1->refahead.b[z] & bb)
992 					vreg |= paint2(r1, bn);
993 
994 		if(!(r->refahead.b[z] & bb))
995 			break;
996 		r1 = r->s2;
997 		if(r1 != R)
998 			if(r1->refbehind.b[z] & bb)
999 				vreg |= paint2(r1, bn);
1000 		r = r->s1;
1001 		if(r == R)
1002 			break;
1003 		if(!(r->act.b[z] & bb))
1004 			break;
1005 		if(!(r->refbehind.b[z] & bb))
1006 			break;
1007 	}
1008 	return vreg;
1009 }
1010 
1011 void
1012 paint3(Reg *r, int bn, long rb, int rn)
1013 {
1014 	Reg *r1;
1015 	Prog *p;
1016 	int z;
1017 	ulong bb;
1018 
1019 	z = bn/32;
1020 	bb = 1L << (bn%32);
1021 	if(r->act.b[z] & bb)
1022 		return;
1023 	for(;;) {
1024 		if(!(r->refbehind.b[z] & bb))
1025 			break;
1026 		r1 = r->p1;
1027 		if(r1 == R)
1028 			break;
1029 		if(!(r1->refahead.b[z] & bb))
1030 			break;
1031 		if(r1->act.b[z] & bb)
1032 			break;
1033 		r = r1;
1034 	}
1035 
1036 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1037 		addmove(r, bn, rn, 0);
1038 	for(;;) {
1039 		r->act.b[z] |= bb;
1040 		p = r->prog;
1041 
1042 		if(r->use1.b[z] & bb) {
1043 			if(debug['R'])
1044 				print("%P", p);
1045 			addreg(&p->from, rn);
1046 			if(debug['R'])
1047 				print("\t.c%P\n", p);
1048 		}
1049 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1050 			if(debug['R'])
1051 				print("%P", p);
1052 			addreg(&p->to, rn);
1053 			if(debug['R'])
1054 				print("\t.c%P\n", p);
1055 		}
1056 
1057 		if(STORE(r) & r->regdiff.b[z] & bb)
1058 			addmove(r, bn, rn, 1);
1059 		r->regu |= rb;
1060 
1061 		if(r->refbehind.b[z] & bb)
1062 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1063 				if(r1->refahead.b[z] & bb)
1064 					paint3(r1, bn, rb, rn);
1065 
1066 		if(!(r->refahead.b[z] & bb))
1067 			break;
1068 		r1 = r->s2;
1069 		if(r1 != R)
1070 			if(r1->refbehind.b[z] & bb)
1071 				paint3(r1, bn, rb, rn);
1072 		r = r->s1;
1073 		if(r == R)
1074 			break;
1075 		if(r->act.b[z] & bb)
1076 			break;
1077 		if(!(r->refbehind.b[z] & bb))
1078 			break;
1079 	}
1080 }
1081 
1082 void
1083 addreg(Adr *a, int rn)
1084 {
1085 
1086 	a->sym = 0;
1087 	a->name = D_NONE;
1088 	a->type = D_REG;
1089 	a->reg = rn;
1090 	if(rn >= NREG) {
1091 		a->type = D_FREG;
1092 		a->reg = rn-NREG;
1093 	}
1094 }
1095 
1096 /*
1097  *	bit	reg
1098  *	0	R3
1099  *	1	R4
1100  *	...	...
1101  *	19	R22
1102  *	20	R23
1103  */
1104 long
1105 RtoB(int r)
1106 {
1107 
1108 	if(r < 3 || r > 23)
1109 		return 0;
1110 	return 1L << (r-3);
1111 }
1112 
1113 BtoR(long b)
1114 {
1115 
1116 	b &= 0x001fffffL;
1117 	if(b == 0)
1118 		return 0;
1119 	return bitno(b) + 3;
1120 }
1121 
1122 /*
1123  *	bit	reg
1124  *	22	F4
1125  *	23	F6
1126  *	...	...
1127  *	31	F22
1128  */
1129 long
1130 FtoB(int f)
1131 {
1132 
1133 	if(f < 4 || f > 22 || (f&1))
1134 		return 0;
1135 	return 1L << (f/2 + 20);
1136 }
1137 
1138 int
1139 BtoF(long b)
1140 {
1141 
1142 	b &= 0xffc00000L;
1143 	if(b == 0)
1144 		return 0;
1145 	return bitno(b)*2 - 40;
1146 }
1147