xref: /plan9-contrib/sys/src/cmd/9c/reg.c (revision fbadb1c4d4463e58337ffb1ed396c9caee5d1889)
1 #include "gc.h"
2 
3 Reg*
rega(void)4 rega(void)
5 {
6 	Reg *r;
7 
8 	r = freer;
9 	if(r == R) {
10 		r = alloc(sizeof(*r));
11 	} else
12 		freer = r->link;
13 
14 	*r = zreg;
15 	return r;
16 }
17 
18 int
rcmp(void * a1,void * a2)19 rcmp(void *a1, void *a2)
20 {
21 	Rgn *p1, *p2;
22 	int c1, c2;
23 
24 	p1 = a1;
25 	p2 = a2;
26 	c1 = p2->cost;
27 	c2 = p1->cost;
28 	if(c1 -= c2)
29 		return c1;
30 	return p2->varno - p1->varno;
31 }
32 
33 void
regopt(Prog * p)34 regopt(Prog *p)
35 {
36 	Reg *r, *r1, *r2;
37 	Prog *p1;
38 	int i, z;
39 	long initpc, val, npc;
40 	ulong vreg;
41 	Bits bit;
42 	struct
43 	{
44 		long	m;
45 		long	c;
46 		Reg*	p;
47 	} log5[6], *lp;
48 
49 	firstr = R;
50 	lastr = R;
51 	nvar = 0;
52 	regbits = 0;
53 	for(z=0; z<BITS; z++) {
54 		externs.b[z] = 0;
55 		params.b[z] = 0;
56 		consts.b[z] = 0;
57 		addrs.b[z] = 0;
58 	}
59 
60 	/*
61 	 * pass 1
62 	 * build aux data structure
63 	 * allocate pcs
64 	 * find use and set of variables
65 	 */
66 	val = 5L * 5L * 5L * 5L * 5L;
67 	lp = log5;
68 	for(i=0; i<5; i++) {
69 		lp->m = val;
70 		lp->c = 0;
71 		lp->p = R;
72 		val /= 5L;
73 		lp++;
74 	}
75 	val = 0;
76 	for(; p != P; p = p->link) {
77 		switch(p->as) {
78 		case ADATA:
79 		case AGLOBL:
80 		case ANAME:
81 		case ASIGNAME:
82 			continue;
83 		}
84 		r = rega();
85 		if(firstr == R) {
86 			firstr = r;
87 			lastr = r;
88 		} else {
89 			lastr->link = r;
90 			r->p1 = lastr;
91 			lastr->s1 = r;
92 			lastr = r;
93 		}
94 		r->prog = p;
95 		r->pc = val;
96 		val++;
97 
98 		lp = log5;
99 		for(i=0; i<5; i++) {
100 			lp->c--;
101 			if(lp->c <= 0) {
102 				lp->c = lp->m;
103 				if(lp->p != R)
104 					lp->p->log5 = r;
105 				lp->p = r;
106 				(lp+1)->c = 0;
107 				break;
108 			}
109 			lp++;
110 		}
111 
112 		r1 = r->p1;
113 		if(r1 != R)
114 		switch(r1->prog->as) {
115 		case ARETURN:
116 		case ABR:
117 		case ARFI:
118 		case ARFCI:
119 		case ARFID:
120 			r->p1 = R;
121 			r1->s1 = R;
122 		}
123 
124 		/*
125 		 * left side always read
126 		 */
127 		bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWZ || p->as == AMOVD);
128 		for(z=0; z<BITS; z++)
129 			r->use1.b[z] |= bit.b[z];
130 
131 		/*
132 		 * right side depends on opcode
133 		 */
134 		bit = mkvar(&p->to, 0);
135 		if(bany(&bit))
136 		switch(p->as) {
137 		default:
138 			diag(Z, "reg: unknown asop: %A", p->as);
139 			break;
140 
141 		/*
142 		 * right side write
143 		 */
144 		case ANOP:
145 		case AMOVB:
146 		case AMOVBU:
147 		case AMOVBZ:
148 		case AMOVBZU:
149 		case AMOVH:
150 		case AMOVHBR:
151 		case AMOVWBR:
152 		case AMOVHU:
153 		case AMOVHZ:
154 		case AMOVHZU:
155 		case AMOVW:
156 		case AMOVWU:
157 		case AMOVWZ:
158 		case AMOVWZU:
159 		case AMOVD:
160 		case AMOVDU:
161 		case AFMOVD:
162 		case AFMOVDCC:
163 		case AFMOVDU:
164 		case AFMOVS:
165 		case AFMOVSU:
166 		case AFRSP:
167 			for(z=0; z<BITS; z++)
168 				r->set.b[z] |= bit.b[z];
169 			break;
170 
171 		/*
172 		 * funny
173 		 */
174 		case ABL:
175 			for(z=0; z<BITS; z++)
176 				addrs.b[z] |= bit.b[z];
177 			break;
178 		}
179 	}
180 	if(firstr == R)
181 		return;
182 	initpc = pc - val;
183 	npc = val;
184 
185 	/*
186 	 * pass 2
187 	 * turn branch references to pointers
188 	 * build back pointers
189 	 */
190 	for(r = firstr; r != R; r = r->link) {
191 		p = r->prog;
192 		if(p->to.type == D_BRANCH) {
193 			val = p->to.offset - initpc;
194 			r1 = firstr;
195 			while(r1 != R) {
196 				r2 = r1->log5;
197 				if(r2 != R && val >= r2->pc) {
198 					r1 = r2;
199 					continue;
200 				}
201 				if(r1->pc == val)
202 					break;
203 				r1 = r1->link;
204 			}
205 			if(r1 == R) {
206 				nearln = p->lineno;
207 				diag(Z, "ref not found\n%P", p);
208 				continue;
209 			}
210 			if(r1 == r) {
211 				nearln = p->lineno;
212 				diag(Z, "ref to self\n%P", p);
213 				continue;
214 			}
215 			r->s2 = r1;
216 			r->p2link = r1->p2;
217 			r1->p2 = r;
218 		}
219 	}
220 	if(debug['R']) {
221 		p = firstr->prog;
222 		print("\n%L %D\n", p->lineno, &p->from);
223 	}
224 
225 	/*
226 	 * pass 2.5
227 	 * find looping structure
228 	 */
229 	for(r = firstr; r != R; r = r->link)
230 		r->active = 0;
231 	change = 0;
232 	loopit(firstr, npc);
233 	if(debug['R'] && debug['v']) {
234 		print("\nlooping structure:\n");
235 		for(r = firstr; r != R; r = r->link) {
236 			print("%ld:%P", r->loop, r->prog);
237 			for(z=0; z<BITS; z++)
238 				bit.b[z] = r->use1.b[z] |
239 					r->use2.b[z] | r->set.b[z];
240 			if(bany(&bit)) {
241 				print("\t");
242 				if(bany(&r->use1))
243 					print(" u1=%B", r->use1);
244 				if(bany(&r->use2))
245 					print(" u2=%B", r->use2);
246 				if(bany(&r->set))
247 					print(" st=%B", r->set);
248 			}
249 			print("\n");
250 		}
251 	}
252 
253 	/*
254 	 * pass 3
255 	 * iterate propagating usage
256 	 * 	back until flow graph is complete
257 	 */
258 loop1:
259 	change = 0;
260 	for(r = firstr; r != R; r = r->link)
261 		r->active = 0;
262 	for(r = firstr; r != R; r = r->link)
263 		if(r->prog->as == ARETURN)
264 			prop(r, zbits, zbits);
265 loop11:
266 	/* pick up unreachable code */
267 	i = 0;
268 	for(r = firstr; r != R; r = r1) {
269 		r1 = r->link;
270 		if(r1 && r1->active && !r->active) {
271 			prop(r, zbits, zbits);
272 			i = 1;
273 		}
274 	}
275 	if(i)
276 		goto loop11;
277 	if(change)
278 		goto loop1;
279 
280 
281 	/*
282 	 * pass 4
283 	 * iterate propagating register/variable synchrony
284 	 * 	forward until graph is complete
285 	 */
286 loop2:
287 	change = 0;
288 	for(r = firstr; r != R; r = r->link)
289 		r->active = 0;
290 	synch(firstr, zbits);
291 	if(change)
292 		goto loop2;
293 
294 
295 	/*
296 	 * pass 5
297 	 * isolate regions
298 	 * calculate costs (paint1)
299 	 */
300 	r = firstr;
301 	if(r) {
302 		for(z=0; z<BITS; z++)
303 			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
304 			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
305 		if(bany(&bit)) {
306 			nearln = r->prog->lineno;
307 			warn(Z, "used and not set: %B", bit);
308 			if(debug['R'] && !debug['w'])
309 				print("used and not set: %B\n", bit);
310 		}
311 	}
312 	if(debug['R'] && debug['v'])
313 		print("\nprop structure:\n");
314 	for(r = firstr; r != R; r = r->link)
315 		r->act = zbits;
316 	rgp = region;
317 	nregion = 0;
318 	for(r = firstr; r != R; r = r->link) {
319 		if(debug['R'] && debug['v'])
320 			print("%P\n	set = %B; rah = %B; cal = %B\n",
321 				r->prog, r->set, r->refahead, r->calahead);
322 		for(z=0; z<BITS; z++)
323 			bit.b[z] = r->set.b[z] &
324 			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
325 		if(bany(&bit)) {
326 			nearln = r->prog->lineno;
327 			warn(Z, "set and not used: %B", bit);
328 			if(debug['R'])
329 				print("set an not used: %B\n", bit);
330 			excise(r);
331 		}
332 		for(z=0; z<BITS; z++)
333 			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
334 		while(bany(&bit)) {
335 			i = bnum(bit);
336 			rgp->enter = r;
337 			rgp->varno = i;
338 			change = 0;
339 			if(debug['R'] && debug['v'])
340 				print("\n");
341 			paint1(r, i);
342 			bit.b[i/32] &= ~(1L<<(i%32));
343 			if(change <= 0) {
344 				if(debug['R'])
345 					print("%L$%d: %B\n",
346 						r->prog->lineno, change, blsh(i));
347 				continue;
348 			}
349 			rgp->cost = change;
350 			nregion++;
351 			if(nregion >= NRGN) {
352 				warn(Z, "too many regions");
353 				goto brk;
354 			}
355 			rgp++;
356 		}
357 	}
358 brk:
359 	qsort(region, nregion, sizeof(region[0]), rcmp);
360 
361 	/*
362 	 * pass 6
363 	 * determine used registers (paint2)
364 	 * replace code (paint3)
365 	 */
366 	rgp = region;
367 	for(i=0; i<nregion; i++) {
368 		bit = blsh(rgp->varno);
369 		vreg = paint2(rgp->enter, rgp->varno);
370 		vreg = allreg(vreg, rgp);
371 		if(debug['R']) {
372 			if(rgp->regno >= NREG)
373 				print("%L$%d F%d: %B\n",
374 					rgp->enter->prog->lineno,
375 					rgp->cost,
376 					rgp->regno-NREG,
377 					bit);
378 			else
379 				print("%L$%d R%d: %B\n",
380 					rgp->enter->prog->lineno,
381 					rgp->cost,
382 					rgp->regno,
383 					bit);
384 		}
385 		if(rgp->regno != 0)
386 			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
387 		rgp++;
388 	}
389 	/*
390 	 * pass 7
391 	 * peep-hole on basic block
392 	 */
393 	if(!debug['R'] || debug['P'])
394 		peep();
395 
396 	/*
397 	 * pass 8
398 	 * recalculate pc
399 	 */
400 	val = initpc;
401 	for(r = firstr; r != R; r = r1) {
402 		r->pc = val;
403 		p = r->prog;
404 		p1 = P;
405 		r1 = r->link;
406 		if(r1 != R)
407 			p1 = r1->prog;
408 		for(; p != p1; p = p->link) {
409 			switch(p->as) {
410 			default:
411 				val++;
412 				break;
413 
414 			case ANOP:
415 			case ADATA:
416 			case AGLOBL:
417 			case ANAME:
418 			case ASIGNAME:
419 				break;
420 			}
421 		}
422 	}
423 	pc = val;
424 
425 	/*
426 	 * fix up branches
427 	 */
428 	if(debug['R'])
429 		if(bany(&addrs))
430 			print("addrs: %B\n", addrs);
431 
432 	r1 = 0; /* set */
433 	for(r = firstr; r != R; r = r->link) {
434 		p = r->prog;
435 		if(p->to.type == D_BRANCH)
436 			p->to.offset = r->s2->pc;
437 		r1 = r;
438 	}
439 
440 	/*
441 	 * last pass
442 	 * eliminate nops
443 	 * free aux structures
444 	 */
445 	for(p = firstr->prog; p != P; p = p->link){
446 		while(p->link && p->link->as == ANOP)
447 			p->link = p->link->link;
448 	}
449 	if(r1 != R) {
450 		r1->link = freer;
451 		freer = firstr;
452 	}
453 }
454 
455 /*
456  * add mov b,rn
457  * just after r
458  */
459 void
addmove(Reg * r,int bn,int rn,int f)460 addmove(Reg *r, int bn, int rn, int f)
461 {
462 	Prog *p, *p1;
463 	Adr *a;
464 	Var *v;
465 
466 	p1 = alloc(sizeof(*p1));
467 	*p1 = zprog;
468 	p = r->prog;
469 
470 	p1->link = p->link;
471 	p->link = p1;
472 	p1->lineno = p->lineno;
473 
474 	v = var + bn;
475 
476 	a = &p1->to;
477 	a->sym = v->sym;
478 	a->name = v->name;
479 	a->offset = v->offset;
480 	a->etype = v->etype;
481 	a->type = D_OREG;
482 	if(a->etype == TARRAY || a->sym == S)
483 		a->type = D_CONST;
484 
485 	p1->as = AMOVW;
486 	if(v->etype == TCHAR || v->etype == TUCHAR)
487 		p1->as = AMOVB;
488 	if(v->etype == TSHORT || v->etype == TUSHORT)
489 		p1->as = AMOVH;
490 	if(v->etype == TVLONG || v->etype == TUVLONG || v->etype == TIND)
491 		p1->as = AMOVD;
492 	if(v->etype == TFLOAT)
493 		p1->as = AFMOVS;
494 	if(v->etype == TDOUBLE)
495 		p1->as = AFMOVD;
496 
497 	p1->from.type = D_REG;
498 	p1->from.reg = rn;
499 	if(rn >= NREG) {
500 		p1->from.type = D_FREG;
501 		p1->from.reg = rn-NREG;
502 	}
503 	if(!f) {
504 		p1->from = *a;
505 		*a = zprog.from;
506 		a->type = D_REG;
507 		a->reg = rn;
508 		if(rn >= NREG) {
509 			a->type = D_FREG;
510 			a->reg = rn-NREG;
511 		}
512 		if(v->etype == TUCHAR)
513 			p1->as = AMOVBZ;
514 		if(v->etype == TUSHORT)
515 			p1->as = AMOVHZ;
516 		if(v->etype == TUINT || v->etype == TULONG)
517 			p1->as = AMOVWZ;
518 	}
519 	if(debug['R'])
520 		print("%P\t.a%P\n", p, p1);
521 }
522 
523 Bits
mkvar(Adr * a,int docon)524 mkvar(Adr *a, int docon)
525 {
526 	Var *v;
527 	int i, t, n, et, z;
528 	long o;
529 	Bits bit;
530 	Sym *s;
531 
532 	t = a->type;
533 	if(t == D_REG && a->reg != NREG)
534 		regbits |= RtoB(a->reg);
535 	if(t == D_FREG && a->reg != NREG)
536 		regbits |= FtoB(a->reg);
537 	s = a->sym;
538 	o = a->offset;
539 	et = a->etype;
540 	if(s == S) {
541 		if(t != D_CONST || !docon || a->reg != NREG)
542 			goto none;
543 		et = TLONG;
544 	}
545 	if(t == D_CONST) {
546 		if(s == S && sval(o))
547 			goto none;
548 	}
549 	n = a->name;
550 	v = var;
551 	for(i=0; i<nvar; i++) {
552 		if(s == v->sym)
553 		if(n == v->name)
554 		if(o == v->offset)
555 			goto out;
556 		v++;
557 	}
558 	if(s)
559 		if(s->name[0] == '.')
560 			goto none;
561 	if(nvar >= NVAR) {
562 		if(debug['w'] > 1 && s)
563 			warn(Z, "variable not optimized: %s", s->name);
564 		goto none;
565 	}
566 	i = nvar;
567 	nvar++;
568 	v = &var[i];
569 	v->sym = s;
570 	v->offset = o;
571 	v->etype = et;
572 	v->name = n;
573 	if(debug['R'])
574 		print("bit=%2d et=%2d %D\n", i, et, a);
575 out:
576 	bit = blsh(i);
577 	if(n == D_EXTERN || n == D_STATIC)
578 		for(z=0; z<BITS; z++)
579 			externs.b[z] |= bit.b[z];
580 	if(n == D_PARAM)
581 		for(z=0; z<BITS; z++)
582 			params.b[z] |= bit.b[z];
583 	if(v->etype != et || !(typechlpfd[et] || typev[et]))	/* funny punning */
584 		for(z=0; z<BITS; z++)
585 			addrs.b[z] |= bit.b[z];
586 	if(t == D_CONST) {
587 		if(s == S) {
588 			for(z=0; z<BITS; z++)
589 				consts.b[z] |= bit.b[z];
590 			return bit;
591 		}
592 		if(et != TARRAY)
593 			for(z=0; z<BITS; z++)
594 				addrs.b[z] |= bit.b[z];
595 		for(z=0; z<BITS; z++)
596 			params.b[z] |= bit.b[z];
597 		return bit;
598 	}
599 	if(t == D_OREG)
600 		return bit;
601 
602 none:
603 	return zbits;
604 }
605 
606 void
prop(Reg * r,Bits ref,Bits cal)607 prop(Reg *r, Bits ref, Bits cal)
608 {
609 	Reg *r1, *r2;
610 	int z;
611 
612 	for(r1 = r; r1 != R; r1 = r1->p1) {
613 		for(z=0; z<BITS; z++) {
614 			ref.b[z] |= r1->refahead.b[z];
615 			if(ref.b[z] != r1->refahead.b[z]) {
616 				r1->refahead.b[z] = ref.b[z];
617 				change++;
618 			}
619 			cal.b[z] |= r1->calahead.b[z];
620 			if(cal.b[z] != r1->calahead.b[z]) {
621 				r1->calahead.b[z] = cal.b[z];
622 				change++;
623 			}
624 		}
625 		switch(r1->prog->as) {
626 		case ABL:
627 			for(z=0; z<BITS; z++) {
628 				cal.b[z] |= ref.b[z] | externs.b[z];
629 				ref.b[z] = 0;
630 			}
631 			break;
632 
633 		case ATEXT:
634 			for(z=0; z<BITS; z++) {
635 				cal.b[z] = 0;
636 				ref.b[z] = 0;
637 			}
638 			break;
639 
640 		case ARETURN:
641 			for(z=0; z<BITS; z++) {
642 				cal.b[z] = externs.b[z];
643 				ref.b[z] = 0;
644 			}
645 		}
646 		for(z=0; z<BITS; z++) {
647 			ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
648 				r1->use1.b[z] | r1->use2.b[z];
649 			cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
650 			r1->refbehind.b[z] = ref.b[z];
651 			r1->calbehind.b[z] = cal.b[z];
652 		}
653 		if(r1->active)
654 			break;
655 		r1->active = 1;
656 	}
657 	for(; r != r1; r = r->p1)
658 		for(r2 = r->p2; r2 != R; r2 = r2->p2link)
659 			prop(r2, r->refbehind, r->calbehind);
660 }
661 
662 /*
663  * find looping structure
664  *
665  * 1) find reverse postordering
666  * 2) find approximate dominators,
667  *	the actual dominators if the flow graph is reducible
668  *	otherwise, dominators plus some other non-dominators.
669  *	See Matthew S. Hecht and Jeffrey D. Ullman,
670  *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
671  *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
672  *	Oct. 1-3, 1973, pp.  207-217.
673  * 3) find all nodes with a predecessor dominated by the current node.
674  *	such a node is a loop head.
675  *	recursively, all preds with a greater rpo number are in the loop
676  */
677 long
postorder(Reg * r,Reg ** rpo2r,long n)678 postorder(Reg *r, Reg **rpo2r, long n)
679 {
680 	Reg *r1;
681 
682 	r->rpo = 1;
683 	r1 = r->s1;
684 	if(r1 && !r1->rpo)
685 		n = postorder(r1, rpo2r, n);
686 	r1 = r->s2;
687 	if(r1 && !r1->rpo)
688 		n = postorder(r1, rpo2r, n);
689 	rpo2r[n] = r;
690 	n++;
691 	return n;
692 }
693 
694 long
rpolca(long * idom,long rpo1,long rpo2)695 rpolca(long *idom, long rpo1, long rpo2)
696 {
697 	long t;
698 
699 	if(rpo1 == -1)
700 		return rpo2;
701 	while(rpo1 != rpo2){
702 		if(rpo1 > rpo2){
703 			t = rpo2;
704 			rpo2 = rpo1;
705 			rpo1 = t;
706 		}
707 		while(rpo1 < rpo2){
708 			t = idom[rpo2];
709 			if(t >= rpo2)
710 				fatal(Z, "bad idom");
711 			rpo2 = t;
712 		}
713 	}
714 	return rpo1;
715 }
716 
717 int
doms(long * idom,long r,long s)718 doms(long *idom, long r, long s)
719 {
720 	while(s > r)
721 		s = idom[s];
722 	return s == r;
723 }
724 
725 int
loophead(long * idom,Reg * r)726 loophead(long *idom, Reg *r)
727 {
728 	long src;
729 
730 	src = r->rpo;
731 	if(r->p1 != R && doms(idom, src, r->p1->rpo))
732 		return 1;
733 	for(r = r->p2; r != R; r = r->p2link)
734 		if(doms(idom, src, r->rpo))
735 			return 1;
736 	return 0;
737 }
738 
739 void
loopmark(Reg ** rpo2r,long head,Reg * r)740 loopmark(Reg **rpo2r, long head, Reg *r)
741 {
742 	if(r->rpo < head || r->active == head)
743 		return;
744 	r->active = head;
745 	r->loop += LOOP;
746 	if(r->p1 != R)
747 		loopmark(rpo2r, head, r->p1);
748 	for(r = r->p2; r != R; r = r->p2link)
749 		loopmark(rpo2r, head, r);
750 }
751 
752 void
loopit(Reg * r,long nr)753 loopit(Reg *r, long nr)
754 {
755 	Reg *r1;
756 	long i, d, me;
757 
758 	if(nr > maxnr) {
759 		rpo2r = alloc(nr * sizeof(Reg*));
760 		idom = alloc(nr * sizeof(long));
761 		maxnr = nr;
762 	}
763 
764 	d = postorder(r, rpo2r, 0);
765 	if(d > nr)
766 		fatal(Z, "too many reg nodes");
767 	nr = d;
768 	for(i = 0; i < nr / 2; i++){
769 		r1 = rpo2r[i];
770 		rpo2r[i] = rpo2r[nr - 1 - i];
771 		rpo2r[nr - 1 - i] = r1;
772 	}
773 	for(i = 0; i < nr; i++)
774 		rpo2r[i]->rpo = i;
775 
776 	idom[0] = 0;
777 	for(i = 0; i < nr; i++){
778 		r1 = rpo2r[i];
779 		me = r1->rpo;
780 		d = -1;
781 		if(r1->p1 != R && r1->p1->rpo < me)
782 			d = r1->p1->rpo;
783 		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
784 			if(r1->rpo < me)
785 				d = rpolca(idom, d, r1->rpo);
786 		idom[i] = d;
787 	}
788 
789 	for(i = 0; i < nr; i++){
790 		r1 = rpo2r[i];
791 		r1->loop++;
792 		if(r1->p2 != R && loophead(idom, r1))
793 			loopmark(rpo2r, i, r1);
794 	}
795 }
796 
797 void
synch(Reg * r,Bits dif)798 synch(Reg *r, Bits dif)
799 {
800 	Reg *r1;
801 	int z;
802 
803 	for(r1 = r; r1 != R; r1 = r1->s1) {
804 		for(z=0; z<BITS; z++) {
805 			dif.b[z] = (dif.b[z] &
806 				~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
807 					r1->set.b[z] | r1->regdiff.b[z];
808 			if(dif.b[z] != r1->regdiff.b[z]) {
809 				r1->regdiff.b[z] = dif.b[z];
810 				change++;
811 			}
812 		}
813 		if(r1->active)
814 			break;
815 		r1->active = 1;
816 		for(z=0; z<BITS; z++)
817 			dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
818 		if(r1->s2 != R)
819 			synch(r1->s2, dif);
820 	}
821 }
822 
823 ulong
allreg(ulong b,Rgn * r)824 allreg(ulong b, Rgn *r)
825 {
826 	Var *v;
827 	int i;
828 
829 	v = var + r->varno;
830 	r->regno = 0;
831 	switch(v->etype) {
832 
833 	default:
834 		diag(Z, "unknown etype %d/%d", bitno(b), v->etype);
835 		break;
836 
837 	case TCHAR:
838 	case TUCHAR:
839 	case TSHORT:
840 	case TUSHORT:
841 	case TINT:
842 	case TUINT:
843 	case TLONG:
844 	case TULONG:
845 	case TIND:
846 	case TVLONG:
847 	case TUVLONG:
848 	case TARRAY:
849 		i = BtoR(~b);
850 		if(i && r->cost > 0) {
851 			r->regno = i;
852 			return RtoB(i);
853 		}
854 		break;
855 
856 	case TDOUBLE:
857 	case TFLOAT:
858 		i = BtoF(~b);
859 		if(i && r->cost > 0) {
860 			r->regno = i+NREG;
861 			return FtoB(i);
862 		}
863 		break;
864 	}
865 	return 0;
866 }
867 
868 void
paint1(Reg * r,int bn)869 paint1(Reg *r, int bn)
870 {
871 	Reg *r1;
872 	Prog *p;
873 	int z;
874 	ulong bb;
875 
876 	z = bn/32;
877 	bb = 1L<<(bn%32);
878 	if(r->act.b[z] & bb)
879 		return;
880 	for(;;) {
881 		if(!(r->refbehind.b[z] & bb))
882 			break;
883 		r1 = r->p1;
884 		if(r1 == R)
885 			break;
886 		if(!(r1->refahead.b[z] & bb))
887 			break;
888 		if(r1->act.b[z] & bb)
889 			break;
890 		r = r1;
891 	}
892 
893 	if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
894 		change -= CLOAD * r->loop;
895 		if(debug['R'] && debug['v'])
896 			print("%ld%P\tld %B $%d\n", r->loop,
897 				r->prog, blsh(bn), change);
898 	}
899 	for(;;) {
900 		r->act.b[z] |= bb;
901 		p = r->prog;
902 
903 		if(r->use1.b[z] & bb) {
904 			change += CREF * r->loop;
905 			if(p->to.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
906 				change = -CINF;		/* cant go Rreg to Freg */
907 			if(debug['R'] && debug['v'])
908 				print("%ld%P\tu1 %B $%d\n", r->loop,
909 					p, blsh(bn), change);
910 		}
911 
912 		if((r->use2.b[z]|r->set.b[z]) & bb) {
913 			change += CREF * r->loop;
914 			if(p->from.type == D_FREG && (p->as == AMOVW || p->as == AMOVD))
915 				change = -CINF;		/* cant go Rreg to Freg */
916 			if(debug['R'] && debug['v'])
917 				print("%ld%P\tu2 %B $%d\n", r->loop,
918 					p, blsh(bn), change);
919 		}
920 
921 		if(STORE(r) & r->regdiff.b[z] & bb) {
922 			change -= CLOAD * r->loop;
923 			if(debug['R'] && debug['v'])
924 				print("%ld%P\tst %B $%d\n", r->loop,
925 					p, blsh(bn), change);
926 		}
927 
928 		if(r->refbehind.b[z] & bb)
929 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
930 				if(r1->refahead.b[z] & bb)
931 					paint1(r1, bn);
932 
933 		if(!(r->refahead.b[z] & bb))
934 			break;
935 		r1 = r->s2;
936 		if(r1 != R)
937 			if(r1->refbehind.b[z] & bb)
938 				paint1(r1, bn);
939 		r = r->s1;
940 		if(r == R)
941 			break;
942 		if(r->act.b[z] & bb)
943 			break;
944 		if(!(r->refbehind.b[z] & bb))
945 			break;
946 	}
947 }
948 
949 ulong
paint2(Reg * r,int bn)950 paint2(Reg *r, int bn)
951 {
952 	Reg *r1;
953 	int z;
954 	ulong bb, vreg;
955 
956 	z = bn/32;
957 	bb = 1L << (bn%32);
958 	vreg = regbits;
959 	if(!(r->act.b[z] & bb))
960 		return vreg;
961 	for(;;) {
962 		if(!(r->refbehind.b[z] & bb))
963 			break;
964 		r1 = r->p1;
965 		if(r1 == R)
966 			break;
967 		if(!(r1->refahead.b[z] & bb))
968 			break;
969 		if(!(r1->act.b[z] & bb))
970 			break;
971 		r = r1;
972 	}
973 	for(;;) {
974 		r->act.b[z] &= ~bb;
975 
976 		vreg |= r->regu;
977 
978 		if(r->refbehind.b[z] & bb)
979 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
980 				if(r1->refahead.b[z] & bb)
981 					vreg |= paint2(r1, bn);
982 
983 		if(!(r->refahead.b[z] & bb))
984 			break;
985 		r1 = r->s2;
986 		if(r1 != R)
987 			if(r1->refbehind.b[z] & bb)
988 				vreg |= paint2(r1, bn);
989 		r = r->s1;
990 		if(r == R)
991 			break;
992 		if(!(r->act.b[z] & bb))
993 			break;
994 		if(!(r->refbehind.b[z] & bb))
995 			break;
996 	}
997 	return vreg;
998 }
999 
1000 void
paint3(Reg * r,int bn,long rb,int rn)1001 paint3(Reg *r, int bn, long rb, int rn)
1002 {
1003 	Reg *r1;
1004 	Prog *p;
1005 	int z;
1006 	ulong bb;
1007 
1008 	z = bn/32;
1009 	bb = 1L << (bn%32);
1010 	if(r->act.b[z] & bb)
1011 		return;
1012 	for(;;) {
1013 		if(!(r->refbehind.b[z] & bb))
1014 			break;
1015 		r1 = r->p1;
1016 		if(r1 == R)
1017 			break;
1018 		if(!(r1->refahead.b[z] & bb))
1019 			break;
1020 		if(r1->act.b[z] & bb)
1021 			break;
1022 		r = r1;
1023 	}
1024 
1025 	if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1026 		addmove(r, bn, rn, 0);
1027 	for(;;) {
1028 		r->act.b[z] |= bb;
1029 		p = r->prog;
1030 
1031 		if(r->use1.b[z] & bb) {
1032 			if(debug['R'])
1033 				print("%P", p);
1034 			addreg(&p->from, rn);
1035 			if(debug['R'])
1036 				print("\t.c%P\n", p);
1037 		}
1038 		if((r->use2.b[z]|r->set.b[z]) & bb) {
1039 			if(debug['R'])
1040 				print("%P", p);
1041 			addreg(&p->to, rn);
1042 			if(debug['R'])
1043 				print("\t.c%P\n", p);
1044 		}
1045 
1046 		if(STORE(r) & r->regdiff.b[z] & bb)
1047 			addmove(r, bn, rn, 1);
1048 		r->regu |= rb;
1049 
1050 		if(r->refbehind.b[z] & bb)
1051 			for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1052 				if(r1->refahead.b[z] & bb)
1053 					paint3(r1, bn, rb, rn);
1054 
1055 		if(!(r->refahead.b[z] & bb))
1056 			break;
1057 		r1 = r->s2;
1058 		if(r1 != R)
1059 			if(r1->refbehind.b[z] & bb)
1060 				paint3(r1, bn, rb, rn);
1061 		r = r->s1;
1062 		if(r == R)
1063 			break;
1064 		if(r->act.b[z] & bb)
1065 			break;
1066 		if(!(r->refbehind.b[z] & bb))
1067 			break;
1068 	}
1069 }
1070 
1071 void
addreg(Adr * a,int rn)1072 addreg(Adr *a, int rn)
1073 {
1074 
1075 	a->sym = 0;
1076 	a->name = D_NONE;
1077 	a->type = D_REG;
1078 	a->reg = rn;
1079 	if(rn >= NREG) {
1080 		a->type = D_FREG;
1081 		a->reg = rn-NREG;
1082 	}
1083 }
1084 
1085 /*
1086  * track register variables including external registers:
1087  *	bit	reg
1088  *	0	R7
1089  *	1	R8
1090  *	...	...
1091  *	21	R28
1092  */
1093 long
RtoB(int r)1094 RtoB(int r)
1095 {
1096 
1097 	if(r >= REGMIN && r <= REGMAX)
1098 		return 1L << (r-REGMIN);
1099 	return 0;
1100 }
1101 
1102 int
BtoR(long b)1103 BtoR(long b)
1104 {
1105 	b &= 0x001fffffL;
1106 	if(b == 0)
1107 		return 0;
1108 	return bitno(b) + REGMIN;
1109 }
1110 
1111 /*
1112  *	bit	reg
1113  *	22	F17
1114  *	23	F18
1115  *	...	...
1116  *	31	F26
1117  */
1118 long
FtoB(int f)1119 FtoB(int f)
1120 {
1121 	if(f < FREGMIN || f > FREGEXT)
1122 		return 0;
1123 	return 1L << (f - FREGMIN + 22);
1124 }
1125 
1126 int
BtoF(long b)1127 BtoF(long b)
1128 {
1129 
1130 	b &= 0xffc00000L;
1131 	if(b == 0)
1132 		return 0;
1133 	return bitno(b) - 22 + FREGMIN;
1134 }
1135