xref: /plan9-contrib/sys/src/cmd/4l/sched.c (revision f8bc6aaf8056e137bcdfb6117a990ac3eff62cc9)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_HILO	= 1<<0,
6 	E_FCR	= 1<<1,
7 	E_MCR	= 1<<2,
8 	E_MEM	= 1<<3,
9 	E_MEMSP	= 1<<4,	/* uses offset and size */
10 	E_MEMSB	= 1<<5,	/* uses offset and size */
11 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12 	DELAY	= BRANCH|LOAD|FCMP,
13 };
14 
15 typedef	struct	Sch	Sch;
16 typedef	struct	Dep	Dep;
17 
18 struct	Dep
19 {
20 	ulong	ireg;
21 	ulong	freg;
22 	ulong	cc;
23 };
24 struct	Sch
25 {
26 	Prog	p;
27 	Dep	set;
28 	Dep	used;
29 	long	soffset;
30 	char	size;
31 	char	nop;
32 	char	comp;
33 };
34 
35 void	regsused(Sch*, Prog*);
36 int	depend(Sch*, Sch*);
37 int	conflict(Sch*, Sch*);
38 int	offoverlap(Sch*, Sch*);
39 void	dumpbits(Sch*, Dep*);
40 
41 void
sched(Prog * p0,Prog * pe)42 sched(Prog *p0, Prog *pe)
43 {
44 	Prog *p, *q;
45 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46 
47 	/*
48 	 * build side structure
49 	 */
50 	s = sch;
51 	for(p=p0;; p=p->link) {
52 		memset(s, 0, sizeof(*s));
53 		s->p = *p;
54 		regsused(s, p);
55 		if(debug['X']) {
56 			Bprint(&bso, "%P\t\tset", &s->p);
57 			dumpbits(s, &s->set);
58 			Bprint(&bso, "; used");
59 			dumpbits(s, &s->used);
60 			if(s->comp)
61 				Bprint(&bso, "; compound");
62 			if(s->p.mark & LOAD)
63 				Bprint(&bso, "; load");
64 			if(s->p.mark & BRANCH)
65 				Bprint(&bso, "; branch");
66 			if(s->p.mark & FCMP)
67 				Bprint(&bso, "; fcmp");
68 			Bprint(&bso, "\n");
69 		}
70 		if(p == pe)
71 			break;
72 		s++;
73 	}
74 	se = s;
75 
76 	/*
77 	 * prepass to move things around
78 	 * does nothing, but tries to make
79 	 * the actual scheduler work better
80 	 */
81 	for(s=sch; s<=se; s++) {
82 		if(!(s->p.mark & LOAD))
83 			continue;
84 		/* always good to put nonconflict loads together */
85 		for(t=s+1; t<=se; t++) {
86 			if(!(t->p.mark & LOAD))
87 				continue;
88 			if(t->p.mark & BRANCH)
89 				break;
90 			if(conflict(s, t))
91 				break;
92 			for(u=t-1; u>s; u--)
93 				if(depend(u, t))
94 					goto no11;
95 			u = s+1;
96 			stmp = *t;
97 			memmove(s+2, u, (uchar*)t - (uchar*)u);
98 			*u = stmp;
99 			break;
100 		}
101 	no11:
102 
103 		/* put schedule fodder above load */
104 		for(t=s+1; t<=se; t++) {
105 			if(t->p.mark & BRANCH)
106 				break;
107 			if(s > sch && conflict(s-1, t))
108 				continue;
109 			for(u=t-1; u>=s; u--)
110 				if(depend(t, u))
111 					goto no1;
112 			stmp = *t;
113 			memmove(s+1, s, (uchar*)t - (uchar*)s);
114 			*s = stmp;
115 			if(!(s->p.mark & LOAD))
116 				break;
117 		no1:;
118 		}
119 	}
120 
121 	for(s=se; s>=sch; s--) {
122 		if(!(s->p.mark & DELAY))
123 			continue;
124 		if(s < se)
125 			if(!conflict(s, s+1))
126 				goto out3;
127 		/*
128 		 * s is load, s+1 is immediate use of result or end of block
129 		 * t is the trial instruction to insert between s and s+1
130 		 */
131 		if(!debug['Y'])
132 		for(t=s-1; t>=sch; t--) {
133 			if(t->comp)
134 				if(s->p.mark & BRANCH)
135 					goto no2;
136 			if(t->p.mark & DELAY)
137 				if(s >= se || conflict(t, s+1))
138 					goto no2;
139 			for(u=t+1; u<=s; u++)
140 				if(depend(u, t))
141 					goto no2;
142 			goto out2;
143 		no2:;
144 		}
145 		if(debug['X'])
146 			Bprint(&bso, "?l%P\n", &s->p);
147 		s->nop = 1;
148 		if(debug['v']) {
149 			if(s->p.mark & LOAD) {
150 				nop.load.count++;
151 				nop.load.outof++;
152 			}
153 			if(s->p.mark & BRANCH) {
154 				nop.branch.count++;
155 				nop.branch.outof++;
156 			}
157 			if(s->p.mark & FCMP) {
158 				nop.fcmp.count++;
159 				nop.fcmp.outof++;
160 			}
161 		}
162 		continue;
163 
164 	out2:
165 		if(debug['X']) {
166 			Bprint(&bso, "!l%P\n", &t->p);
167 			Bprint(&bso, "%P\n", &s->p);
168 		}
169 		stmp = *t;
170 		memmove(t, t+1, (uchar*)s - (uchar*)t);
171 		*s = stmp;
172 		s--;
173 
174 	out3:
175 		if(debug['v']) {
176 			if(s->p.mark & LOAD)
177 				nop.load.outof++;
178 			if(s->p.mark & BRANCH)
179 				nop.branch.outof++;
180 			if(s->p.mark & FCMP)
181 				nop.fcmp.outof++;
182 		}
183 	}
184 
185 	/* Avoid HI/LO use->set */
186 	t = sch+1;
187 	for(s=sch; s<se-1; s++, t++) {
188 		if((s->used.cc & E_HILO) == 0)
189 			continue;
190 		if(t->set.cc & E_HILO)
191 			s->nop = 2;
192 	}
193 
194 	/*
195 	 * put it all back
196 	 */
197 	for(s=sch, p=p0; s<=se; s++, p=q) {
198 		q = p->link;
199 		if(q != s->p.link) {
200 			*p = s->p;
201 			p->link = q;
202 		}
203 		while(s->nop--)
204 			addnop(p);
205 	}
206 	if(debug['X']) {
207 		Bprint(&bso, "\n");
208 		Bflush(&bso);
209 	}
210 }
211 
212 void
regsused(Sch * s,Prog * realp)213 regsused(Sch *s, Prog *realp)
214 {
215 	int c, ar, ad, ld, sz;
216 	ulong m;
217 	Prog *p;
218 
219 	p = &s->p;
220 	s->comp = compound(p);
221 	s->nop = 0;
222 	if(s->comp) {
223 		s->set.ireg |= 1<<REGTMP;
224 		s->used.ireg |= 1<<REGTMP;
225 	}
226 
227 	ar = 0;		/* dest is really reference */
228 	ad = 0;		/* source/dest is really address */
229 	ld = 0;		/* opcode is load instruction */
230 	sz = 20;	/* size of load/store for overlap computation */
231 
232 /*
233  * flags based on opcode
234  */
235 	switch(p->as) {
236 	case ATEXT:
237 		curtext = realp;
238 		autosize = p->to.offset + 8;
239 		ad = 1;
240 		break;
241 	case AJAL:
242 		c = p->reg;
243 		if(c == NREG)
244 			c = REGLINK;
245 		s->set.ireg |= 1<<c;
246 		ar = 1;
247 		ad = 1;
248 		break;
249 	case ABGEZAL:
250 	case ABLTZAL:
251 		s->set.ireg |= 1<<REGLINK;
252 	case ABEQ:
253 	case ABGEZ:
254 	case ABGTZ:
255 	case ABLEZ:
256 	case ABLTZ:
257 	case ABNE:
258 		ar = 1;
259 		ad = 1;
260 		break;
261 	case ABFPT:
262 	case ABFPF:
263 		ad = 1;
264 		s->used.cc |= E_FCR;
265 		break;
266 	case ACMPEQD:
267 	case ACMPEQF:
268 	case ACMPGED:
269 	case ACMPGEF:
270 	case ACMPGTD:
271 	case ACMPGTF:
272 		ar = 1;
273 		s->set.cc |= E_FCR;
274 		p->mark |= FCMP;
275 		break;
276 	case AJMP:
277 		ar = 1;
278 		ad = 1;
279 		break;
280 	case AMOVB:
281 	case AMOVBU:
282 		sz = 1;
283 		ld = 1;
284 		break;
285 	case AMOVH:
286 	case AMOVHU:
287 		sz = 2;
288 		ld = 1;
289 		break;
290 	case AMOVF:
291 	case AMOVW:
292 	case AMOVWL:
293 	case AMOVWR:
294 		sz = 4;
295 		ld = 1;
296 		break;
297 	case AMOVD:
298 	case AMOVV:
299 	case AMOVVL:
300 	case AMOVVR:
301 		sz = 8;
302 		ld = 1;
303 		break;
304 	case ADIV:
305 	case ADIVU:
306 	case AMUL:
307 	case AMULU:
308 	case AREM:
309 	case AREMU:
310 	case ADIVV:
311 	case ADIVVU:
312 	case AMULV:
313 	case AMULVU:
314 	case AREMV:
315 	case AREMVU:
316 		s->set.cc = E_HILO;
317 	case AADD:
318 	case AADDU:
319 	case AADDV:
320 	case AADDVU:
321 	case AAND:
322 	case ANOR:
323 	case AOR:
324 	case ASGT:
325 	case ASGTU:
326 	case ASLL:
327 	case ASRA:
328 	case ASRL:
329 	case ASLLV:
330 	case ASRAV:
331 	case ASRLV:
332 	case ASUB:
333 	case ASUBU:
334 	case ASUBV:
335 	case ASUBVU:
336 	case AXOR:
337 
338 	case AADDD:
339 	case AADDF:
340 	case AADDW:
341 	case ASUBD:
342 	case ASUBF:
343 	case ASUBW:
344 	case AMULF:
345 	case AMULD:
346 	case AMULW:
347 	case ADIVF:
348 	case ADIVD:
349 	case ADIVW:
350 		if(p->reg == NREG) {
351 			if(p->to.type == D_REG || p->to.type == D_FREG)
352 				p->reg = p->to.reg;
353 			if(p->reg == NREG)
354 				print("botch %P\n", p);
355 		}
356 		break;
357 	}
358 
359 /*
360  * flags based on 'to' field
361  */
362 	c = p->to.class;
363 	if(c == 0) {
364 		c = aclass(&p->to) + 1;
365 		p->to.class = c;
366 	}
367 	c--;
368 	switch(c) {
369 	default:
370 		print("unknown class %d %D\n", c, &p->to);
371 
372 	case C_ZCON:
373 	case C_SCON:
374 	case C_ADD0CON:
375 	case C_AND0CON:
376 	case C_ADDCON:
377 	case C_ANDCON:
378 	case C_UCON:
379 	case C_LCON:
380 	case C_NONE:
381 	case C_SBRA:
382 	case C_LBRA:
383 		break;
384 
385 	case C_HI:
386 	case C_LO:
387 		s->set.cc |= E_HILO;
388 		break;
389 	case C_FCREG:
390 		s->set.cc |= E_FCR;
391 		break;
392 	case C_MREG:
393 		s->set.cc |= E_MCR;
394 		break;
395 	case C_ZOREG:
396 	case C_SOREG:
397 	case C_LOREG:
398 		c = p->to.reg;
399 		s->used.ireg |= 1<<c;
400 		if(ad)
401 			break;
402 		s->size = sz;
403 		s->soffset = regoff(&p->to);
404 
405 		m = ANYMEM;
406 		if(c == REGSB)
407 			m = E_MEMSB;
408 		if(c == REGSP)
409 			m = E_MEMSP;
410 
411 		if(ar)
412 			s->used.cc |= m;
413 		else
414 			s->set.cc |= m;
415 		break;
416 	case C_SACON:
417 	case C_LACON:
418 		s->used.ireg |= 1<<REGSP;
419 		break;
420 	case C_SECON:
421 	case C_LECON:
422 		s->used.ireg |= 1<<REGSB;
423 		break;
424 	case C_REG:
425 		if(ar)
426 			s->used.ireg |= 1<<p->to.reg;
427 		else
428 			s->set.ireg |= 1<<p->to.reg;
429 		break;
430 	case C_FREG:
431 		/* do better -- determine double prec */
432 		if(ar) {
433 			s->used.freg |= 1<<p->to.reg;
434 			s->used.freg |= 1<<(p->to.reg|1);
435 		} else {
436 			s->set.freg |= 1<<p->to.reg;
437 			s->set.freg |= 1<<(p->to.reg|1);
438 		}
439 		if(ld && p->from.type == D_REG)
440 			p->mark |= LOAD;
441 		break;
442 	case C_SAUTO:
443 	case C_LAUTO:
444 		s->used.ireg |= 1<<REGSP;
445 		if(ad)
446 			break;
447 		s->size = sz;
448 		s->soffset = regoff(&p->to);
449 
450 		if(ar)
451 			s->used.cc |= E_MEMSP;
452 		else
453 			s->set.cc |= E_MEMSP;
454 		break;
455 	case C_SEXT:
456 	case C_LEXT:
457 		s->used.ireg |= 1<<REGSB;
458 		if(ad)
459 			break;
460 		s->size = sz;
461 		s->soffset = regoff(&p->to);
462 
463 		if(ar)
464 			s->used.cc |= E_MEMSB;
465 		else
466 			s->set.cc |= E_MEMSB;
467 		break;
468 	}
469 
470 /*
471  * flags based on 'from' field
472  */
473 	c = p->from.class;
474 	if(c == 0) {
475 		c = aclass(&p->from) + 1;
476 		p->from.class = c;
477 	}
478 	c--;
479 	switch(c) {
480 	default:
481 		print("unknown class %d %D\n", c, &p->from);
482 
483 	case C_ZCON:
484 	case C_SCON:
485 	case C_ADD0CON:
486 	case C_AND0CON:
487 	case C_ADDCON:
488 	case C_ANDCON:
489 	case C_UCON:
490 	case C_LCON:
491 	case C_NONE:
492 	case C_SBRA:
493 	case C_LBRA:
494 		break;
495 	case C_HI:
496 	case C_LO:
497 		s->used.cc |= E_HILO;
498 		break;
499 	case C_FCREG:
500 		s->used.cc |= E_FCR;
501 		break;
502 	case C_MREG:
503 		s->used.cc |= E_MCR;
504 		break;
505 	case C_ZOREG:
506 	case C_SOREG:
507 	case C_LOREG:
508 		c = p->from.reg;
509 		s->used.ireg |= 1<<c;
510 		if(ld)
511 			p->mark |= LOAD;
512 		s->size = sz;
513 		s->soffset = regoff(&p->from);
514 
515 		m = ANYMEM;
516 		if(c == REGSB)
517 			m = E_MEMSB;
518 		if(c == REGSP)
519 			m = E_MEMSP;
520 
521 		s->used.cc |= m;
522 		break;
523 	case C_SACON:
524 	case C_LACON:
525 		s->used.ireg |= 1<<REGSP;
526 		break;
527 	case C_SECON:
528 	case C_LECON:
529 		s->used.ireg |= 1<<REGSB;
530 		break;
531 	case C_REG:
532 		s->used.ireg |= 1<<p->from.reg;
533 		break;
534 	case C_FREG:
535 		/* do better -- determine double prec */
536 		s->used.freg |= 1<<p->from.reg;
537 		s->used.freg |= 1<<(p->from.reg|1);
538 		if(ld && p->to.type == D_REG)
539 			p->mark |= LOAD;
540 		break;
541 	case C_SAUTO:
542 	case C_LAUTO:
543 		s->used.ireg |= 1<<REGSP;
544 		if(ld)
545 			p->mark |= LOAD;
546 		if(ad)
547 			break;
548 		s->size = sz;
549 		s->soffset = regoff(&p->from);
550 
551 		s->used.cc |= E_MEMSP;
552 		break;
553 	case C_SEXT:
554 	case C_LEXT:
555 		s->used.ireg |= 1<<REGSB;
556 		if(ld)
557 			p->mark |= LOAD;
558 		if(ad)
559 			break;
560 		s->size = sz;
561 		s->soffset = regoff(&p->from);
562 
563 		s->used.cc |= E_MEMSB;
564 		break;
565 	}
566 
567 	c = p->reg;
568 	if(c != NREG) {
569 		if(p->from.type == D_FREG || p->to.type == D_FREG) {
570 			s->used.freg |= 1<<c;
571 			s->used.freg |= 1<<(c|1);
572 		} else
573 			s->used.ireg |= 1<<c;
574 	}
575 	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
576 }
577 
578 /*
579  * test to see if 2 instrictions can be
580  * interchanged without changing semantics
581  */
582 int
depend(Sch * sa,Sch * sb)583 depend(Sch *sa, Sch *sb)
584 {
585 	ulong x;
586 
587 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
588 		return 1;
589 	if(sb->set.ireg & sa->used.ireg)
590 		return 1;
591 
592 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
593 		return 1;
594 	if(sb->set.freg & sa->used.freg)
595 		return 1;
596 
597 	/*
598 	 * special case.
599 	 * loads from same address cannot pass.
600 	 * this is for hardware fifo's and the like
601 	 */
602 	if(sa->used.cc & sb->used.cc & E_MEM)
603 		if(sa->p.reg == sb->p.reg)
604 		if(regoff(&sa->p.from) == regoff(&sb->p.from))
605 			return 1;
606 
607 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
608 		(sb->set.cc & sa->used.cc);
609 	if(x) {
610 		/*
611 		 * allow SB and SP to pass each other.
612 		 * allow SB to pass SB iff doffsets are ok
613 		 * anything else conflicts
614 		 */
615 		if(x != E_MEMSP && x != E_MEMSB)
616 			return 1;
617 		x = sa->set.cc | sb->set.cc |
618 			sa->used.cc | sb->used.cc;
619 		if(x & E_MEM)
620 			return 1;
621 		if(offoverlap(sa, sb))
622 			return 1;
623 	}
624 
625 	return 0;
626 }
627 
628 int
offoverlap(Sch * sa,Sch * sb)629 offoverlap(Sch *sa, Sch *sb)
630 {
631 
632 	if(sa->soffset < sb->soffset) {
633 		if(sa->soffset+sa->size > sb->soffset)
634 			return 1;
635 		return 0;
636 	}
637 	if(sb->soffset+sb->size > sa->soffset)
638 		return 1;
639 	return 0;
640 }
641 
642 /*
643  * test 2 adjacent instructions
644  * and find out if inserted instructions
645  * are desired to prevent stalls.
646  */
647 int
conflict(Sch * sa,Sch * sb)648 conflict(Sch *sa, Sch *sb)
649 {
650 
651 	if(sa->set.ireg & sb->used.ireg)
652 		return 1;
653 	if(sa->set.freg & sb->used.freg)
654 		return 1;
655 	if(sa->set.cc & sb->used.cc)
656 		return 1;
657 
658 	return 0;
659 }
660 
661 int
compound(Prog * p)662 compound(Prog *p)
663 {
664 	Optab *o;
665 
666 	o = oplook(p);
667 	if(o->size != 4)
668 		return 1;
669 	if(p->to.type == D_REG && p->to.reg == REGSB)
670 		return 1;
671 	return 0;
672 }
673 
674 void
dumpbits(Sch * s,Dep * d)675 dumpbits(Sch *s, Dep *d)
676 {
677 	int i;
678 
679 	for(i=0; i<32; i++)
680 		if(d->ireg & (1<<i))
681 			Bprint(&bso, " R%d", i);
682 	for(i=0; i<32; i++)
683 		if(d->freg & (1<<i))
684 			Bprint(&bso, " F%d", i);
685 	for(i=0; i<32; i++)
686 		switch(d->cc & (1<<i)) {
687 		default:
688 			break;
689 		case E_HILO:
690 			Bprint(&bso, " HILO");
691 			break;
692 		case E_FCR:
693 			Bprint(&bso, " FCR");
694 			break;
695 		case E_MCR:
696 			Bprint(&bso, " MCR");
697 			break;
698 		case E_MEM:
699 			Bprint(&bso, " MEM%d", s->size);
700 			break;
701 		case E_MEMSB:
702 			Bprint(&bso, " SB%d", s->size);
703 			break;
704 		case E_MEMSP:
705 			Bprint(&bso, " SP%d", s->size);
706 			break;
707 		}
708 }
709