xref: /inferno-os/utils/vl/sched.c (revision d0e1d143ef6f03c75c008c7ec648859dd260cbab)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_HILO	= 1<<0,
6 	E_FCR	= 1<<1,
7 	E_MCR	= 1<<2,
8 	E_MEM	= 1<<3,
9 	E_MEMSP	= 1<<4,	/* uses offset and size */
10 	E_MEMSB	= 1<<5,	/* uses offset and size */
11 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12 	DELAY	= BRANCH|LOAD|FCMP,
13 };
14 
15 typedef	struct	Sch	Sch;
16 typedef	struct	Dep	Dep;
17 
18 struct	Dep
19 {
20 	ulong	ireg;
21 	ulong	freg;
22 	ulong	cc;
23 };
24 struct	Sch
25 {
26 	Prog	p;
27 	Dep	set;
28 	Dep	used;
29 	long	soffset;
30 	char	size;
31 	char	nop;
32 	char	comp;
33 };
34 
35 void	regsused(Sch*, Prog*);
36 int	depend(Sch*, Sch*);
37 int	conflict(Sch*, Sch*);
38 int	offoverlap(Sch*, Sch*);
39 void	dumpbits(Sch*, Dep*);
40 
41 void
42 sched(Prog *p0, Prog *pe)
43 {
44 	Prog *p, *q;
45 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46 
47 	/*
48 	 * build side structure
49 	 */
50 	s = sch;
51 	for(p=p0;; p=p->link) {
52 		memset(s, 0, sizeof(*s));
53 		s->p = *p;
54 		regsused(s, p);
55 		if(debug['X']) {
56 			Bprint(&bso, "%P\t\tset", &s->p);
57 			dumpbits(s, &s->set);
58 			Bprint(&bso, "; used");
59 			dumpbits(s, &s->used);
60 			if(s->comp)
61 				Bprint(&bso, "; compound");
62 			if(s->p.mark & LOAD)
63 				Bprint(&bso, "; load");
64 			if(s->p.mark & BRANCH)
65 				Bprint(&bso, "; branch");
66 			if(s->p.mark & FCMP)
67 				Bprint(&bso, "; fcmp");
68 			Bprint(&bso, "\n");
69 		}
70 		if(p == pe)
71 			break;
72 		s++;
73 	}
74 	se = s;
75 
76 	/*
77 	 * prepass to move things around
78 	 * does nothing, but tries to make
79 	 * the actual scheduler work better
80 	 */
81 	for(s=sch; s<=se; s++) {
82 		if(!(s->p.mark & LOAD))
83 			continue;
84 		/* always good to put nonconflict loads together */
85 		for(t=s+1; t<=se; t++) {
86 			if(!(t->p.mark & LOAD))
87 				continue;
88 			if(t->p.mark & BRANCH)
89 				break;
90 			if(conflict(s, t))
91 				break;
92 			for(u=t-1; u>s; u--)
93 				if(depend(u, t))
94 					goto no11;
95 			u = s+1;
96 			stmp = *t;
97 			memmove(s+2, u, (uchar*)t - (uchar*)u);
98 			*u = stmp;
99 			break;
100 		}
101 	no11:
102 
103 		/* put schedule fodder above load */
104 		for(t=s+1; t<=se; t++) {
105 			if(t->p.mark & BRANCH)
106 				break;
107 			if(s > sch && conflict(s-1, t))
108 				continue;
109 			for(u=t-1; u>=s; u--)
110 				if(depend(t, u))
111 					goto no1;
112 			stmp = *t;
113 			memmove(s+1, s, (uchar*)t - (uchar*)s);
114 			*s = stmp;
115 			if(!(s->p.mark & LOAD))
116 				break;
117 		no1:;
118 		}
119 	}
120 
121 	for(s=se; s>=sch; s--) {
122 		if(!(s->p.mark & DELAY))
123 			continue;
124 		if(s < se)
125 			if(!conflict(s, s+1))
126 				goto out3;
127 		/*
128 		 * s is load, s+1 is immediate use of result or end of block
129 		 * t is the trial instruction to insert between s and s+1
130 		 */
131 		if(!debug['Y'])
132 		for(t=s-1; t>=sch; t--) {
133 			if(t->comp)
134 				if(s->p.mark & BRANCH)
135 					goto no2;
136 			if(t->p.mark & DELAY)
137 				if(s >= se || conflict(t, s+1))
138 					goto no2;
139 			for(u=t+1; u<=s; u++)
140 				if(depend(u, t))
141 					goto no2;
142 			goto out2;
143 		no2:;
144 		}
145 		if(debug['X'])
146 			Bprint(&bso, "?l%P\n", &s->p);
147 		s->nop = 1;
148 		if(debug['v']) {
149 			if(s->p.mark & LOAD) {
150 				nop.load.count++;
151 				nop.load.outof++;
152 			}
153 			if(s->p.mark & BRANCH) {
154 				nop.branch.count++;
155 				nop.branch.outof++;
156 			}
157 			if(s->p.mark & FCMP) {
158 				nop.fcmp.count++;
159 				nop.fcmp.outof++;
160 			}
161 		}
162 		continue;
163 
164 	out2:
165 		if(debug['X']) {
166 			Bprint(&bso, "!l%P\n", &t->p);
167 			Bprint(&bso, "%P\n", &s->p);
168 		}
169 		stmp = *t;
170 		memmove(t, t+1, (uchar*)s - (uchar*)t);
171 		*s = stmp;
172 		s--;
173 
174 	out3:
175 		if(debug['v']) {
176 			if(s->p.mark & LOAD)
177 				nop.load.outof++;
178 			if(s->p.mark & BRANCH)
179 				nop.branch.outof++;
180 			if(s->p.mark & FCMP)
181 				nop.fcmp.outof++;
182 		}
183 	}
184 
185 	/* Avoid HI/LO use->set */
186 	t = sch+1;
187 	for(s=sch; s<se-1; s++, t++) {
188 		if((s->used.cc & E_HILO) == 0)
189 			continue;
190 		if(t->set.cc & E_HILO)
191 			s->nop = 2;
192 	}
193 
194 	/*
195 	 * put it all back
196 	 */
197 	for(s=sch, p=p0; s<=se; s++, p=q) {
198 		q = p->link;
199 		if(q != s->p.link) {
200 			*p = s->p;
201 			p->link = q;
202 		}
203 		while(s->nop--)
204 			addnop(p);
205 	}
206 	if(debug['X']) {
207 		Bprint(&bso, "\n");
208 		Bflush(&bso);
209 	}
210 }
211 
212 void
213 regsused(Sch *s, Prog *realp)
214 {
215 	int c, ar, ad, ld, sz;
216 	ulong m;
217 	Prog *p;
218 
219 	p = &s->p;
220 	s->comp = compound(p);
221 	s->nop = 0;
222 	if(s->comp) {
223 		s->set.ireg |= 1<<REGTMP;
224 		s->used.ireg |= 1<<REGTMP;
225 	}
226 
227 	ar = 0;		/* dest is really reference */
228 	ad = 0;		/* source/dest is really address */
229 	ld = 0;		/* opcode is load instruction */
230 	sz = 20;	/* size of load/store for overlap computation */
231 
232 /*
233  * flags based on opcode
234  */
235 	switch(p->as) {
236 	case ATEXT:
237 		curtext = realp;
238 		autosize = p->to.offset + 4;
239 		ad = 1;
240 		break;
241 	case AJAL:
242 		c = p->reg;
243 		if(c == NREG)
244 			c = REGLINK;
245 		s->set.ireg |= 1<<c;
246 		ar = 1;
247 		ad = 1;
248 		break;
249 	case ABGEZAL:
250 	case ABLTZAL:
251 		s->set.ireg |= 1<<REGLINK;
252 	case ABEQ:
253 	case ABGEZ:
254 	case ABGTZ:
255 	case ABLEZ:
256 	case ABLTZ:
257 	case ABNE:
258 		ar = 1;
259 		ad = 1;
260 		break;
261 	case ABFPT:
262 	case ABFPF:
263 		ad = 1;
264 		s->used.cc |= E_FCR;
265 		break;
266 	case ACMPEQD:
267 	case ACMPEQF:
268 	case ACMPGED:
269 	case ACMPGEF:
270 	case ACMPGTD:
271 	case ACMPGTF:
272 		ar = 1;
273 		s->set.cc |= E_FCR;
274 		p->mark |= FCMP;
275 		break;
276 	case AJMP:
277 		ar = 1;
278 		ad = 1;
279 		break;
280 	case AMOVB:
281 	case AMOVBU:
282 		sz = 1;
283 		ld = 1;
284 		break;
285 	case AMOVH:
286 	case AMOVHU:
287 		sz = 2;
288 		ld = 1;
289 		break;
290 	case AMOVF:
291 	case AMOVW:
292 	case AMOVWL:
293 	case AMOVWR:
294 		sz = 4;
295 		ld = 1;
296 		break;
297 	case AMOVD:
298 	case AMOVV:
299 	case AMOVVL:
300 	case AMOVVR:
301 		sz = 8;
302 		ld = 1;
303 		break;
304 	case ADIV:
305 	case ADIVU:
306 	case AMUL:
307 	case AMULU:
308 	case AREM:
309 	case AREMU:
310 		s->set.cc = E_HILO;
311 	case AADD:
312 	case AADDU:
313 	case AAND:
314 	case ANOR:
315 	case AOR:
316 	case ASGT:
317 	case ASGTU:
318 	case ASLL:
319 	case ASRA:
320 	case ASRL:
321 	case ASUB:
322 	case ASUBU:
323 	case AXOR:
324 
325 	case AADDD:
326 	case AADDF:
327 	case AADDW:
328 	case ASUBD:
329 	case ASUBF:
330 	case ASUBW:
331 	case AMULF:
332 	case AMULD:
333 	case AMULW:
334 	case ADIVF:
335 	case ADIVD:
336 	case ADIVW:
337 		if(p->reg == NREG) {
338 			if(p->to.type == D_REG || p->to.type == D_FREG)
339 				p->reg = p->to.reg;
340 			if(p->reg == NREG)
341 				print("botch %P\n", p);
342 		}
343 		break;
344 	}
345 
346 /*
347  * flags based on 'to' field
348  */
349 	c = p->to.class;
350 	if(c == 0) {
351 		c = aclass(&p->to) + 1;
352 		p->to.class = c;
353 	}
354 	c--;
355 	switch(c) {
356 	default:
357 		print("unknown class %d %D\n", c, &p->to);
358 
359 	case C_ZCON:
360 	case C_SCON:
361 	case C_ADD0CON:
362 	case C_AND0CON:
363 	case C_ADDCON:
364 	case C_ANDCON:
365 	case C_UCON:
366 	case C_LCON:
367 	case C_NONE:
368 	case C_SBRA:
369 	case C_LBRA:
370 		break;
371 
372 	case C_HI:
373 	case C_LO:
374 		s->set.cc |= E_HILO;
375 		break;
376 	case C_FCREG:
377 		s->set.cc |= E_FCR;
378 		break;
379 	case C_MREG:
380 		s->set.cc |= E_MCR;
381 		break;
382 	case C_ZOREG:
383 	case C_SOREG:
384 	case C_LOREG:
385 		c = p->to.reg;
386 		s->used.ireg |= 1<<c;
387 		if(ad)
388 			break;
389 		s->size = sz;
390 		s->soffset = regoff(&p->to);
391 
392 		m = ANYMEM;
393 		if(c == REGSB)
394 			m = E_MEMSB;
395 		if(c == REGSP)
396 			m = E_MEMSP;
397 
398 		if(ar)
399 			s->used.cc |= m;
400 		else
401 			s->set.cc |= m;
402 		break;
403 	case C_SACON:
404 	case C_LACON:
405 		s->used.ireg |= 1<<REGSP;
406 		break;
407 	case C_SECON:
408 	case C_LECON:
409 		s->used.ireg |= 1<<REGSB;
410 		break;
411 	case C_REG:
412 		if(ar)
413 			s->used.ireg |= 1<<p->to.reg;
414 		else
415 			s->set.ireg |= 1<<p->to.reg;
416 		break;
417 	case C_FREG:
418 		/* do better -- determine double prec */
419 		if(ar) {
420 			s->used.freg |= 1<<p->to.reg;
421 			s->used.freg |= 1<<(p->to.reg|1);
422 		} else {
423 			s->set.freg |= 1<<p->to.reg;
424 			s->set.freg |= 1<<(p->to.reg|1);
425 		}
426 		if(ld && p->from.type == D_REG)
427 			p->mark |= LOAD;
428 		break;
429 	case C_SAUTO:
430 	case C_LAUTO:
431 		s->used.ireg |= 1<<REGSP;
432 		if(ad)
433 			break;
434 		s->size = sz;
435 		s->soffset = regoff(&p->to);
436 
437 		if(ar)
438 			s->used.cc |= E_MEMSP;
439 		else
440 			s->set.cc |= E_MEMSP;
441 		break;
442 	case C_SEXT:
443 	case C_LEXT:
444 		s->used.ireg |= 1<<REGSB;
445 		if(ad)
446 			break;
447 		s->size = sz;
448 		s->soffset = regoff(&p->to);
449 
450 		if(ar)
451 			s->used.cc |= E_MEMSB;
452 		else
453 			s->set.cc |= E_MEMSB;
454 		break;
455 	}
456 
457 /*
458  * flags based on 'from' field
459  */
460 	c = p->from.class;
461 	if(c == 0) {
462 		c = aclass(&p->from) + 1;
463 		p->from.class = c;
464 	}
465 	c--;
466 	switch(c) {
467 	default:
468 		print("unknown class %d %D\n", c, &p->from);
469 
470 	case C_ZCON:
471 	case C_SCON:
472 	case C_ADD0CON:
473 	case C_AND0CON:
474 	case C_ADDCON:
475 	case C_ANDCON:
476 	case C_UCON:
477 	case C_LCON:
478 	case C_NONE:
479 	case C_SBRA:
480 	case C_LBRA:
481 		break;
482 	case C_HI:
483 	case C_LO:
484 		s->used.cc |= E_HILO;
485 		break;
486 	case C_FCREG:
487 		s->used.cc |= E_FCR;
488 		break;
489 	case C_MREG:
490 		s->used.cc |= E_MCR;
491 		break;
492 	case C_ZOREG:
493 	case C_SOREG:
494 	case C_LOREG:
495 		c = p->from.reg;
496 		s->used.ireg |= 1<<c;
497 		if(ld)
498 			p->mark |= LOAD;
499 		s->size = sz;
500 		s->soffset = regoff(&p->from);
501 
502 		m = ANYMEM;
503 		if(c == REGSB)
504 			m = E_MEMSB;
505 		if(c == REGSP)
506 			m = E_MEMSP;
507 
508 		s->used.cc |= m;
509 		break;
510 	case C_SACON:
511 	case C_LACON:
512 		s->used.ireg |= 1<<REGSP;
513 		break;
514 	case C_SECON:
515 	case C_LECON:
516 		s->used.ireg |= 1<<REGSB;
517 		break;
518 	case C_REG:
519 		s->used.ireg |= 1<<p->from.reg;
520 		break;
521 	case C_FREG:
522 		/* do better -- determine double prec */
523 		s->used.freg |= 1<<p->from.reg;
524 		s->used.freg |= 1<<(p->from.reg|1);
525 		if(ld && p->to.type == D_REG)
526 			p->mark |= LOAD;
527 		break;
528 	case C_SAUTO:
529 	case C_LAUTO:
530 		s->used.ireg |= 1<<REGSP;
531 		if(ld)
532 			p->mark |= LOAD;
533 		if(ad)
534 			break;
535 		s->size = sz;
536 		s->soffset = regoff(&p->from);
537 
538 		s->used.cc |= E_MEMSP;
539 		break;
540 	case C_SEXT:
541 	case C_LEXT:
542 		s->used.ireg |= 1<<REGSB;
543 		if(ld)
544 			p->mark |= LOAD;
545 		if(ad)
546 			break;
547 		s->size = sz;
548 		s->soffset = regoff(&p->from);
549 
550 		s->used.cc |= E_MEMSB;
551 		break;
552 	}
553 
554 	c = p->reg;
555 	if(c != NREG) {
556 		if(p->from.type == D_FREG || p->to.type == D_FREG) {
557 			s->used.freg |= 1<<c;
558 			s->used.freg |= 1<<(c|1);
559 		} else
560 			s->used.ireg |= 1<<c;
561 	}
562 	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
563 }
564 
565 /*
566  * test to see if 2 instrictions can be
567  * interchanged without changing semantics
568  */
569 int
570 depend(Sch *sa, Sch *sb)
571 {
572 	ulong x;
573 
574 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575 		return 1;
576 	if(sb->set.ireg & sa->used.ireg)
577 		return 1;
578 
579 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
580 		return 1;
581 	if(sb->set.freg & sa->used.freg)
582 		return 1;
583 
584 	/*
585 	 * special case.
586 	 * loads from same address cannot pass.
587 	 * this is for hardware fifo's and the like
588 	 */
589 	if(sa->used.cc & sb->used.cc & E_MEM)
590 		if(sa->p.reg == sb->p.reg)
591 		if(regoff(&sa->p.from) == regoff(&sb->p.from))
592 			return 1;
593 
594 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595 		(sb->set.cc & sa->used.cc);
596 	if(x) {
597 		/*
598 		 * allow SB and SP to pass each other.
599 		 * allow SB to pass SB iff doffsets are ok
600 		 * anything else conflicts
601 		 */
602 		if(x != E_MEMSP && x != E_MEMSB)
603 			return 1;
604 		x = sa->set.cc | sb->set.cc |
605 			sa->used.cc | sb->used.cc;
606 		if(x & E_MEM)
607 			return 1;
608 		if(offoverlap(sa, sb))
609 			return 1;
610 	}
611 
612 	return 0;
613 }
614 
615 int
616 offoverlap(Sch *sa, Sch *sb)
617 {
618 
619 	if(sa->soffset < sb->soffset) {
620 		if(sa->soffset+sa->size > sb->soffset)
621 			return 1;
622 		return 0;
623 	}
624 	if(sb->soffset+sb->size > sa->soffset)
625 		return 1;
626 	return 0;
627 }
628 
629 /*
630  * test 2 adjacent instructions
631  * and find out if inserted instructions
632  * are desired to prevent stalls.
633  */
634 int
635 conflict(Sch *sa, Sch *sb)
636 {
637 
638 	if(sa->set.ireg & sb->used.ireg)
639 		return 1;
640 	if(sa->set.freg & sb->used.freg)
641 		return 1;
642 	if(sa->set.cc & sb->used.cc)
643 		return 1;
644 
645 	return 0;
646 }
647 
648 int
649 compound(Prog *p)
650 {
651 	Optab *o;
652 
653 	o = oplook(p);
654 	if(o->size != 4)
655 		return 1;
656 	if(p->to.type == D_REG && p->to.reg == REGSB)
657 		return 1;
658 	return 0;
659 }
660 
661 void
662 dumpbits(Sch *s, Dep *d)
663 {
664 	int i;
665 
666 	for(i=0; i<32; i++)
667 		if(d->ireg & (1<<i))
668 			Bprint(&bso, " R%d", i);
669 	for(i=0; i<32; i++)
670 		if(d->freg & (1<<i))
671 			Bprint(&bso, " F%d", i);
672 	for(i=0; i<32; i++)
673 		switch(d->cc & (1<<i)) {
674 		default:
675 			break;
676 		case E_HILO:
677 			Bprint(&bso, " HILO");
678 			break;
679 		case E_FCR:
680 			Bprint(&bso, " FCR");
681 			break;
682 		case E_MCR:
683 			Bprint(&bso, " MCR");
684 			break;
685 		case E_MEM:
686 			Bprint(&bso, " MEM%d", s->size);
687 			break;
688 		case E_MEMSB:
689 			Bprint(&bso, " SB%d", s->size);
690 			break;
691 		case E_MEMSP:
692 			Bprint(&bso, " SP%d", s->size);
693 			break;
694 		}
695 }
696