xref: /inferno-os/utils/0l/sched.c (revision d0e1d143ef6f03c75c008c7ec648859dd260cbab)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_HILO	= 1<<0,
6 	E_FCR	= 1<<1,
7 	E_MCR	= 1<<2,
8 	E_MEM	= 1<<3,
9 	E_MEMSP	= 1<<4,	/* uses offset and size */
10 	E_MEMSB	= 1<<5,	/* uses offset and size */
11 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12 	DELAY	= BRANCH|LOAD|FCMP,
13 };
14 
15 typedef	struct	Sch	Sch;
16 typedef	struct	Dep	Dep;
17 
18 struct	Dep
19 {
20 	ulong	ireg;
21 	ulong	freg;
22 	ulong	cc;
23 };
24 struct	Sch
25 {
26 	Prog	p;
27 	Dep	set;
28 	Dep	used;
29 	long	soffset;
30 	char	size;
31 	char	nop;
32 	char	comp;
33 };
34 
35 void	markregused(Sch*, Prog*);
36 int	depend(Sch*, Sch*);
37 int	conflict(Sch*, Sch*);
38 int	offoverlap(Sch*, Sch*);
39 void	dumpbits(Sch*, Dep*);
40 
41 void
42 sched(Prog *p0, Prog *pe)
43 {
44 	Prog *p, *q;
45 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46 
47 	/*
48 	 * build side structure
49 	 */
50 	s = sch;
51 	for(p=p0;; p=p->link) {
52 		memset(s, 0, sizeof(*s));
53 		s->p = *p;
54 		markregused(s, p);
55 		if(debug['X']) {
56 			Bprint(&bso, "%P%|set", &s->p, 40);
57 			dumpbits(s, &s->set);
58 			Bprint(&bso, "; used");
59 			dumpbits(s, &s->used);
60 			if(s->comp)
61 				Bprint(&bso, "; compound");
62 			if(s->p.mark & LOAD)
63 				Bprint(&bso, "; load");
64 			if(s->p.mark & BRANCH)
65 				Bprint(&bso, "; branch");
66 			if(s->p.mark & FCMP)
67 				Bprint(&bso, "; fcmp");
68 			Bprint(&bso, "\n");
69 		}
70 		if(p == pe)
71 			break;
72 		s++;
73 	}
74 	se = s;
75 
76 	/*
77 	 * prepass to move things around
78 	 * does nothing, but tries to make
79 	 * the actual scheduler work better
80 	 */
81 	for(s=sch; s<=se; s++) {
82 		if(!(s->p.mark & LOAD))
83 			continue;
84 		/* always good to put nonconflict loads together */
85 		for(t=s+1; t<=se; t++) {
86 			if(!(t->p.mark & LOAD))
87 				continue;
88 			if(t->p.mark & BRANCH)
89 				break;
90 			if(conflict(s, t))
91 				break;
92 			for(u=t-1; u>s; u--)
93 				if(depend(u, t))
94 					goto no11;
95 			u = s+1;
96 			stmp = *t;
97 			memmove(s+2, u, (uchar*)t - (uchar*)u);
98 			*u = stmp;
99 			break;
100 		}
101 	no11:
102 
103 		/* put schedule fodder above load */
104 		for(t=s+1; t<=se; t++) {
105 			if(t->p.mark & BRANCH)
106 				break;
107 			if(s > sch && conflict(s-1, t))
108 				continue;
109 			for(u=t-1; u>=s; u--)
110 				if(depend(t, u))
111 					goto no1;
112 			stmp = *t;
113 			memmove(s+1, s, (uchar*)t - (uchar*)s);
114 			*s = stmp;
115 			if(!(s->p.mark & LOAD))
116 				break;
117 		no1:;
118 		}
119 	}
120 
121 	for(s=se; s>=sch; s--) {
122 		if(!(s->p.mark & DELAY))
123 			continue;
124 		if(s < se)
125 			if(!conflict(s, s+1))
126 				goto out3;
127 		/*
128 		 * s is load, s+1 is immediate use of result or end of block
129 		 * t is the trial instruction to insert between s and s+1
130 		 */
131 		if(!debug['Y'])
132 		for(t=s-1; t>=sch; t--) {
133 			if(t->comp)
134 				if(s->p.mark & BRANCH)
135 					goto no2;
136 			if(t->p.mark & DELAY)
137 				if(s >= se || conflict(t, s+1))
138 					goto no2;
139 			for(u=t+1; u<=s; u++)
140 				if(depend(u, t))
141 					goto no2;
142 			goto out2;
143 		no2:;
144 		}
145 		if(debug['X'])
146 			Bprint(&bso, "?l%P\n", &s->p);
147 		if(s->p.mark & BRANCH)
148 			s->nop = 1;
149 		if(debug['v']) {
150 			if(s->p.mark & LOAD) {
151 				nop.load.count++;
152 				nop.load.outof++;
153 			}
154 			if(s->p.mark & BRANCH) {
155 				nop.branch.count++;
156 				nop.branch.outof++;
157 			}
158 			if(s->p.mark & FCMP) {
159 				nop.fcmp.count++;
160 				nop.fcmp.outof++;
161 			}
162 		}
163 		continue;
164 
165 	out2:
166 		if(debug['X']) {
167 			Bprint(&bso, "!l%P\n", &t->p);
168 			Bprint(&bso, "%P\n", &s->p);
169 		}
170 		stmp = *t;
171 		memmove(t, t+1, (uchar*)s - (uchar*)t);
172 		*s = stmp;
173 		s--;
174 
175 	out3:
176 		if(debug['v']) {
177 			if(s->p.mark & LOAD)
178 				nop.load.outof++;
179 			if(s->p.mark & BRANCH)
180 				nop.branch.outof++;
181 			if(s->p.mark & FCMP)
182 				nop.fcmp.outof++;
183 		}
184 	}
185 
186 	/* Avoid HI/LO use->set */
187 	t = sch+1;
188 	for(s=sch; s<se-1; s++, t++) {
189 		if((s->used.cc & E_HILO) == 0)
190 			continue;
191 		if(t->set.cc & E_HILO)
192 			s->nop = 2;
193 	}
194 
195 	/*
196 	 * put it all back
197 	 */
198 	for(s=sch, p=p0; s<=se; s++, p=q) {
199 		q = p->link;
200 		if(q != s->p.link) {
201 			*p = s->p;
202 			p->link = q;
203 		}
204 		while(s->nop--)
205 			addnop(p);
206 	}
207 	if(debug['X']) {
208 		Bprint(&bso, "\n");
209 		Bflush(&bso);
210 	}
211 }
212 
213 void
214 markregused(Sch *s, Prog *realp)
215 {
216 	int c, ar, ad, ld, sz;
217 	ulong m;
218 	Prog *p;
219 
220 	p = &s->p;
221 	s->comp = compound(p);
222 	s->nop = 0;
223 	if(s->comp) {
224 		s->set.ireg |= 1<<REGTMP;
225 		s->used.ireg |= 1<<REGTMP;
226 	}
227 
228 	ar = 0;		/* dest is really reference */
229 	ad = 0;		/* source/dest is really address */
230 	ld = 0;		/* opcode is load instruction */
231 	sz = 20;	/* size of load/store for overlap computation */
232 
233 /*
234  * flags based on opcode
235  */
236 	switch(p->as) {
237 	case ATEXT:
238 		curtext = realp;
239 		autosize = p->to.offset + 8;
240 		ad = 1;
241 		break;
242 	case AJAL:
243 		c = p->reg;
244 		if(c == NREG)
245 			c = REGLINK;
246 		s->set.ireg |= 1<<c;
247 		ar = 1;
248 		ad = 1;
249 		break;
250 	case ABGEZAL:
251 	case ABLTZAL:
252 		s->set.ireg |= 1<<REGLINK;
253 	case ABEQ:
254 	case ABGEZ:
255 	case ABGTZ:
256 	case ABLEZ:
257 	case ABLTZ:
258 	case ABNE:
259 		ar = 1;
260 		ad = 1;
261 		break;
262 	case ABFPT:
263 	case ABFPF:
264 		ad = 1;
265 		s->used.cc |= E_FCR;
266 		break;
267 	case ACMPEQD:
268 	case ACMPEQF:
269 	case ACMPGED:
270 	case ACMPGEF:
271 	case ACMPGTD:
272 	case ACMPGTF:
273 		ar = 1;
274 		s->set.cc |= E_FCR;
275 		p->mark |= FCMP;
276 		break;
277 	case AJMP:
278 		ar = 1;
279 		ad = 1;
280 		break;
281 	case AMOVB:
282 	case AMOVBU:
283 		sz = 1;
284 		ld = 1;
285 		break;
286 	case AMOVH:
287 	case AMOVHU:
288 		sz = 2;
289 		ld = 1;
290 		break;
291 	case AMOVF:
292 	case AMOVW:
293 	case AMOVWL:
294 	case AMOVWR:
295 		sz = 4;
296 		ld = 1;
297 		break;
298 	case AMOVD:
299 	case AMOVV:
300 	case AMOVVL:
301 	case AMOVVR:
302 		sz = 8;
303 		ld = 1;
304 		break;
305 	case ADIV:
306 	case ADIVU:
307 	case AMUL:
308 	case AMULU:
309 	case AREM:
310 	case AREMU:
311 	case ADIVV:
312 	case ADIVVU:
313 	case AMULV:
314 	case AMULVU:
315 	case AREMV:
316 	case AREMVU:
317 		s->set.cc = E_HILO;
318 	case AADD:
319 	case AADDU:
320 	case AADDV:
321 	case AADDVU:
322 	case AAND:
323 	case ANOR:
324 	case AOR:
325 	case ASGT:
326 	case ASGTU:
327 	case ASLL:
328 	case ASRA:
329 	case ASRL:
330 	case ASLLV:
331 	case ASRAV:
332 	case ASRLV:
333 	case ASUB:
334 	case ASUBU:
335 	case ASUBV:
336 	case ASUBVU:
337 	case AXOR:
338 
339 	case AADDD:
340 	case AADDF:
341 	case AADDW:
342 	case ASUBD:
343 	case ASUBF:
344 	case ASUBW:
345 	case AMULF:
346 	case AMULD:
347 	case AMULW:
348 	case ADIVF:
349 	case ADIVD:
350 	case ADIVW:
351 		if(p->reg == NREG) {
352 			if(p->to.type == D_REG || p->to.type == D_FREG)
353 				p->reg = p->to.reg;
354 			if(p->reg == NREG)
355 				print("botch %P\n", p);
356 		}
357 		break;
358 	}
359 
360 /*
361  * flags based on 'to' field
362  */
363 	c = p->to.class;
364 	if(c == 0) {
365 		c = aclass(&p->to) + 1;
366 		p->to.class = c;
367 	}
368 	c--;
369 	switch(c) {
370 	default:
371 		print("unknown class %d %D\n", c, &p->to);
372 
373 	case C_ZCON:
374 	case C_SCON:
375 	case C_ADD0CON:
376 	case C_AND0CON:
377 	case C_ADDCON:
378 	case C_ANDCON:
379 	case C_UCON:
380 	case C_LCON:
381 	case C_NONE:
382 	case C_SBRA:
383 	case C_LBRA:
384 		break;
385 
386 	case C_HI:
387 	case C_LO:
388 		s->set.cc |= E_HILO;
389 		break;
390 	case C_FCREG:
391 		s->set.cc |= E_FCR;
392 		break;
393 	case C_MREG:
394 		s->set.cc |= E_MCR;
395 		break;
396 	case C_ZOREG:
397 	case C_SOREG:
398 	case C_LOREG:
399 		c = p->to.reg;
400 		s->used.ireg |= 1<<c;
401 		if(ad)
402 			break;
403 		s->size = sz;
404 		s->soffset = regoff(&p->to);
405 
406 		m = ANYMEM;
407 		if(c == REGSB)
408 			m = E_MEMSB;
409 		if(c == REGSP)
410 			m = E_MEMSP;
411 
412 		if(ar)
413 			s->used.cc |= m;
414 		else
415 			s->set.cc |= m;
416 		break;
417 	case C_SACON:
418 	case C_LACON:
419 		s->used.ireg |= 1<<REGSP;
420 		break;
421 	case C_SECON:
422 	case C_LECON:
423 		s->used.ireg |= 1<<REGSB;
424 		break;
425 	case C_REG:
426 		if(ar)
427 			s->used.ireg |= 1<<p->to.reg;
428 		else
429 			s->set.ireg |= 1<<p->to.reg;
430 		break;
431 	case C_FREG:
432 		/* do better -- determine double prec */
433 		if(ar) {
434 			s->used.freg |= 1<<p->to.reg;
435 			s->used.freg |= 1<<(p->to.reg|1);
436 		} else {
437 			s->set.freg |= 1<<p->to.reg;
438 			s->set.freg |= 1<<(p->to.reg|1);
439 		}
440 		if(ld && p->from.type == D_REG)
441 			p->mark |= LOAD;
442 		break;
443 	case C_SAUTO:
444 	case C_LAUTO:
445 		s->used.ireg |= 1<<REGSP;
446 		if(ad)
447 			break;
448 		s->size = sz;
449 		s->soffset = regoff(&p->to);
450 
451 		if(ar)
452 			s->used.cc |= E_MEMSP;
453 		else
454 			s->set.cc |= E_MEMSP;
455 		break;
456 	case C_SEXT:
457 	case C_LEXT:
458 		s->used.ireg |= 1<<REGSB;
459 		if(ad)
460 			break;
461 		s->size = sz;
462 		s->soffset = regoff(&p->to);
463 
464 		if(ar)
465 			s->used.cc |= E_MEMSB;
466 		else
467 			s->set.cc |= E_MEMSB;
468 		break;
469 	}
470 
471 /*
472  * flags based on 'from' field
473  */
474 	c = p->from.class;
475 	if(c == 0) {
476 		c = aclass(&p->from) + 1;
477 		p->from.class = c;
478 	}
479 	c--;
480 	switch(c) {
481 	default:
482 		print("unknown class %d %D\n", c, &p->from);
483 
484 	case C_ZCON:
485 	case C_SCON:
486 	case C_ADD0CON:
487 	case C_AND0CON:
488 	case C_ADDCON:
489 	case C_ANDCON:
490 	case C_UCON:
491 	case C_LCON:
492 	case C_NONE:
493 	case C_SBRA:
494 	case C_LBRA:
495 		break;
496 	case C_HI:
497 	case C_LO:
498 		s->used.cc |= E_HILO;
499 		break;
500 	case C_FCREG:
501 		s->used.cc |= E_FCR;
502 		break;
503 	case C_MREG:
504 		s->used.cc |= E_MCR;
505 		break;
506 	case C_ZOREG:
507 	case C_SOREG:
508 	case C_LOREG:
509 		c = p->from.reg;
510 		s->used.ireg |= 1<<c;
511 		if(ld)
512 			p->mark |= LOAD;
513 		s->size = sz;
514 		s->soffset = regoff(&p->from);
515 
516 		m = ANYMEM;
517 		if(c == REGSB)
518 			m = E_MEMSB;
519 		if(c == REGSP)
520 			m = E_MEMSP;
521 
522 		s->used.cc |= m;
523 		break;
524 	case C_SACON:
525 	case C_LACON:
526 		s->used.ireg |= 1<<REGSP;
527 		break;
528 	case C_SECON:
529 	case C_LECON:
530 		s->used.ireg |= 1<<REGSB;
531 		break;
532 	case C_REG:
533 		s->used.ireg |= 1<<p->from.reg;
534 		break;
535 	case C_FREG:
536 		/* do better -- determine double prec */
537 		s->used.freg |= 1<<p->from.reg;
538 		s->used.freg |= 1<<(p->from.reg|1);
539 		if(ld && p->to.type == D_REG)
540 			p->mark |= LOAD;
541 		break;
542 	case C_SAUTO:
543 	case C_LAUTO:
544 		s->used.ireg |= 1<<REGSP;
545 		if(ld)
546 			p->mark |= LOAD;
547 		if(ad)
548 			break;
549 		s->size = sz;
550 		s->soffset = regoff(&p->from);
551 
552 		s->used.cc |= E_MEMSP;
553 		break;
554 	case C_SEXT:
555 	case C_LEXT:
556 		s->used.ireg |= 1<<REGSB;
557 		if(ld)
558 			p->mark |= LOAD;
559 		if(ad)
560 			break;
561 		s->size = sz;
562 		s->soffset = regoff(&p->from);
563 
564 		s->used.cc |= E_MEMSB;
565 		break;
566 	}
567 
568 	c = p->reg;
569 	if(c != NREG) {
570 		if(p->from.type == D_FREG || p->to.type == D_FREG) {
571 			s->used.freg |= 1<<c;
572 			s->used.freg |= 1<<(c|1);
573 		} else
574 			s->used.ireg |= 1<<c;
575 	}
576 	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
577 }
578 
579 /*
580  * test to see if 2 instrictions can be
581  * interchanged without changing semantics
582  */
583 int
584 depend(Sch *sa, Sch *sb)
585 {
586 	ulong x;
587 
588 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
589 		return 1;
590 	if(sb->set.ireg & sa->used.ireg)
591 		return 1;
592 
593 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
594 		return 1;
595 	if(sb->set.freg & sa->used.freg)
596 		return 1;
597 
598 	/*
599 	 * special case.
600 	 * loads from same address cannot pass.
601 	 * this is for hardware fifo's and the like
602 	 */
603 	if(sa->used.cc & sb->used.cc & E_MEM)
604 		if(sa->p.reg == sb->p.reg)
605 		if(regoff(&sa->p.from) == regoff(&sb->p.from))
606 			return 1;
607 
608 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
609 		(sb->set.cc & sa->used.cc);
610 	if(x) {
611 		/*
612 		 * allow SB and SP to pass each other.
613 		 * allow SB to pass SB iff doffsets are ok
614 		 * anything else conflicts
615 		 */
616 		if(x != E_MEMSP && x != E_MEMSB)
617 			return 1;
618 		x = sa->set.cc | sb->set.cc |
619 			sa->used.cc | sb->used.cc;
620 		if(x & E_MEM)
621 			return 1;
622 		if(offoverlap(sa, sb))
623 			return 1;
624 	}
625 
626 	return 0;
627 }
628 
629 int
630 offoverlap(Sch *sa, Sch *sb)
631 {
632 
633 	if(sa->soffset < sb->soffset) {
634 		if(sa->soffset+sa->size > sb->soffset)
635 			return 1;
636 		return 0;
637 	}
638 	if(sb->soffset+sb->size > sa->soffset)
639 		return 1;
640 	return 0;
641 }
642 
643 /*
644  * test 2 adjacent instructions
645  * and find out if inserted instructions
646  * are desired to prevent stalls.
647  */
648 int
649 conflict(Sch *sa, Sch *sb)
650 {
651 
652 	if(sa->set.ireg & sb->used.ireg)
653 		return 1;
654 	if(sa->set.freg & sb->used.freg)
655 		return 1;
656 	if(sa->set.cc & sb->used.cc)
657 		return 1;
658 
659 	return 0;
660 }
661 
662 int
663 compound(Prog *p)
664 {
665 	Optab *o;
666 
667 	o = oplook(p);
668 	if(o->size != 4)
669 		return 1;
670 	if(p->to.type == D_REG && p->to.reg == REGSB)
671 		return 1;
672 	return 0;
673 }
674 
675 void
676 dumpbits(Sch *s, Dep *d)
677 {
678 	int i;
679 
680 	for(i=0; i<32; i++)
681 		if(d->ireg & (1<<i))
682 			Bprint(&bso, " R%d", i);
683 	for(i=0; i<32; i++)
684 		if(d->freg & (1<<i))
685 			Bprint(&bso, " F%d", i);
686 	for(i=0; i<32; i++)
687 		switch(d->cc & (1<<i)) {
688 		default:
689 			break;
690 		case E_HILO:
691 			Bprint(&bso, " HILO");
692 			break;
693 		case E_FCR:
694 			Bprint(&bso, " FCR");
695 			break;
696 		case E_MCR:
697 			Bprint(&bso, " MCR");
698 			break;
699 		case E_MEM:
700 			Bprint(&bso, " MEM%d", s->size);
701 			break;
702 		case E_MEMSB:
703 			Bprint(&bso, " SB%d", s->size);
704 			break;
705 		case E_MEMSP:
706 			Bprint(&bso, " SP%d", s->size);
707 			break;
708 		}
709 }
710