xref: /inferno-os/utils/kl/sched.c (revision 9dc22068e29604f4b484e746112a9a4efe6fd57f)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_ICC	= 1<<0,
6 	E_FCC	= 1<<1,
7 	E_MEM	= 1<<2,
8 	E_MEMSP	= 1<<3,	/* uses offset and size */
9 	E_MEMSB	= 1<<4,	/* uses offset and size */
10 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
11 	ALL	= ~0
12 };
13 
14 typedef	struct	Sch	Sch;
15 typedef	struct	Dep	Dep;
16 
17 struct	Dep
18 {
19 	ulong	ireg;
20 	ulong	freg;
21 	ulong	cc;
22 };
23 struct	Sch
24 {
25 	Prog	p;
26 	Dep	set;
27 	Dep	used;
28 	long	soffset;
29 	char	size;
30 	char	nop;
31 	char	comp;
32 };
33 
34 void	regsused(Sch*, Prog*);
35 int	depend(Sch*, Sch*);
36 int	conflict(Sch*, Sch*);
37 int	offoverlap(Sch*, Sch*);
38 void	dumpbits(Sch*, Dep*);
39 
40 void
41 sched(Prog *p0, Prog *pe)
42 {
43 	Prog *p, *q;
44 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
45 
46 	/*
47 	 * build side structure
48 	 */
49 	s = sch;
50 	for(p=p0;; p=p->link) {
51 		memset(s, 0, sizeof(*s));
52 		s->p = *p;
53 		regsused(s, p);
54 		if(debug['X']) {
55 			Bprint(&bso, "%P\tset", &s->p);
56 			dumpbits(s, &s->set);
57 			Bprint(&bso, "; used");
58 			dumpbits(s, &s->used);
59 			if(s->comp)
60 				Bprint(&bso, "; compound");
61 			if(s->p.mark & LOAD)
62 				Bprint(&bso, "; load");
63 			if(s->p.mark & BRANCH)
64 				Bprint(&bso, "; branch");
65 			if(s->p.mark & FCMP)
66 				Bprint(&bso, "; fcmp");
67 			Bprint(&bso, "\n");
68 		}
69 		s++;
70 		if(p == pe)
71 			break;
72 	}
73 	se = s;
74 
75 	for(s=se-1; s>=sch; s--) {
76 		/*
77 		 * branch delay slot
78 		 */
79 		if(s->p.mark & BRANCH) {
80 			/* t is the trial instruction to use */
81 			for(t=s-1; t>=sch; t--) {
82 				if(t->comp || (t->p.mark & FCMP))
83 					goto no1;
84 				for(u=t+1; u<=s; u++)
85 					if(depend(u, t))
86 						goto no1;
87 				goto out1;
88 			no1:;
89 			}
90 			if(debug['X'])
91 				Bprint(&bso, "?b%P\n", &s->p);
92 			s->nop = 1;
93 			continue;
94 
95 		out1:
96 			if(debug['X']) {
97 				Bprint(&bso, "!b%P\n", &t->p);
98 				Bprint(&bso, "%P\n", &s->p);
99 			}
100 			stmp = *t;
101 			memmove(t, t+1, (uchar*)s - (uchar*)t);
102 			*s = stmp;
103 			s--;
104 			continue;
105 		}
106 
107 		/*
108 		 * load delay. interlocked.
109 		 */
110 		if(s->p.mark & LOAD) {
111 			if(s >= se-1)
112 				continue;
113 			if(!conflict(s, (s+1)))
114 				continue;
115 			/*
116 			 * s is load, s+1 is immediate use of result
117 			 * t is the trial instruction to insert between s and s+1
118 			 */
119 			for(t=s-1; t>=sch; t--) {
120 				if(t->p.mark & BRANCH)
121 					goto no2;
122 				if(t->p.mark & FCMP)
123 					if((s+1)->p.mark & BRANCH)
124 						goto no2;
125 				if(t->p.mark & LOAD)
126 					if(conflict(t, (s+1)))
127 						goto no2;
128 				for(u=t+1; u<=s; u++)
129 					if(depend(u, t))
130 						goto no2;
131 				goto out2;
132 			no2:;
133 			}
134 			if(debug['X'])
135 				Bprint(&bso, "?l%P\n", &s->p);
136 			continue;
137 		out2:
138 			if(debug['X']) {
139 				Bprint(&bso, "!l%P\n", &t->p);
140 				Bprint(&bso, "%P\n", &s->p);
141 			}
142 			stmp = *t;
143 			memmove(t, t+1, (uchar*)s - (uchar*)t);
144 			*s = stmp;
145 			s--;
146 			continue;
147 		}
148 
149 		/*
150 		 * fop2 delay.
151 		 */
152 		if(s->p.mark & FCMP) {
153 			if(s >= se-1) {
154 				s->nop = 1;
155 				continue;
156 			}
157 			if(!((s+1)->p.mark & BRANCH))
158 				continue;
159 			/* t is the trial instruction to use */
160 			for(t=s-1; t>=sch; t--) {
161 				for(u=t+1; u<=s; u++)
162 					if(depend(u, t))
163 						goto no3;
164 				goto out3;
165 			no3:;
166 			}
167 			if(debug['X'])
168 				Bprint(&bso, "?f%P\n", &s->p);
169 			s->nop = 1;
170 			continue;
171 		out3:
172 			if(debug['X']) {
173 				Bprint(&bso, "!f%P\n", &t->p);
174 				Bprint(&bso, "%P\n", &s->p);
175 			}
176 			stmp = *t;
177 			memmove(t, t+1, (uchar*)s - (uchar*)t);
178 			*s = stmp;
179 			s--;
180 			continue;
181 		}
182 	}
183 
184 	/*
185 	 * put it all back
186 	 */
187 	for(s=sch, p=p0; s<se; s++, p=q) {
188 		q = p->link;
189 		if(q != s->p.link) {
190 			*p = s->p;
191 			p->link = q;
192 		}
193 		if(s->nop)
194 			addnop(p);
195 	}
196 	if(debug['X'])
197 		Bprint(&bso, "\n");
198 }
199 
200 void
201 regsused(Sch *s, Prog *realp)
202 {
203 	int c, ar, ad, ld, sz;
204 	ulong m;
205 	Prog *p;
206 
207 	p = &s->p;
208 	s->comp = compound(p);
209 	s->nop = 0;
210 	if(s->comp) {
211 		s->set.ireg |= 1<<REGTMP;
212 		s->used.ireg |= 1<<REGTMP;
213 	}
214 	ar = 0;		/* dest is really reference */
215 	ad = 0;		/* source/dest is really address */
216 	ld = 0;		/* opcode is load instruction */
217 	sz = 20;		/* size of load/store for overlap computation */
218 
219 /*
220  * flags based on opcode
221  */
222 	switch(p->as) {
223 	case ATEXT:
224 		curtext = realp;
225 		autosize = p->to.offset + 4;
226 		ad = 1;
227 		break;
228 	case AJMPL:
229 		c = p->reg;
230 		if(c == NREG)
231 			c = REGLINK;
232 		s->set.ireg |= 1<<c;
233 		ar = 1;
234 		ad = 1;
235 		break;
236 	case AJMP:
237 		ar = 1;
238 		ad = 1;
239 		break;
240 	case ACMP:
241 		s->set.cc |= E_ICC;
242 		ar = 1;
243 		break;
244 	case AFCMPD:
245 	case AFCMPED:
246 	case AFCMPEF:
247 	case AFCMPEX:
248 	case AFCMPF:
249 	case AFCMPX:
250 		s->set.cc |= E_FCC;
251 		ar = 1;
252 		break;
253 	case ABE:
254 	case ABNE:
255 	case ABLE:
256 	case ABG:
257 	case ABL:
258 	case ABGE:
259 	case ABLEU:
260 	case ABGU:
261 	case ABCS:
262 	case ABCC:
263 	case ABNEG:
264 	case ABPOS:
265 	case ABVC:
266 	case ABVS:
267 		s->used.cc |= E_ICC;
268 		ar = 1;
269 		break;
270 	case AFBE:
271 	case AFBLG:
272 	case AFBG:
273 	case AFBLE:
274 	case AFBGE:
275 	case AFBL:
276 		s->used.cc |= E_FCC;
277 		ar = 1;
278 		break;
279 	case AMOVB:
280 	case AMOVBU:
281 		sz = 1;
282 		ld = 1;
283 		break;
284 	case AMOVH:
285 	case AMOVHU:
286 		sz = 2;
287 		ld = 1;
288 		break;
289 	case AFMOVF:
290 	case AMOVW:
291 		sz = 4;
292 		ld = 1;
293 		break;
294 	case AMOVD:
295 	case AFMOVD:
296 		sz = 8;
297 		ld = 1;
298 		break;
299 	case AFMOVX:	/* gok */
300 		sz = 16;
301 		ld = 1;
302 		break;
303 	case AADDCC:
304 	case AADDXCC:
305 	case AANDCC:
306 	case AANDNCC:
307 	case AORCC:
308 	case AORNCC:
309 	case ASUBCC:
310 	case ASUBXCC:
311 	case ATADDCC:
312 	case ATADDCCTV:
313 	case ATSUBCC:
314 	case ATSUBCCTV:
315 	case AXNORCC:
316 	case AXORCC:
317 		s->set.cc |= E_ICC;
318 		break;
319 	case ADIV:
320 	case ADIVL:
321 	case AMOD:
322 	case AMODL:
323 	case AMUL:
324 	case AMULSCC:
325 	case ATAS:
326 		s->set.ireg = ALL;
327 		s->set.freg = ALL;
328 		s->set.cc = ALL;
329 		break;
330 	}
331 
332 /*
333  * flags based on 'to' field
334  */
335 	c = p->to.class;
336 	if(c == 0) {
337 		c = aclass(&p->to) + 1;
338 		p->to.class = c;
339 	}
340 	c--;
341 	switch(c) {
342 	default:
343 		print("unknown class %d %D\n", c, &p->to);
344 
345 	case C_ZCON:
346 	case C_SCON:
347 	case C_UCON:
348 	case C_LCON:
349 	case C_NONE:
350 	case C_SBRA:
351 	case C_LBRA:
352 		break;
353 	case C_CREG:
354 	case C_FSR:
355 	case C_FQ:
356 	case C_PREG:
357 		s->set.ireg = ALL;
358 		s->set.freg = ALL;
359 		s->set.cc = ALL;
360 		break;
361 	case C_ZOREG:
362 	case C_SOREG:
363 	case C_LOREG:
364 	case C_ASI:
365 		c = p->to.reg;
366 		s->used.ireg |= 1<<c;
367 		if(ad)
368 			break;
369 		s->size = sz;
370 		s->soffset = regoff(&p->to);
371 
372 		m = ANYMEM;
373 		if(c == REGSB)
374 			m = E_MEMSB;
375 		if(c == REGSP)
376 			m = E_MEMSP;
377 
378 		if(ar)
379 			s->used.cc |= m;
380 		else
381 			s->set.cc |= m;
382 		break;
383 	case C_SACON:
384 	case C_LACON:
385 		s->used.ireg |= 1<<REGSP;
386 		break;
387 	case C_SECON:
388 	case C_LECON:
389 		s->used.ireg |= 1<<REGSB;
390 		break;
391 	case C_REG:
392 		if(ar)
393 			s->used.ireg |= 1<<p->to.reg;
394 		else
395 			s->set.ireg |= 1<<p->to.reg;
396 		break;
397 	case C_FREG:
398 		/* do better -- determine double prec */
399 		if(ar) {
400 			s->used.freg |= 1<<p->to.reg;
401 			s->used.freg |= 1<<(p->to.reg|1);
402 		} else {
403 			s->set.freg |= 1<<p->to.reg;
404 			s->set.freg |= 1<<(p->to.reg|1);
405 		}
406 		break;
407 	case C_SAUTO:
408 	case C_LAUTO:
409 	case C_ESAUTO:
410 	case C_OSAUTO:
411 	case C_ELAUTO:
412 	case C_OLAUTO:
413 		s->used.ireg |= 1<<REGSP;
414 		if(ad)
415 			break;
416 		s->size = sz;
417 		s->soffset = regoff(&p->to);
418 
419 		if(ar)
420 			s->used.cc |= E_MEMSP;
421 		else
422 			s->set.cc |= E_MEMSP;
423 		break;
424 	case C_SEXT:
425 	case C_LEXT:
426 	case C_ESEXT:
427 	case C_OSEXT:
428 	case C_ELEXT:
429 	case C_OLEXT:
430 		s->used.ireg |= 1<<REGSB;
431 		if(ad)
432 			break;
433 		s->size = sz;
434 		s->soffset = regoff(&p->to);
435 
436 		if(ar)
437 			s->used.cc |= E_MEMSB;
438 		else
439 			s->set.cc |= E_MEMSB;
440 		break;
441 	}
442 
443 /*
444  * flags based on 'from' field
445  */
446 	c = p->from.class;
447 	if(c == 0) {
448 		c = aclass(&p->from) + 1;
449 		p->from.class = c;
450 	}
451 	c--;
452 	switch(c) {
453 	default:
454 		print("unknown class %d %D\n", c, &p->from);
455 
456 	case C_ZCON:
457 	case C_SCON:
458 	case C_UCON:
459 	case C_LCON:
460 	case C_NONE:
461 	case C_SBRA:
462 	case C_LBRA:
463 		c = p->from.reg;
464 		if(c != NREG)
465 			s->used.ireg |= 1<<c;
466 		break;
467 	case C_CREG:
468 	case C_FSR:
469 	case C_FQ:
470 	case C_PREG:
471 		s->set.ireg = ALL;
472 		s->set.freg = ALL;
473 		s->set.cc = ALL;
474 		break;
475 	case C_ZOREG:
476 	case C_SOREG:
477 	case C_LOREG:
478 	case C_ASI:
479 		c = p->from.reg;
480 		s->used.ireg |= 1<<c;
481 		if(ld)
482 			p->mark |= LOAD;
483 		if(ad)
484 			break;
485 		s->size = sz;
486 		s->soffset = regoff(&p->from);
487 
488 		m = ANYMEM;
489 		if(c == REGSB)
490 			m = E_MEMSB;
491 		if(c == REGSP)
492 			m = E_MEMSP;
493 
494 		s->used.cc |= m;
495 		break;
496 	case C_SACON:
497 	case C_LACON:
498 		s->used.ireg |= 1<<REGSP;
499 		break;
500 	case C_SECON:
501 	case C_LECON:
502 		s->used.ireg |= 1<<REGSB;
503 		break;
504 	case C_REG:
505 		s->used.ireg |= 1<<p->from.reg;
506 		break;
507 	case C_FREG:
508 		/* do better -- determine double prec */
509 		s->used.freg |= 1<<p->from.reg;
510 		s->used.freg |= 1<<(p->from.reg|1);
511 		break;
512 	case C_SAUTO:
513 	case C_LAUTO:
514 	case C_ESAUTO:
515 	case C_ELAUTO:
516 	case C_OSAUTO:
517 	case C_OLAUTO:
518 		s->used.ireg |= 1<<REGSP;
519 		if(ld)
520 			p->mark |= LOAD;
521 		if(ad)
522 			break;
523 		s->size = sz;
524 		s->soffset = regoff(&p->from);
525 
526 		s->used.cc |= E_MEMSP;
527 		break;
528 	case C_SEXT:
529 	case C_LEXT:
530 	case C_ESEXT:
531 	case C_ELEXT:
532 	case C_OSEXT:
533 	case C_OLEXT:
534 		s->used.ireg |= 1<<REGSB;
535 		if(ld)
536 			p->mark |= LOAD;
537 		if(ad)
538 			break;
539 		s->size = sz;
540 		s->soffset = regoff(&p->from);
541 
542 		s->used.cc |= E_MEMSB;
543 		break;
544 	}
545 
546 	c = p->reg;
547 	if(c != NREG) {
548 		if(p->from.type == D_FREG || p->to.type == D_FREG) {
549 			s->used.freg |= 1<<c;
550 			s->used.freg |= 1<<(c|1);
551 		} else
552 			s->used.ireg |= 1<<c;
553 	}
554 	s->set.ireg &= ~(1<<0);		/* R0 cant be set */
555 }
556 
557 /*
558  * test to see if 2 instrictions can be
559  * interchanged without changing semantics
560  */
561 int
562 depend(Sch *sa, Sch *sb)
563 {
564 	ulong x;
565 
566 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
567 		return 1;
568 	if(sb->set.ireg & sa->used.ireg)
569 		return 1;
570 
571 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
572 		return 1;
573 	if(sb->set.freg & sa->used.freg)
574 		return 1;
575 
576 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
577 		(sb->set.cc & sa->used.cc);
578 	if(x) {
579 		/*
580 		 * allow SB and SP to pass each other.
581 		 * allow SB to pass SB iff doffsets are ok
582 		 * anything else conflicts
583 		 */
584 		if(x != E_MEMSP && x != E_MEMSB)
585 			return 1;
586 		x = sa->set.cc | sb->set.cc |
587 			sa->used.cc | sb->used.cc;
588 		if(x & E_MEM)
589 			return 1;
590 		if(offoverlap(sa, sb))
591 			return 1;
592 	}
593 
594 	return 0;
595 }
596 
597 int
598 offoverlap(Sch *sa, Sch *sb)
599 {
600 
601 	if(sa->soffset < sb->soffset) {
602 		if(sa->soffset+sa->size > sb->soffset)
603 			return 1;
604 		return 0;
605 	}
606 	if(sb->soffset+sb->size > sa->soffset)
607 		return 1;
608 	return 0;
609 }
610 
611 /*
612  * test 2 adjacent instructions
613  * and find out if inserted instructions
614  * are desired to prevent stalls.
615  * first instruction is a load instruction.
616  */
617 int
618 conflict(Sch *sa, Sch *sb)
619 {
620 
621 	if(sa->set.ireg & sb->used.ireg)
622 		return 1;
623 	if(sa->set.freg & sb->used.freg)
624 		return 1;
625 	return 0;
626 }
627 
628 int
629 compound(Prog *p)
630 {
631 	Optab *o;
632 
633 	o = oplook(p);
634 	if(o->size != 4)
635 		return 1;
636 	if(p->to.type == D_REG && p->to.reg == REGSB)
637 		return 1;
638 	return 0;
639 }
640 
641 void
642 dumpbits(Sch *s, Dep *d)
643 {
644 	int i;
645 
646 	for(i=0; i<32; i++)
647 		if(d->ireg & (1<<i))
648 			Bprint(&bso, " R%d", i);
649 	for(i=0; i<32; i++)
650 		if(d->freg & (1<<i))
651 			Bprint(&bso, " F%d", i);
652 	for(i=0; i<32; i++)
653 		switch(d->cc & (1<<i)) {
654 		default:
655 			break;
656 		case E_ICC:
657 			Bprint(&bso, " ICC");
658 			break;
659 		case E_FCC:
660 			Bprint(&bso, " FCC");
661 			break;
662 		case E_MEM:
663 			Bprint(&bso, " MEM%d", s->size);
664 			break;
665 		case E_MEMSB:
666 			Bprint(&bso, " SB%d", s->size);
667 			break;
668 		case E_MEMSP:
669 			Bprint(&bso, " SP%d", s->size);
670 			break;
671 		}
672 }
673