xref: /inferno-os/utils/ql/sched.c (revision 4eb166cf184c1f102fb79e31b1465ea3e2021c39)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_ICC	= 1<<0,
6 	E_FCC	= 1<<1,
7 	E_MEM	= 1<<2,
8 	E_MEMSP	= 1<<3,	/* uses offset and size */
9 	E_MEMSB	= 1<<4,	/* uses offset and size */
10 	E_LR	= 1<<5,
11 	E_CR = 1<<6,
12 	E_CTR = 1<<7,
13 	E_XER = 1<<8,
14 
15 	E_CR0 = 0xF<<0,
16 	E_CR1 = 0xF<<4,
17 
18 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
19 	ALL	= ~0,
20 };
21 
22 typedef	struct	Sch	Sch;
23 typedef	struct	Dep	Dep;
24 
25 struct	Dep
26 {
27 	ulong	ireg;
28 	ulong	freg;
29 	ulong	cc;
30 	ulong	cr;
31 };
32 struct	Sch
33 {
34 	Prog	p;
35 	Dep	set;
36 	Dep	used;
37 	long	soffset;
38 	char	size;
39 	char	comp;
40 };
41 
42 void	regused(Sch*, Prog*);
43 int	depend(Sch*, Sch*);
44 int	conflict(Sch*, Sch*);
45 int	offoverlap(Sch*, Sch*);
46 void	dumpbits(Sch*, Dep*);
47 
48 void
49 sched(Prog *p0, Prog *pe)
50 {
51 	Prog *p, *q;
52 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
53 
54 	if(!debug['Q'])
55 		return;
56 	/*
57 	 * build side structure
58 	 */
59 	s = sch;
60 	for(p=p0;; p=p->link) {
61 		memset(s, 0, sizeof(*s));
62 		s->p = *p;
63 		regused(s, p);
64 		if(debug['X']) {
65 			Bprint(&bso, "%P\tset", &s->p);
66 			dumpbits(s, &s->set);
67 			Bprint(&bso, "; used");
68 			dumpbits(s, &s->used);
69 			if(s->comp)
70 				Bprint(&bso, "; compound");
71 			if(s->p.mark & LOAD)
72 				Bprint(&bso, "; load");
73 			if(s->p.mark & BRANCH)
74 				Bprint(&bso, "; branch");
75 			if(s->p.mark & FCMP)
76 				Bprint(&bso, "; fcmp");
77 			Bprint(&bso, "\n");
78 		}
79 		s++;
80 		if(p == pe)
81 			break;
82 	}
83 	se = s;
84 
85 	for(s=se-1; s>=sch; s--) {
86 
87 		/*
88 		 * load delay. interlocked.
89 		 */
90 		if(s->p.mark & LOAD) {
91 			if(s >= se-1)
92 				continue;
93 			if(!conflict(s, (s+1)))
94 				continue;
95 			/*
96 			 * s is load, s+1 is immediate use of result
97 			 * t is the trial instruction to insert between s and s+1
98 			 */
99 			for(t=s-1; t>=sch; t--) {
100 				if(t->p.mark & BRANCH)
101 					goto no2;
102 				if(t->p.mark & FCMP)
103 					if((s+1)->p.mark & BRANCH)
104 						goto no2;
105 				if(t->p.mark & LOAD)
106 					if(conflict(t, (s+1)))
107 						goto no2;
108 				for(u=t+1; u<=s; u++)
109 					if(depend(u, t))
110 						goto no2;
111 				goto out2;
112 			no2:;
113 			}
114 			if(debug['X'])
115 				Bprint(&bso, "?l%P\n", &s->p);
116 			continue;
117 		out2:
118 			if(debug['X']) {
119 				Bprint(&bso, "!l%P\n", &t->p);
120 				Bprint(&bso, "%P\n", &s->p);
121 			}
122 			stmp = *t;
123 			memmove(t, t+1, (uchar*)s - (uchar*)t);
124 			*s = stmp;
125 			s--;
126 			continue;
127 		}
128 
129 		/*
130 		 * fop2 delay.
131 		 */
132 		if(s->p.mark & FCMP) {
133 			if(s >= se-1)
134 				continue;
135 			if(!((s+1)->p.mark & BRANCH))
136 				continue;
137 			/* t is the trial instruction to use */
138 			for(t=s-1; t>=sch; t--) {
139 				for(u=t+1; u<=s; u++)
140 					if(depend(u, t))
141 						goto no3;
142 				goto out3;
143 			no3:;
144 			}
145 			if(debug['X'])
146 				Bprint(&bso, "?f%P\n", &s->p);
147 			continue;
148 		out3:
149 			if(debug['X']) {
150 				Bprint(&bso, "!f%P\n", &t->p);
151 				Bprint(&bso, "%P\n", &s->p);
152 			}
153 			stmp = *t;
154 			memmove(t, t+1, (uchar*)s - (uchar*)t);
155 			*s = stmp;
156 			s--;
157 			continue;
158 		}
159 	}
160 
161 	/*
162 	 * put it all back
163 	 */
164 	for(s=sch, p=p0; s<se; s++, p=q) {
165 		q = p->link;
166 		if(q != s->p.link) {
167 			*p = s->p;
168 			p->link = q;
169 		}
170 	}
171 	if(debug['X'])
172 		Bprint(&bso, "\n");
173 }
174 
175 void
176 regused(Sch *s, Prog *realp)
177 {
178 	int c, ar, ad, ld, sz, nr, upd;
179 	ulong m;
180 	Prog *p;
181 
182 	p = &s->p;
183 	s->comp = compound(p);
184 	if(s->comp) {
185 		s->set.ireg |= 1<<REGTMP;
186 		s->used.ireg |= 1<<REGTMP;
187 	}
188 	ar = 0;		/* dest is really reference */
189 	ad = 0;		/* source/dest is really address */
190 	ld = 0;		/* opcode is load instruction */
191 	sz = 32*4;		/* size of load/store for overlap computation */
192 	nr = 0;	/* source/dest is not really reg */
193 	upd = 0;	/* move with update; changes reg */
194 
195 /*
196  * flags based on opcode
197  */
198 	switch(p->as) {
199 	case ATEXT:
200 		curtext = realp;
201 		autosize = p->to.offset + 4;
202 		ad = 1;
203 		break;
204 	case ABL:
205 		s->set.cc |= E_LR;
206 		ar = 1;
207 		ad = 1;
208 		break;
209 	case ABR:
210 		ar = 1;
211 		ad = 1;
212 		break;
213 	case ACMP:
214 		s->set.cc |= E_ICC;
215 		if(p->reg == 0)
216 			s->set.cr |= E_CR0;
217 		else
218 			s->set.cr |= (0xF<<((p->reg&7)*4));
219 		ar = 1;
220 		break;
221 	case AFCMPO:
222 	case AFCMPU:
223 		s->set.cc |= E_FCC;
224 		if(p->reg == 0)
225 			s->set.cr |= E_CR0;
226 		else
227 			s->set.cr |= (0xF<<((p->reg&7)*4));
228 		ar = 1;
229 		break;
230 	case ACRAND:
231 	case ACRANDN:
232 	case ACREQV:
233 	case ACRNAND:
234 	case ACRNOR:
235 	case ACROR:
236 	case ACRORN:
237 	case ACRXOR:
238 		s->used.cr |= 1<<p->from.reg;
239 		s->set.cr |= 1<<p->to.reg;
240 		nr = 1;
241 		break;
242 	case ABCL:	/* tricky */
243 		s->used.cc |= E_FCC|E_ICC;
244 		s->used.cr = ALL;
245 		s->set.cc |= E_LR;
246 		ar = 1;
247 		break;
248 	case ABC:		/* tricky */
249 		s->used.cc |= E_FCC|E_ICC;
250 		s->used.cr = ALL;
251 		ar = 1;
252 		break;
253 	case ABEQ:
254 	case ABGE:
255 	case ABGT:
256 	case ABLE:
257 	case ABLT:
258 	case ABNE:
259 	case ABVC:
260 	case ABVS:
261 		s->used.cc |= E_ICC;
262 		s->used.cr |= E_CR0;
263 		ar = 1;
264 		break;
265 	case ALSW:
266 	case AMOVMW:
267 		/* could do better */
268 		sz = 32*4;
269 		ld = 1;
270 		break;
271 	case AMOVBU:
272 	case AMOVBZU:
273 		upd = 1;
274 		sz = 1;
275 		ld = 1;
276 		break;
277 	case AMOVB:
278 	case AMOVBZ:
279 		sz = 1;
280 		ld = 1;
281 		break;
282 	case AMOVHU:
283 	case AMOVHZU:
284 		upd = 1;
285 		sz = 2;
286 		ld = 1;
287 		break;
288 	case AMOVH:
289 	case AMOVHBR:
290 	case AMOVHZ:
291 		sz = 2;
292 		ld = 1;
293 		break;
294 	case AFMOVSU:
295 	case AMOVWU:
296 		upd = 1;
297 		sz = 4;
298 		ld = 1;
299 		break;
300 	case AFMOVS:
301 	case AMOVW:
302 	case AMOVWBR:
303 	case ALWAR:
304 		sz = 4;
305 		ld = 1;
306 		break;
307 	case AFMOVDU:
308 		upd = 1;
309 		sz = 8;
310 		ld = 1;
311 		break;
312 	case AFMOVD:
313 		sz = 8;
314 		ld = 1;
315 		break;
316 	case AFMOVDCC:
317 		sz = 8;
318 		ld = 1;
319 		s->set.cc |= E_FCC;
320 		s->set.cr |= E_CR1;
321 		break;
322 	case AMOVFL:
323 	case AMOVCRFS:
324 	case AMTFSB0:
325 	case AMTFSB0CC:
326 	case AMTFSB1:
327 	case AMTFSB1CC:
328 		s->set.ireg = ALL;
329 		s->set.freg = ALL;
330 		s->set.cc = ALL;
331 		s->set.cr = ALL;
332 		break;
333 	case AADDCC:
334 	case AADDVCC:
335 	case AADDCCC:
336 	case AADDCVCC:
337 	case AADDMECC:
338 	case AADDMEVCC:
339 	case AADDECC:
340 	case AADDEVCC:
341 	case AADDZECC:
342 	case AADDZEVCC:
343 	case AANDCC:
344 	case AANDNCC:
345 	case ACNTLZWCC:
346 	case ADIVWCC:
347 	case ADIVWVCC:
348 	case ADIVWUCC:
349 	case ADIVWUVCC:
350 	case AEQVCC:
351 	case AEXTSBCC:
352 	case AEXTSHCC:
353 	case AMULHWCC:
354 	case AMULHWUCC:
355 	case AMULLWCC:
356 	case AMULLWVCC:
357 	case ANANDCC:
358 	case ANEGCC:
359 	case ANEGVCC:
360 	case ANORCC:
361 	case AORCC:
362 	case AORNCC:
363 	case AREMCC:
364 	case AREMVCC:
365 	case AREMUCC:
366 	case AREMUVCC:
367 	case ARLWMICC:
368 	case ARLWNMCC:
369 	case ASLWCC:
370 	case ASRAWCC:
371 	case ASRWCC:
372 	case ASTWCCC:
373 	case ASUBCC:
374 	case ASUBVCC:
375 	case ASUBCCC:
376 	case ASUBCVCC:
377 	case ASUBMECC:
378 	case ASUBMEVCC:
379 	case ASUBECC:
380 	case ASUBEVCC:
381 	case ASUBZECC:
382 	case ASUBZEVCC:
383 	case AXORCC:
384 		s->set.cc |= E_ICC;
385 		s->set.cr |= E_CR0;
386 		break;
387 	case AFABSCC:
388 	case AFADDCC:
389 	case AFADDSCC:
390 	case AFCTIWCC:
391 	case AFCTIWZCC:
392 	case AFDIVCC:
393 	case AFDIVSCC:
394 	case AFMADDCC:
395 	case AFMADDSCC:
396 	case AFMSUBCC:
397 	case AFMSUBSCC:
398 	case AFMULCC:
399 	case AFMULSCC:
400 	case AFNABSCC:
401 	case AFNEGCC:
402 	case AFNMADDCC:
403 	case AFNMADDSCC:
404 	case AFNMSUBCC:
405 	case AFNMSUBSCC:
406 	case AFRSPCC:
407 	case AFSUBCC:
408 	case AFSUBSCC:
409 		s->set.cc |= E_FCC;
410 		s->set.cr |= E_CR1;
411 		break;
412 	}
413 
414 /*
415  * flags based on 'to' field
416  */
417 	c = p->to.class;
418 	if(c == 0) {
419 		c = aclass(&p->to) + 1;
420 		p->to.class = c;
421 	}
422 	c--;
423 	switch(c) {
424 	default:
425 		print("unknown class %d %D\n", c, &p->to);
426 
427 	case C_NONE:
428 	case C_ZCON:
429 	case C_SCON:
430 	case C_UCON:
431 	case C_LCON:
432 	case C_ADDCON:
433 	case C_ANDCON:
434 	case C_SBRA:
435 	case C_LBRA:
436 		break;
437 	case C_CREG:
438 		c = p->to.reg;
439 		if(c == NREG)
440 			s->set.cr = ALL;
441 		else
442 			s->set.cr |= (0xF << ((p->from.reg&7)*4));
443 		s->set.cc = ALL;
444 		break;
445 	case C_SPR:
446 	case C_SREG:
447 	case C_FPSCR:
448 	case C_MSR:
449 	case C_XER:
450 		s->set.ireg = ALL;
451 		s->set.freg = ALL;
452 		s->set.cc = ALL;
453 		s->set.cr = ALL;
454 		break;
455 	case C_LR:
456 		s->set.cc |= E_LR;
457 		break;
458 	case C_CTR:
459 		s->set.cc |= E_CTR;
460 		break;
461 	case C_ZOREG:
462 	case C_SOREG:
463 	case C_LOREG:
464 		c = p->to.reg;
465 		s->used.ireg |= 1<<c;
466 		if(upd)
467 			s->set.ireg |= 1<<c;
468 		if(ad)
469 			break;
470 		s->size = sz;
471 		s->soffset = regoff(&p->to);
472 
473 		m = ANYMEM;
474 		if(c == REGSB)
475 			m = E_MEMSB;
476 		if(c == REGSP)
477 			m = E_MEMSP;
478 
479 		if(ar)
480 			s->used.cc |= m;
481 		else
482 			s->set.cc |= m;
483 		break;
484 	case C_SACON:
485 	case C_LACON:
486 		s->used.ireg |= 1<<REGSP;
487 		if(upd)
488 			s->set.ireg |= 1<<c;
489 		break;
490 	case C_SECON:
491 	case C_LECON:
492 		s->used.ireg |= 1<<REGSB;
493 		if(upd)
494 			s->set.ireg |= 1<<c;
495 		break;
496 	case C_REG:
497 		if(nr)
498 			break;
499 		if(ar)
500 			s->used.ireg |= 1<<p->to.reg;
501 		else
502 			s->set.ireg |= 1<<p->to.reg;
503 		break;
504 	case C_FREG:
505 		if(ar)
506 			s->used.freg |= 1<<p->to.reg;
507 		else
508 			s->set.freg |= 1<<p->to.reg;
509 		break;
510 	case C_SAUTO:
511 	case C_LAUTO:
512 		s->used.ireg |= 1<<REGSP;
513 		if(upd)
514 			s->set.ireg |= 1<<c;
515 		if(ad)
516 			break;
517 		s->size = sz;
518 		s->soffset = regoff(&p->to);
519 
520 		if(ar)
521 			s->used.cc |= E_MEMSP;
522 		else
523 			s->set.cc |= E_MEMSP;
524 		break;
525 	case C_SEXT:
526 	case C_LEXT:
527 		s->used.ireg |= 1<<REGSB;
528 		if(upd)
529 			s->set.ireg |= 1<<c;
530 		if(ad)
531 			break;
532 		s->size = sz;
533 		s->soffset = regoff(&p->to);
534 
535 		if(ar)
536 			s->used.cc |= E_MEMSB;
537 		else
538 			s->set.cc |= E_MEMSB;
539 		break;
540 	}
541 
542 /*
543  * flags based on 'from' field
544  */
545 	c = p->from.class;
546 	if(c == 0) {
547 		c = aclass(&p->from) + 1;
548 		p->from.class = c;
549 	}
550 	c--;
551 	switch(c) {
552 	default:
553 		print("unknown class %d %D\n", c, &p->from);
554 
555 	case C_NONE:
556 	case C_ZCON:
557 	case C_SCON:
558 	case C_UCON:
559 	case C_LCON:
560 	case C_ADDCON:
561 	case C_ANDCON:
562 	case C_SBRA:
563 	case C_LBRA:
564 		c = p->from.reg;
565 		if(c != NREG)
566 			s->used.ireg |= 1<<c;
567 		break;
568 	case C_CREG:
569 		c = p->from.reg;
570 		if(c == NREG)
571 			s->used.cr = ALL;
572 		else
573 			s->used.cr |= (0xF << ((p->from.reg&7)*4));
574 		s->used.cc = ALL;
575 		break;
576 	case C_SPR:
577 	case C_SREG:
578 	case C_FPSCR:
579 	case C_MSR:
580 	case C_XER:
581 		s->set.ireg = ALL;
582 		s->set.freg = ALL;
583 		s->set.cc = ALL;
584 		s->set.cr = ALL;
585 		break;
586 	case C_LR:
587 		s->used.cc |= E_LR;
588 		break;
589 	case C_CTR:
590 		s->used.cc |= E_CTR;
591 		break;
592 	case C_ZOREG:
593 	case C_SOREG:
594 	case C_LOREG:
595 		c = p->from.reg;
596 		s->used.ireg |= 1<<c;
597 		if(ld)
598 			p->mark |= LOAD;
599 		if(ad)
600 			break;
601 		s->size = sz;
602 		s->soffset = regoff(&p->from);
603 
604 		m = ANYMEM;
605 		if(c == REGSB)
606 			m = E_MEMSB;
607 		if(c == REGSP)
608 			m = E_MEMSP;
609 
610 		s->used.cc |= m;
611 		break;
612 	case C_SACON:
613 	case C_LACON:
614 		s->used.ireg |= 1<<REGSP;
615 		break;
616 	case C_SECON:
617 	case C_LECON:
618 		s->used.ireg |= 1<<REGSB;
619 		break;
620 	case C_REG:
621 		if(nr)
622 			break;
623 		s->used.ireg |= 1<<p->from.reg;
624 		break;
625 	case C_FREG:
626 		s->used.freg |= 1<<p->from.reg;
627 		break;
628 	case C_SAUTO:
629 	case C_LAUTO:
630 		s->used.ireg |= 1<<REGSP;
631 		if(ld)
632 			p->mark |= LOAD;
633 		if(ad)
634 			break;
635 		s->size = sz;
636 		s->soffset = regoff(&p->from);
637 
638 		s->used.cc |= E_MEMSP;
639 		break;
640 	case C_SEXT:
641 	case C_LEXT:
642 		s->used.ireg |= 1<<REGSB;
643 		if(ld)
644 			p->mark |= LOAD;
645 		if(ad)
646 			break;
647 		s->size = sz;
648 		s->soffset = regoff(&p->from);
649 
650 		s->used.cc |= E_MEMSB;
651 		break;
652 	}
653 
654 	c = p->reg;
655 	if(c != NREG) {
656 		if(p->from.type == D_FREG || p->to.type == D_FREG)
657 			s->used.freg |= 1<<c;
658 		else
659 			s->used.ireg |= 1<<c;
660 	}
661 }
662 
663 /*
664  * test to see if 2 instrictions can be
665  * interchanged without changing semantics
666  */
667 int
668 depend(Sch *sa, Sch *sb)
669 {
670 	ulong x;
671 
672 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
673 		return 1;
674 	if(sb->set.ireg & sa->used.ireg)
675 		return 1;
676 
677 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
678 		return 1;
679 	if(sb->set.freg & sa->used.freg)
680 		return 1;
681 
682 	if(sa->set.cr & (sb->set.cr|sb->used.cr))
683 		return 1;
684 	if(sb->set.cr & sa->used.cr)
685 		return 1;
686 
687 
688 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
689 		(sb->set.cc & sa->used.cc);
690 	if(x) {
691 		/*
692 		 * allow SB and SP to pass each other.
693 		 * allow SB to pass SB iff doffsets are ok
694 		 * anything else conflicts
695 		 */
696 		if(x != E_MEMSP && x != E_MEMSB)
697 			return 1;
698 		x = sa->set.cc | sb->set.cc |
699 			sa->used.cc | sb->used.cc;
700 		if(x & E_MEM)
701 			return 1;
702 		if(offoverlap(sa, sb))
703 			return 1;
704 	}
705 
706 	return 0;
707 }
708 
709 int
710 offoverlap(Sch *sa, Sch *sb)
711 {
712 
713 	if(sa->soffset < sb->soffset) {
714 		if(sa->soffset+sa->size > sb->soffset)
715 			return 1;
716 		return 0;
717 	}
718 	if(sb->soffset+sb->size > sa->soffset)
719 		return 1;
720 	return 0;
721 }
722 
723 /*
724  * test 2 adjacent instructions
725  * and find out if inserted instructions
726  * are desired to prevent stalls.
727  * first instruction is a load instruction.
728  */
729 int
730 conflict(Sch *sa, Sch *sb)
731 {
732 
733 	if(sa->set.ireg & sb->used.ireg)
734 		return 1;
735 	if(sa->set.freg & sb->used.freg)
736 		return 1;
737 	if(sa->set.cr & sb->used.cr)
738 		return 1;
739 	return 0;
740 }
741 
742 int
743 compound(Prog *p)
744 {
745 	Optab *o;
746 
747 	o = oplook(p);
748 	if(o->size != 4)
749 		return 1;
750 	if(p->to.type == D_REG && p->to.reg == REGSB)
751 		return 1;
752 	return 0;
753 }
754 
755 void
756 dumpbits(Sch *s, Dep *d)
757 {
758 	int i;
759 
760 	for(i=0; i<32; i++)
761 		if(d->ireg & (1<<i))
762 			Bprint(&bso, " R%d", i);
763 	for(i=0; i<32; i++)
764 		if(d->freg & (1<<i))
765 			Bprint(&bso, " F%d", i);
766 	for(i=0; i<32; i++)
767 		if(d->cr & (1<<i))
768 			Bprint(&bso, " C%d", i);
769 	for(i=0; i<32; i++)
770 		switch(d->cc & (1<<i)) {
771 		default:
772 			break;
773 		case E_ICC:
774 			Bprint(&bso, " ICC");
775 			break;
776 		case E_FCC:
777 			Bprint(&bso, " FCC");
778 			break;
779 		case E_LR:
780 			Bprint(&bso, " LR");
781 			break;
782 		case E_CR:
783 			Bprint(&bso, " CR");
784 			break;
785 		case E_CTR:
786 			Bprint(&bso, " CTR");
787 			break;
788 		case E_XER:
789 			Bprint(&bso, " XER");
790 			break;
791 		case E_MEM:
792 			Bprint(&bso, " MEM%d", s->size);
793 			break;
794 		case E_MEMSB:
795 			Bprint(&bso, " SB%d", s->size);
796 			break;
797 		case E_MEMSP:
798 			Bprint(&bso, " SP%d", s->size);
799 			break;
800 		}
801 }
802