xref: /plan9-contrib/sys/src/cmd/9l/sched.c (revision fbadb1c4d4463e58337ffb1ed396c9caee5d1889)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_ICC	= 1<<0,
6 	E_FCC	= 1<<1,
7 	E_MEM	= 1<<2,
8 	E_MEMSP	= 1<<3,	/* uses offset and size */
9 	E_MEMSB	= 1<<4,	/* uses offset and size */
10 	E_LR	= 1<<5,
11 	E_CR = 1<<6,
12 	E_CTR = 1<<7,
13 	E_XER = 1<<8,
14 
15 	E_CR0 = 0xF<<0,
16 	E_CR1 = 0xF<<4,
17 
18 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
19 	ALL	= ~0,
20 };
21 
22 typedef	struct	Sch	Sch;
23 typedef	struct	Dep	Dep;
24 
25 struct	Dep
26 {
27 	ulong	ireg;
28 	ulong	freg;
29 	ulong	cc;
30 	ulong	cr;
31 };
32 struct	Sch
33 {
34 	Prog	p;
35 	Dep	set;
36 	Dep	used;
37 	long	soffset;
38 	char	size;
39 	char	comp;
40 };
41 
42 void	regused(Sch*, Prog*);
43 int	depend(Sch*, Sch*);
44 int	conflict(Sch*, Sch*);
45 int	offoverlap(Sch*, Sch*);
46 void	dumpbits(Sch*, Dep*);
47 
48 void
sched(Prog * p0,Prog * pe)49 sched(Prog *p0, Prog *pe)
50 {
51 	Prog *p, *q;
52 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
53 
54 	if(!debug['Q'])
55 		return;
56 	/*
57 	 * build side structure
58 	 */
59 	s = sch;
60 	for(p=p0;; p=p->link) {
61 		memset(s, 0, sizeof(*s));
62 		s->p = *p;
63 		regused(s, p);
64 		if(debug['X']) {
65 			Bprint(&bso, "%P\tset", &s->p);
66 			dumpbits(s, &s->set);
67 			Bprint(&bso, "; used");
68 			dumpbits(s, &s->used);
69 			if(s->comp)
70 				Bprint(&bso, "; compound");
71 			if(s->p.mark & LOAD)
72 				Bprint(&bso, "; load");
73 			if(s->p.mark & BRANCH)
74 				Bprint(&bso, "; branch");
75 			if(s->p.mark & FCMP)
76 				Bprint(&bso, "; fcmp");
77 			Bprint(&bso, "\n");
78 		}
79 		s++;
80 		if(p == pe)
81 			break;
82 	}
83 	se = s;
84 
85 	for(s=se-1; s>=sch; s--) {
86 
87 		/*
88 		 * load delay. interlocked.
89 		 */
90 		if(s->p.mark & LOAD) {
91 			if(s >= se-1)
92 				continue;
93 			if(!conflict(s, (s+1)))
94 				continue;
95 			/*
96 			 * s is load, s+1 is immediate use of result
97 			 * t is the trial instruction to insert between s and s+1
98 			 */
99 			for(t=s-1; t>=sch; t--) {
100 				if(t->p.mark & BRANCH)
101 					goto no2;
102 				if(t->p.mark & FCMP)
103 					if((s+1)->p.mark & BRANCH)
104 						goto no2;
105 				if(t->p.mark & LOAD)
106 					if(conflict(t, (s+1)))
107 						goto no2;
108 				for(u=t+1; u<=s; u++)
109 					if(depend(u, t))
110 						goto no2;
111 				goto out2;
112 			no2:;
113 			}
114 			if(debug['X'])
115 				Bprint(&bso, "?l%P\n", &s->p);
116 			continue;
117 		out2:
118 			if(debug['X']) {
119 				Bprint(&bso, "!l%P\n", &t->p);
120 				Bprint(&bso, "%P\n", &s->p);
121 			}
122 			stmp = *t;
123 			memmove(t, t+1, (uchar*)s - (uchar*)t);
124 			*s = stmp;
125 			s--;
126 			continue;
127 		}
128 
129 		/*
130 		 * fop2 delay.
131 		 */
132 		if(s->p.mark & FCMP) {
133 			if(s >= se-1)
134 				continue;
135 			if(!((s+1)->p.mark & BRANCH))
136 				continue;
137 			/* t is the trial instruction to use */
138 			for(t=s-1; t>=sch; t--) {
139 				for(u=t+1; u<=s; u++)
140 					if(depend(u, t))
141 						goto no3;
142 				goto out3;
143 			no3:;
144 			}
145 			if(debug['X'])
146 				Bprint(&bso, "?f%P\n", &s->p);
147 			continue;
148 		out3:
149 			if(debug['X']) {
150 				Bprint(&bso, "!f%P\n", &t->p);
151 				Bprint(&bso, "%P\n", &s->p);
152 			}
153 			stmp = *t;
154 			memmove(t, t+1, (uchar*)s - (uchar*)t);
155 			*s = stmp;
156 			s--;
157 			continue;
158 		}
159 	}
160 
161 	/*
162 	 * put it all back
163 	 */
164 	for(s=sch, p=p0; s<se; s++, p=q) {
165 		q = p->link;
166 		if(q != s->p.link) {
167 			*p = s->p;
168 			p->link = q;
169 		}
170 	}
171 	if(debug['X'])
172 		Bprint(&bso, "\n");
173 }
174 
175 void
regused(Sch * s,Prog * realp)176 regused(Sch *s, Prog *realp)
177 {
178 	int c, ar, ad, ld, sz, nr, upd;
179 	ulong m;
180 	Prog *p;
181 
182 	p = &s->p;
183 	s->comp = compound(p);
184 	if(s->comp) {
185 		s->set.ireg |= 1<<REGTMP;
186 		s->used.ireg |= 1<<REGTMP;
187 	}
188 	ar = 0;		/* dest is really reference */
189 	ad = 0;		/* source/dest is really address */
190 	ld = 0;		/* opcode is load instruction */
191 	sz = 32*4;		/* size of load/store for overlap computation */
192 	nr = 0;	/* source/dest is not really reg */
193 	upd = 0;	/* move with update; changes reg */
194 
195 /*
196  * flags based on opcode
197  */
198 	switch(p->as) {
199 	case ATEXT:
200 		curtext = realp;
201 		autosize = p->to.offset + 8;
202 		ad = 1;
203 		break;
204 	case ABL:
205 		s->set.cc |= E_LR;
206 		ar = 1;
207 		ad = 1;
208 		break;
209 	case ABR:
210 		ar = 1;
211 		ad = 1;
212 		break;
213 	case ACMP:
214 	case ACMPU:
215 	case ACMPW:
216 	case ACMPWU:
217 		s->set.cc |= E_ICC;
218 		if(p->reg == 0)
219 			s->set.cr |= E_CR0;
220 		else
221 			s->set.cr |= (0xF<<((p->reg&7)*4));
222 		ar = 1;
223 		break;
224 	case AFCMPO:
225 	case AFCMPU:
226 		s->set.cc |= E_FCC;
227 		if(p->reg == 0)
228 			s->set.cr |= E_CR0;
229 		else
230 			s->set.cr |= (0xF<<((p->reg&7)*4));
231 		ar = 1;
232 		break;
233 	case ACRAND:
234 	case ACRANDN:
235 	case ACREQV:
236 	case ACRNAND:
237 	case ACRNOR:
238 	case ACROR:
239 	case ACRORN:
240 	case ACRXOR:
241 		s->used.cr |= 1<<p->from.reg;
242 		s->set.cr |= 1<<p->to.reg;
243 		nr = 1;
244 		break;
245 	case ABCL:	/* tricky */
246 		s->used.cc |= E_FCC|E_ICC;
247 		s->used.cr = ALL;
248 		s->set.cc |= E_LR;
249 		ar = 1;
250 		break;
251 	case ABC:		/* tricky */
252 		s->used.cc |= E_FCC|E_ICC;
253 		s->used.cr = ALL;
254 		ar = 1;
255 		break;
256 	case ABEQ:
257 	case ABGE:
258 	case ABGT:
259 	case ABLE:
260 	case ABLT:
261 	case ABNE:
262 	case ABVC:
263 	case ABVS:
264 		s->used.cc |= E_ICC;
265 		s->used.cr |= E_CR0;
266 		ar = 1;
267 		break;
268 	case ALSW:
269 	case AMOVMW:
270 		/* could do better */
271 		sz = 32*4;
272 		ld = 1;
273 		break;
274 	case AMOVBU:
275 	case AMOVBZU:
276 		upd = 1;
277 		sz = 1;
278 		ld = 1;
279 		break;
280 	case AMOVB:
281 	case AMOVBZ:
282 		sz = 1;
283 		ld = 1;
284 		break;
285 	case AMOVHU:
286 	case AMOVHZU:
287 		upd = 1;
288 		sz = 2;
289 		ld = 1;
290 		break;
291 	case AMOVH:
292 	case AMOVHBR:
293 	case AMOVHZ:
294 		sz = 2;
295 		ld = 1;
296 		break;
297 	case AFMOVSU:
298 	case AMOVWU:
299 	case AMOVWZU:
300 		upd = 1;
301 		sz = 4;
302 		ld = 1;
303 		break;
304 	case AFMOVS:
305 	case AMOVW:
306 	case AMOVWZ:
307 	case AMOVWBR:
308 	case ALWAR:
309 		sz = 4;
310 		ld = 1;
311 		break;
312 	case AFMOVDU:
313 		upd = 1;
314 		sz = 8;
315 		ld = 1;
316 		break;
317 	case AFMOVD:
318 		sz = 8;
319 		ld = 1;
320 		break;
321 	case AFMOVDCC:
322 		sz = 8;
323 		ld = 1;
324 		s->set.cc |= E_FCC;
325 		s->set.cr |= E_CR1;
326 		break;
327 	case AMOVFL:
328 	case AMOVCRFS:
329 	case AMTFSB0:
330 	case AMTFSB0CC:
331 	case AMTFSB1:
332 	case AMTFSB1CC:
333 		s->set.ireg = ALL;
334 		s->set.freg = ALL;
335 		s->set.cc = ALL;
336 		s->set.cr = ALL;
337 		break;
338 	case AADDCC:
339 	case AADDVCC:
340 	case AADDCCC:
341 	case AADDCVCC:
342 	case AADDMECC:
343 	case AADDMEVCC:
344 	case AADDECC:
345 	case AADDEVCC:
346 	case AADDZECC:
347 	case AADDZEVCC:
348 	case AANDCC:
349 	case AANDNCC:
350 	case ACNTLZWCC:
351 	case ADIVWCC:
352 	case ADIVWVCC:
353 	case ADIVWUCC:
354 	case ADIVWUVCC:
355 	case AEQVCC:
356 	case AEXTSBCC:
357 	case AEXTSHCC:
358 	case AMULHWCC:
359 	case AMULHWUCC:
360 	case AMULLWCC:
361 	case AMULLWVCC:
362 	case ANANDCC:
363 	case ANEGCC:
364 	case ANEGVCC:
365 	case ANORCC:
366 	case AORCC:
367 	case AORNCC:
368 	case AREMCC:
369 	case AREMVCC:
370 	case AREMUCC:
371 	case AREMUVCC:
372 	case ARLWMICC:
373 	case ARLWNMCC:
374 	case ASLWCC:
375 	case ASRAWCC:
376 	case ASRWCC:
377 	case ASTWCCC:
378 	case ASUBCC:
379 	case ASUBVCC:
380 	case ASUBCCC:
381 	case ASUBCVCC:
382 	case ASUBMECC:
383 	case ASUBMEVCC:
384 	case ASUBECC:
385 	case ASUBEVCC:
386 	case ASUBZECC:
387 	case ASUBZEVCC:
388 	case AXORCC:
389 		s->set.cc |= E_ICC;
390 		s->set.cr |= E_CR0;
391 		break;
392 	case AFABSCC:
393 	case AFADDCC:
394 	case AFADDSCC:
395 	case AFCTIWCC:
396 	case AFCTIWZCC:
397 	case AFDIVCC:
398 	case AFDIVSCC:
399 	case AFMADDCC:
400 	case AFMADDSCC:
401 	case AFMSUBCC:
402 	case AFMSUBSCC:
403 	case AFMULCC:
404 	case AFMULSCC:
405 	case AFNABSCC:
406 	case AFNEGCC:
407 	case AFNMADDCC:
408 	case AFNMADDSCC:
409 	case AFNMSUBCC:
410 	case AFNMSUBSCC:
411 	case AFRSPCC:
412 	case AFSUBCC:
413 	case AFSUBSCC:
414 		s->set.cc |= E_FCC;
415 		s->set.cr |= E_CR1;
416 		break;
417 	}
418 
419 /*
420  * flags based on 'to' field
421  */
422 	c = p->to.class;
423 	if(c == 0) {
424 		c = aclass(&p->to) + 1;
425 		p->to.class = c;
426 	}
427 	c--;
428 	switch(c) {
429 	default:
430 		print("unknown class %d %D\n", c, &p->to);
431 
432 	case C_NONE:
433 	case C_ZCON:
434 	case C_SCON:
435 	case C_UCON:
436 	case C_LCON:
437 	case C_ADDCON:
438 	case C_ANDCON:
439 	case C_SBRA:
440 	case C_LBRA:
441 		break;
442 	case C_CREG:
443 		c = p->to.reg;
444 		if(c == NREG)
445 			s->set.cr = ALL;
446 		else
447 			s->set.cr |= (0xF << ((p->from.reg&7)*4));
448 		s->set.cc = ALL;
449 		break;
450 	case C_SPR:
451 	case C_FPSCR:
452 	case C_MSR:
453 	case C_XER:
454 		s->set.ireg = ALL;
455 		s->set.freg = ALL;
456 		s->set.cc = ALL;
457 		s->set.cr = ALL;
458 		break;
459 	case C_LR:
460 		s->set.cc |= E_LR;
461 		break;
462 	case C_CTR:
463 		s->set.cc |= E_CTR;
464 		break;
465 	case C_ZOREG:
466 	case C_SOREG:
467 	case C_LOREG:
468 		c = p->to.reg;
469 		s->used.ireg |= 1<<c;
470 		if(upd)
471 			s->set.ireg |= 1<<c;
472 		if(ad)
473 			break;
474 		s->size = sz;
475 		s->soffset = regoff(&p->to);
476 
477 		m = ANYMEM;
478 		if(c == REGSB)
479 			m = E_MEMSB;
480 		if(c == REGSP)
481 			m = E_MEMSP;
482 
483 		if(ar)
484 			s->used.cc |= m;
485 		else
486 			s->set.cc |= m;
487 		break;
488 	case C_SACON:
489 	case C_LACON:
490 		s->used.ireg |= 1<<REGSP;
491 		if(upd)
492 			s->set.ireg |= 1<<c;
493 		break;
494 	case C_SECON:
495 	case C_LECON:
496 		s->used.ireg |= 1<<REGSB;
497 		if(upd)
498 			s->set.ireg |= 1<<c;
499 		break;
500 	case C_REG:
501 		if(nr)
502 			break;
503 		if(ar)
504 			s->used.ireg |= 1<<p->to.reg;
505 		else
506 			s->set.ireg |= 1<<p->to.reg;
507 		break;
508 	case C_FREG:
509 		if(ar)
510 			s->used.freg |= 1<<p->to.reg;
511 		else
512 			s->set.freg |= 1<<p->to.reg;
513 		break;
514 	case C_SAUTO:
515 	case C_LAUTO:
516 		s->used.ireg |= 1<<REGSP;
517 		if(upd)
518 			s->set.ireg |= 1<<c;
519 		if(ad)
520 			break;
521 		s->size = sz;
522 		s->soffset = regoff(&p->to);
523 
524 		if(ar)
525 			s->used.cc |= E_MEMSP;
526 		else
527 			s->set.cc |= E_MEMSP;
528 		break;
529 	case C_SEXT:
530 	case C_LEXT:
531 		s->used.ireg |= 1<<REGSB;
532 		if(upd)
533 			s->set.ireg |= 1<<c;
534 		if(ad)
535 			break;
536 		s->size = sz;
537 		s->soffset = regoff(&p->to);
538 
539 		if(ar)
540 			s->used.cc |= E_MEMSB;
541 		else
542 			s->set.cc |= E_MEMSB;
543 		break;
544 	}
545 
546 /*
547  * flags based on 'from' field
548  */
549 	c = p->from.class;
550 	if(c == 0) {
551 		c = aclass(&p->from) + 1;
552 		p->from.class = c;
553 	}
554 	c--;
555 	switch(c) {
556 	default:
557 		print("unknown class %d %D\n", c, &p->from);
558 
559 	case C_NONE:
560 	case C_ZCON:
561 	case C_SCON:
562 	case C_UCON:
563 	case C_LCON:
564 	case C_ADDCON:
565 	case C_ANDCON:
566 	case C_SBRA:
567 	case C_LBRA:
568 		c = p->from.reg;
569 		if(c != NREG)
570 			s->used.ireg |= 1<<c;
571 		break;
572 	case C_CREG:
573 		c = p->from.reg;
574 		if(c == NREG)
575 			s->used.cr = ALL;
576 		else
577 			s->used.cr |= (0xF << ((p->from.reg&7)*4));
578 		s->used.cc = ALL;
579 		break;
580 	case C_SPR:
581 	case C_FPSCR:
582 	case C_MSR:
583 	case C_XER:
584 		s->set.ireg = ALL;
585 		s->set.freg = ALL;
586 		s->set.cc = ALL;
587 		s->set.cr = ALL;
588 		break;
589 	case C_LR:
590 		s->used.cc |= E_LR;
591 		break;
592 	case C_CTR:
593 		s->used.cc |= E_CTR;
594 		break;
595 	case C_ZOREG:
596 	case C_SOREG:
597 	case C_LOREG:
598 		c = p->from.reg;
599 		s->used.ireg |= 1<<c;
600 		if(ld)
601 			p->mark |= LOAD;
602 		if(ad)
603 			break;
604 		s->size = sz;
605 		s->soffset = regoff(&p->from);
606 
607 		m = ANYMEM;
608 		if(c == REGSB)
609 			m = E_MEMSB;
610 		if(c == REGSP)
611 			m = E_MEMSP;
612 
613 		s->used.cc |= m;
614 		break;
615 	case C_SACON:
616 	case C_LACON:
617 		s->used.ireg |= 1<<REGSP;
618 		break;
619 	case C_SECON:
620 	case C_LECON:
621 		s->used.ireg |= 1<<REGSB;
622 		break;
623 	case C_REG:
624 		if(nr)
625 			break;
626 		s->used.ireg |= 1<<p->from.reg;
627 		break;
628 	case C_FREG:
629 		s->used.freg |= 1<<p->from.reg;
630 		break;
631 	case C_SAUTO:
632 	case C_LAUTO:
633 		s->used.ireg |= 1<<REGSP;
634 		if(ld)
635 			p->mark |= LOAD;
636 		if(ad)
637 			break;
638 		s->size = sz;
639 		s->soffset = regoff(&p->from);
640 
641 		s->used.cc |= E_MEMSP;
642 		break;
643 	case C_SEXT:
644 	case C_LEXT:
645 		s->used.ireg |= 1<<REGSB;
646 		if(ld)
647 			p->mark |= LOAD;
648 		if(ad)
649 			break;
650 		s->size = sz;
651 		s->soffset = regoff(&p->from);
652 
653 		s->used.cc |= E_MEMSB;
654 		break;
655 	}
656 
657 	c = p->reg;
658 	if(c != NREG) {
659 		if(p->from.type == D_FREG || p->to.type == D_FREG)
660 			s->used.freg |= 1<<c;
661 		else
662 			s->used.ireg |= 1<<c;
663 	}
664 }
665 
666 /*
667  * test to see if 2 instrictions can be
668  * interchanged without changing semantics
669  */
670 int
depend(Sch * sa,Sch * sb)671 depend(Sch *sa, Sch *sb)
672 {
673 	ulong x;
674 
675 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
676 		return 1;
677 	if(sb->set.ireg & sa->used.ireg)
678 		return 1;
679 
680 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
681 		return 1;
682 	if(sb->set.freg & sa->used.freg)
683 		return 1;
684 
685 	if(sa->set.cr & (sb->set.cr|sb->used.cr))
686 		return 1;
687 	if(sb->set.cr & sa->used.cr)
688 		return 1;
689 
690 
691 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
692 		(sb->set.cc & sa->used.cc);
693 	if(x) {
694 		/*
695 		 * allow SB and SP to pass each other.
696 		 * allow SB to pass SB iff doffsets are ok
697 		 * anything else conflicts
698 		 */
699 		if(x != E_MEMSP && x != E_MEMSB)
700 			return 1;
701 		x = sa->set.cc | sb->set.cc |
702 			sa->used.cc | sb->used.cc;
703 		if(x & E_MEM)
704 			return 1;
705 		if(offoverlap(sa, sb))
706 			return 1;
707 	}
708 
709 	return 0;
710 }
711 
712 int
offoverlap(Sch * sa,Sch * sb)713 offoverlap(Sch *sa, Sch *sb)
714 {
715 
716 	if(sa->soffset < sb->soffset) {
717 		if(sa->soffset+sa->size > sb->soffset)
718 			return 1;
719 		return 0;
720 	}
721 	if(sb->soffset+sb->size > sa->soffset)
722 		return 1;
723 	return 0;
724 }
725 
726 /*
727  * test 2 adjacent instructions
728  * and find out if inserted instructions
729  * are desired to prevent stalls.
730  * first instruction is a load instruction.
731  */
732 int
conflict(Sch * sa,Sch * sb)733 conflict(Sch *sa, Sch *sb)
734 {
735 
736 	if(sa->set.ireg & sb->used.ireg)
737 		return 1;
738 	if(sa->set.freg & sb->used.freg)
739 		return 1;
740 	if(sa->set.cr & sb->used.cr)
741 		return 1;
742 	return 0;
743 }
744 
745 int
compound(Prog * p)746 compound(Prog *p)
747 {
748 	Optab *o;
749 
750 	o = oplook(p);
751 	if(o->size != 4)
752 		return 1;
753 	if(p->to.type == D_REG && p->to.reg == REGSB)
754 		return 1;
755 	return 0;
756 }
757 
758 void
dumpbits(Sch * s,Dep * d)759 dumpbits(Sch *s, Dep *d)
760 {
761 	int i;
762 
763 	for(i=0; i<32; i++)
764 		if(d->ireg & (1<<i))
765 			Bprint(&bso, " R%d", i);
766 	for(i=0; i<32; i++)
767 		if(d->freg & (1<<i))
768 			Bprint(&bso, " F%d", i);
769 	for(i=0; i<32; i++)
770 		if(d->cr & (1<<i))
771 			Bprint(&bso, " C%d", i);
772 	for(i=0; i<32; i++)
773 		switch(d->cc & (1<<i)) {
774 		default:
775 			break;
776 		case E_ICC:
777 			Bprint(&bso, " ICC");
778 			break;
779 		case E_FCC:
780 			Bprint(&bso, " FCC");
781 			break;
782 		case E_LR:
783 			Bprint(&bso, " LR");
784 			break;
785 		case E_CR:
786 			Bprint(&bso, " CR");
787 			break;
788 		case E_CTR:
789 			Bprint(&bso, " CTR");
790 			break;
791 		case E_XER:
792 			Bprint(&bso, " XER");
793 			break;
794 		case E_MEM:
795 			Bprint(&bso, " MEM%d", s->size);
796 			break;
797 		case E_MEMSB:
798 			Bprint(&bso, " SB%d", s->size);
799 			break;
800 		case E_MEMSP:
801 			Bprint(&bso, " SP%d", s->size);
802 			break;
803 		}
804 }
805