xref: /plan9/sys/src/cmd/vl/sched.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include	"l.h"
2 
3 enum
4 {
5 	E_HILO	= 1<<0,
6 	E_FCR	= 1<<1,
7 	E_MCR	= 1<<2,
8 	E_MEM	= 1<<3,
9 	E_MEMSP	= 1<<4,	/* uses offset and size */
10 	E_MEMSB	= 1<<5,	/* uses offset and size */
11 	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12 	DELAY	= BRANCH|LOAD|FCMP,
13 };
14 
15 typedef	struct	Sch	Sch;
16 typedef	struct	Dep	Dep;
17 
18 struct	Dep
19 {
20 	ulong	ireg;
21 	ulong	freg;
22 	ulong	cc;
23 };
24 struct	Sch
25 {
26 	Prog	p;
27 	Dep	set;
28 	Dep	used;
29 	long	offset;
30 	char	size;
31 	char	nop;
32 	char	comp;
33 };
34 
35 void	regused(Sch*, Prog*);
36 int	depend(Sch*, Sch*);
37 int	conflict(Sch*, Sch*);
38 int	offoverlap(Sch*, Sch*);
39 void	dumpbits(Sch*, Dep*);
40 
41 void
42 sched(Prog *p0, Prog *pe)
43 {
44 	Prog *p, *q;
45 	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46 
47 	/*
48 	 * build side structure
49 	 */
50 	s = sch;
51 	for(p=p0;; p=p->link) {
52 		memset(s, 0, sizeof(*s));
53 		s->p = *p;
54 		regused(s, p);
55 		if(debug['X']) {
56 			Bprint(&bso, "%P%|set", &s->p, 40);
57 			dumpbits(s, &s->set);
58 			Bprint(&bso, "; used");
59 			dumpbits(s, &s->used);
60 			if(s->comp)
61 				Bprint(&bso, "; compound");
62 			if(s->p.mark & LOAD)
63 				Bprint(&bso, "; load");
64 			if(s->p.mark & BRANCH)
65 				Bprint(&bso, "; branch");
66 			if(s->p.mark & FCMP)
67 				Bprint(&bso, "; fcmp");
68 			Bprint(&bso, "\n");
69 		}
70 		if(p == pe)
71 			break;
72 		s++;
73 	}
74 	se = s;
75 
76 	/*
77 	 * prepass to move things around
78 	 * does nothing, but tries to make
79 	 * the actual scheduler work better
80 	 */
81 	for(s=sch; s<=se; s++) {
82 		if(!(s->p.mark & LOAD))
83 			continue;
84 		/* always good to put nonconflict loads together */
85 		for(t=s+1; t<=se; t++) {
86 			if(!(t->p.mark & LOAD))
87 				continue;
88 			if(t->p.mark & BRANCH)
89 				break;
90 			if(conflict(s, t))
91 				break;
92 			for(u=t-1; u>s; u--)
93 				if(depend(u, t))
94 					goto no11;
95 			u = s+1;
96 			stmp = *t;
97 			memmove(s+2, u, (uchar*)t - (uchar*)u);
98 			*u = stmp;
99 			break;
100 		}
101 	no11:
102 
103 		/* put schedule fodder above load */
104 		for(t=s+1; t<=se; t++) {
105 			if(t->p.mark & BRANCH)
106 				break;
107 			if(s > sch && conflict(s-1, t))
108 				continue;
109 			for(u=t-1; u>=s; u--)
110 				if(depend(t, u))
111 					goto no1;
112 			stmp = *t;
113 			memmove(s+1, s, (uchar*)t - (uchar*)s);
114 			*s = stmp;
115 			if(!(s->p.mark & LOAD))
116 				break;
117 		no1:;
118 		}
119 	no12:;
120 	}
121 
122 	for(s=se; s>=sch; s--) {
123 		if(!(s->p.mark & DELAY))
124 			continue;
125 		if(s < se)
126 			if(!conflict(s, s+1))
127 				goto out3;
128 		/*
129 		 * s is load, s+1 is immediate use of result or end of block
130 		 * t is the trial instruction to insert between s and s+1
131 		 */
132 		if(!debug['Y'])
133 		for(t=s-1; t>=sch; t--) {
134 			if(t->comp)
135 				if(s->p.mark & BRANCH)
136 					goto no2;
137 			if(t->p.mark & DELAY)
138 				if(s >= se || conflict(t, s+1))
139 					goto no2;
140 			for(u=t+1; u<=s; u++)
141 				if(depend(u, t))
142 					goto no2;
143 			goto out2;
144 		no2:;
145 		}
146 		if(debug['X'])
147 			Bprint(&bso, "?l%P\n", &s->p);
148 		s->nop = 1;
149 		if(debug['v']) {
150 			if(s->p.mark & LOAD) {
151 				nop.load.count++;
152 				nop.load.outof++;
153 			}
154 			if(s->p.mark & BRANCH) {
155 				nop.branch.count++;
156 				nop.branch.outof++;
157 			}
158 			if(s->p.mark & FCMP) {
159 				nop.fcmp.count++;
160 				nop.fcmp.outof++;
161 			}
162 		}
163 		continue;
164 
165 	out2:
166 		if(debug['X']) {
167 			Bprint(&bso, "!l%P\n", &t->p);
168 			Bprint(&bso, "%P\n", &s->p);
169 		}
170 		stmp = *t;
171 		memmove(t, t+1, (uchar*)s - (uchar*)t);
172 		*s = stmp;
173 		s--;
174 
175 	out3:
176 		if(debug['v']) {
177 			if(s->p.mark & LOAD)
178 				nop.load.outof++;
179 			if(s->p.mark & BRANCH)
180 				nop.branch.outof++;
181 			if(s->p.mark & FCMP)
182 				nop.fcmp.outof++;
183 		}
184 	}
185 
186 	/* Avoid HI/LO use->set */
187 	t = sch+1;
188 	for(s=sch; s<se-1; s++, t++) {
189 		if((s->used.cc & E_HILO) == 0)
190 			continue;
191 		if(t->set.cc & E_HILO)
192 			s->nop = 2;
193 	}
194 
195 	/*
196 	 * put it all back
197 	 */
198 	for(s=sch, p=p0; s<=se; s++, p=q) {
199 		q = p->link;
200 		if(q != s->p.link) {
201 			*p = s->p;
202 			p->link = q;
203 		}
204 		while(s->nop--)
205 			addnop(p);
206 	}
207 	if(debug['X']) {
208 		Bprint(&bso, "\n");
209 		Bflush(&bso);
210 	}
211 }
212 
213 void
214 regused(Sch *s, Prog *realp)
215 {
216 	int c, ar, ad, ld, sz;
217 	ulong m;
218 	Prog *p;
219 
220 	p = &s->p;
221 	s->comp = compound(p);
222 	s->nop = 0;
223 	if(s->comp) {
224 		s->set.ireg |= 1<<REGTMP;
225 		s->used.ireg |= 1<<REGTMP;
226 	}
227 
228 	ar = 0;		/* dest is really reference */
229 	ad = 0;		/* source/dest is really address */
230 	ld = 0;		/* opcode is load instruction */
231 	sz = 20;	/* size of load/store for overlap computation */
232 
233 /*
234  * flags based on opcode
235  */
236 	switch(p->as) {
237 	case ATEXT:
238 		curtext = realp;
239 		autosize = p->to.offset + 4;
240 		ad = 1;
241 		break;
242 	case AJAL:
243 		c = p->reg;
244 		if(c == NREG)
245 			c = REGLINK;
246 		s->set.ireg |= 1<<c;
247 		ar = 1;
248 		ad = 1;
249 		break;
250 	case ABGEZAL:
251 	case ABLTZAL:
252 		s->set.ireg |= 1<<REGLINK;
253 	case ABEQ:
254 	case ABGEZ:
255 	case ABGTZ:
256 	case ABLEZ:
257 	case ABLTZ:
258 	case ABNE:
259 		ar = 1;
260 		ad = 1;
261 		break;
262 	case ABFPT:
263 	case ABFPF:
264 		ad = 1;
265 		s->used.cc |= E_FCR;
266 		break;
267 	case ACMPEQD:
268 	case ACMPEQF:
269 	case ACMPGED:
270 	case ACMPGEF:
271 	case ACMPGTD:
272 	case ACMPGTF:
273 		ar = 1;
274 		s->set.cc |= E_FCR;
275 		p->mark |= FCMP;
276 		break;
277 	case AJMP:
278 		ar = 1;
279 		ad = 1;
280 		break;
281 	case AMOVB:
282 	case AMOVBU:
283 		sz = 1;
284 		ld = 1;
285 		break;
286 	case AMOVH:
287 	case AMOVHU:
288 		sz = 2;
289 		ld = 1;
290 		break;
291 	case AMOVF:
292 	case AMOVW:
293 	case AMOVWL:
294 	case AMOVWR:
295 		sz = 4;
296 		ld = 1;
297 		break;
298 	case AMOVD:
299 	case AMOVV:
300 	case AMOVVL:
301 	case AMOVVR:
302 		sz = 8;
303 		ld = 1;
304 		break;
305 	case ADIV:
306 	case ADIVU:
307 	case AMUL:
308 	case AMULU:
309 	case AREM:
310 	case AREMU:
311 		s->set.cc = E_HILO;
312 	case AADD:
313 	case AADDU:
314 	case AAND:
315 	case ANOR:
316 	case AOR:
317 	case ASGT:
318 	case ASGTU:
319 	case ASLL:
320 	case ASRA:
321 	case ASRL:
322 	case ASUB:
323 	case ASUBU:
324 	case AXOR:
325 
326 	case AADDD:
327 	case AADDF:
328 	case AADDW:
329 	case ASUBD:
330 	case ASUBF:
331 	case ASUBW:
332 	case AMULF:
333 	case AMULD:
334 	case AMULW:
335 	case ADIVF:
336 	case ADIVD:
337 	case ADIVW:
338 		if(p->reg == NREG) {
339 			if(p->to.type == D_REG || p->to.type == D_FREG)
340 				p->reg = p->to.reg;
341 			if(p->reg == NREG)
342 				print("botch %P\n", p);
343 		}
344 		break;
345 	}
346 
347 /*
348  * flags based on 'to' field
349  */
350 	c = p->to.class;
351 	if(c == 0) {
352 		c = aclass(&p->to) + 1;
353 		p->to.class = c;
354 	}
355 	c--;
356 	switch(c) {
357 	default:
358 		print("unknown class %d %D\n", c, &p->to);
359 
360 	case C_ZCON:
361 	case C_SCON:
362 	case C_ADD0CON:
363 	case C_AND0CON:
364 	case C_ADDCON:
365 	case C_ANDCON:
366 	case C_UCON:
367 	case C_LCON:
368 	case C_NONE:
369 	case C_SBRA:
370 	case C_LBRA:
371 		break;
372 
373 	case C_HI:
374 	case C_LO:
375 		s->set.cc |= E_HILO;
376 		break;
377 	case C_FCREG:
378 		s->set.cc |= E_FCR;
379 		break;
380 	case C_MREG:
381 		s->set.cc |= E_MCR;
382 		break;
383 	case C_ZOREG:
384 	case C_SOREG:
385 	case C_LOREG:
386 		c = p->to.reg;
387 		s->used.ireg |= 1<<c;
388 		if(ad)
389 			break;
390 		s->size = sz;
391 		s->offset = regoff(&p->to);
392 
393 		m = ANYMEM;
394 		if(c == REGSB)
395 			m = E_MEMSB;
396 		if(c == REGSP)
397 			m = E_MEMSP;
398 
399 		if(ar)
400 			s->used.cc |= m;
401 		else
402 			s->set.cc |= m;
403 		break;
404 	case C_SACON:
405 	case C_LACON:
406 		s->used.ireg |= 1<<REGSP;
407 		break;
408 	case C_SECON:
409 	case C_LECON:
410 		s->used.ireg |= 1<<REGSB;
411 		break;
412 	case C_REG:
413 		if(ar)
414 			s->used.ireg |= 1<<p->to.reg;
415 		else
416 			s->set.ireg |= 1<<p->to.reg;
417 		break;
418 	case C_FREG:
419 		/* do better -- determine double prec */
420 		if(ar) {
421 			s->used.freg |= 1<<p->to.reg;
422 			s->used.freg |= 1<<(p->to.reg|1);
423 		} else {
424 			s->set.freg |= 1<<p->to.reg;
425 			s->set.freg |= 1<<(p->to.reg|1);
426 		}
427 		if(ld && p->from.type == D_REG)
428 			p->mark |= LOAD;
429 		break;
430 	case C_SAUTO:
431 	case C_LAUTO:
432 		s->used.ireg |= 1<<REGSP;
433 		if(ad)
434 			break;
435 		s->size = sz;
436 		s->offset = regoff(&p->to);
437 
438 		if(ar)
439 			s->used.cc |= E_MEMSP;
440 		else
441 			s->set.cc |= E_MEMSP;
442 		break;
443 	case C_SEXT:
444 	case C_LEXT:
445 		s->used.ireg |= 1<<REGSB;
446 		if(ad)
447 			break;
448 		s->size = sz;
449 		s->offset = regoff(&p->to);
450 
451 		if(ar)
452 			s->used.cc |= E_MEMSB;
453 		else
454 			s->set.cc |= E_MEMSB;
455 		break;
456 	}
457 
458 /*
459  * flags based on 'from' field
460  */
461 	c = p->from.class;
462 	if(c == 0) {
463 		c = aclass(&p->from) + 1;
464 		p->from.class = c;
465 	}
466 	c--;
467 	switch(c) {
468 	default:
469 		print("unknown class %d %D\n", c, &p->from);
470 
471 	case C_ZCON:
472 	case C_SCON:
473 	case C_ADD0CON:
474 	case C_AND0CON:
475 	case C_ADDCON:
476 	case C_ANDCON:
477 	case C_UCON:
478 	case C_LCON:
479 	case C_NONE:
480 	case C_SBRA:
481 	case C_LBRA:
482 		break;
483 	case C_HI:
484 	case C_LO:
485 		s->used.cc |= E_HILO;
486 		break;
487 	case C_FCREG:
488 		s->used.cc |= E_FCR;
489 		break;
490 	case C_MREG:
491 		s->used.cc |= E_MCR;
492 		break;
493 	case C_ZOREG:
494 	case C_SOREG:
495 	case C_LOREG:
496 		c = p->from.reg;
497 		s->used.ireg |= 1<<c;
498 		if(ld)
499 			p->mark |= LOAD;
500 		s->size = sz;
501 		s->offset = regoff(&p->from);
502 
503 		m = ANYMEM;
504 		if(c == REGSB)
505 			m = E_MEMSB;
506 		if(c == REGSP)
507 			m = E_MEMSP;
508 
509 		s->used.cc |= m;
510 		break;
511 	case C_SACON:
512 	case C_LACON:
513 		s->used.ireg |= 1<<REGSP;
514 		break;
515 	case C_SECON:
516 	case C_LECON:
517 		s->used.ireg |= 1<<REGSB;
518 		break;
519 	case C_REG:
520 		s->used.ireg |= 1<<p->from.reg;
521 		break;
522 	case C_FREG:
523 		/* do better -- determine double prec */
524 		s->used.freg |= 1<<p->from.reg;
525 		s->used.freg |= 1<<(p->from.reg|1);
526 		if(ld && p->to.type == D_REG)
527 			p->mark |= LOAD;
528 		break;
529 	case C_SAUTO:
530 	case C_LAUTO:
531 		s->used.ireg |= 1<<REGSP;
532 		if(ld)
533 			p->mark |= LOAD;
534 		if(ad)
535 			break;
536 		s->size = sz;
537 		s->offset = regoff(&p->from);
538 
539 		s->used.cc |= E_MEMSP;
540 		break;
541 	case C_SEXT:
542 	case C_LEXT:
543 		s->used.ireg |= 1<<REGSB;
544 		if(ld)
545 			p->mark |= LOAD;
546 		if(ad)
547 			break;
548 		s->size = sz;
549 		s->offset = regoff(&p->from);
550 
551 		s->used.cc |= E_MEMSB;
552 		break;
553 	}
554 
555 	c = p->reg;
556 	if(c != NREG) {
557 		if(p->from.type == D_FREG || p->to.type == D_FREG) {
558 			s->used.freg |= 1<<c;
559 			s->used.freg |= 1<<(c|1);
560 		} else
561 			s->used.ireg |= 1<<c;
562 	}
563 	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
564 }
565 
566 /*
567  * test to see if 2 instrictions can be
568  * interchanged without changing semantics
569  */
570 int
571 depend(Sch *sa, Sch *sb)
572 {
573 	ulong x;
574 
575 	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
576 		return 1;
577 	if(sb->set.ireg & sa->used.ireg)
578 		return 1;
579 
580 	if(sa->set.freg & (sb->set.freg|sb->used.freg))
581 		return 1;
582 	if(sb->set.freg & sa->used.freg)
583 		return 1;
584 
585 	/*
586 	 * special case.
587 	 * loads from same address cannot pass.
588 	 * this is for hardware fifo's and the like
589 	 */
590 	if(sa->used.cc & sb->used.cc & E_MEM)
591 		if(sa->p.reg == sb->p.reg)
592 		if(regoff(&sa->p.from) == regoff(&sb->p.from))
593 			return 1;
594 
595 	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
596 		(sb->set.cc & sa->used.cc);
597 	if(x) {
598 		/*
599 		 * allow SB and SP to pass each other.
600 		 * allow SB to pass SB iff doffsets are ok
601 		 * anything else conflicts
602 		 */
603 		if(x != E_MEMSP && x != E_MEMSB)
604 			return 1;
605 		x = sa->set.cc | sb->set.cc |
606 			sa->used.cc | sb->used.cc;
607 		if(x & E_MEM)
608 			return 1;
609 		if(offoverlap(sa, sb))
610 			return 1;
611 	}
612 
613 	return 0;
614 }
615 
616 int
617 offoverlap(Sch *sa, Sch *sb)
618 {
619 
620 	if(sa->offset < sb->offset) {
621 		if(sa->offset+sa->size > sb->offset)
622 			return 1;
623 		return 0;
624 	}
625 	if(sb->offset+sb->size > sa->offset)
626 		return 1;
627 	return 0;
628 }
629 
630 /*
631  * test 2 adjacent instructions
632  * and find out if inserted instructions
633  * are desired to prevent stalls.
634  */
635 int
636 conflict(Sch *sa, Sch *sb)
637 {
638 
639 	if(sa->set.ireg & sb->used.ireg)
640 		return 1;
641 	if(sa->set.freg & sb->used.freg)
642 		return 1;
643 	if(sa->set.cc & sb->used.cc)
644 		return 1;
645 
646 	return 0;
647 }
648 
649 int
650 compound(Prog *p)
651 {
652 	Optab *o;
653 
654 	o = oplook(p);
655 	if(o->size != 4)
656 		return 1;
657 	if(p->to.type == D_REG && p->to.reg == REGSB)
658 		return 1;
659 	return 0;
660 }
661 
662 void
663 dumpbits(Sch *s, Dep *d)
664 {
665 	int i;
666 
667 	for(i=0; i<32; i++)
668 		if(d->ireg & (1<<i))
669 			Bprint(&bso, " R%d", i);
670 	for(i=0; i<32; i++)
671 		if(d->freg & (1<<i))
672 			Bprint(&bso, " F%d", i);
673 	for(i=0; i<32; i++)
674 		switch(d->cc & (1<<i)) {
675 		default:
676 			break;
677 		case E_HILO:
678 			Bprint(&bso, " HILO");
679 			break;
680 		case E_FCR:
681 			Bprint(&bso, " FCR");
682 			break;
683 		case E_MCR:
684 			Bprint(&bso, " MCR");
685 			break;
686 		case E_MEM:
687 			Bprint(&bso, " MEM%d", s->size);
688 			break;
689 		case E_MEMSB:
690 			Bprint(&bso, " SB%d", s->size);
691 			break;
692 		case E_MEMSP:
693 			Bprint(&bso, " SP%d", s->size);
694 			break;
695 		}
696 }
697