xref: /inferno-os/utils/vl/sched.c (revision 9dc22068e29604f4b484e746112a9a4efe6fd57f)
1  #include	"l.h"
2  
3  enum
4  {
5  	E_HILO	= 1<<0,
6  	E_FCR	= 1<<1,
7  	E_MCR	= 1<<2,
8  	E_MEM	= 1<<3,
9  	E_MEMSP	= 1<<4,	/* uses offset and size */
10  	E_MEMSB	= 1<<5,	/* uses offset and size */
11  	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
12  	DELAY	= BRANCH|LOAD|FCMP,
13  };
14  
15  typedef	struct	Sch	Sch;
16  typedef	struct	Dep	Dep;
17  
18  struct	Dep
19  {
20  	ulong	ireg;
21  	ulong	freg;
22  	ulong	cc;
23  };
24  struct	Sch
25  {
26  	Prog	p;
27  	Dep	set;
28  	Dep	used;
29  	long	soffset;
30  	char	size;
31  	char	nop;
32  	char	comp;
33  };
34  
35  void	regsused(Sch*, Prog*);
36  int	depend(Sch*, Sch*);
37  int	conflict(Sch*, Sch*);
38  int	offoverlap(Sch*, Sch*);
39  void	dumpbits(Sch*, Dep*);
40  
41  void
42  sched(Prog *p0, Prog *pe)
43  {
44  	Prog *p, *q;
45  	Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46  
47  	/*
48  	 * build side structure
49  	 */
50  	s = sch;
51  	for(p=p0;; p=p->link) {
52  		memset(s, 0, sizeof(*s));
53  		s->p = *p;
54  		regsused(s, p);
55  		if(debug['X']) {
56  			Bprint(&bso, "%P\t\tset", &s->p);
57  			dumpbits(s, &s->set);
58  			Bprint(&bso, "; used");
59  			dumpbits(s, &s->used);
60  			if(s->comp)
61  				Bprint(&bso, "; compound");
62  			if(s->p.mark & LOAD)
63  				Bprint(&bso, "; load");
64  			if(s->p.mark & BRANCH)
65  				Bprint(&bso, "; branch");
66  			if(s->p.mark & FCMP)
67  				Bprint(&bso, "; fcmp");
68  			Bprint(&bso, "\n");
69  		}
70  		if(p == pe)
71  			break;
72  		s++;
73  	}
74  	se = s;
75  
76  	/*
77  	 * prepass to move things around
78  	 * does nothing, but tries to make
79  	 * the actual scheduler work better
80  	 */
81  	for(s=sch; s<=se; s++) {
82  		if(!(s->p.mark & LOAD))
83  			continue;
84  		/* always good to put nonconflict loads together */
85  		for(t=s+1; t<=se; t++) {
86  			if(!(t->p.mark & LOAD))
87  				continue;
88  			if(t->p.mark & BRANCH)
89  				break;
90  			if(conflict(s, t))
91  				break;
92  			for(u=t-1; u>s; u--)
93  				if(depend(u, t))
94  					goto no11;
95  			u = s+1;
96  			stmp = *t;
97  			memmove(s+2, u, (uchar*)t - (uchar*)u);
98  			*u = stmp;
99  			break;
100  		}
101  	no11:
102  
103  		/* put schedule fodder above load */
104  		for(t=s+1; t<=se; t++) {
105  			if(t->p.mark & BRANCH)
106  				break;
107  			if(s > sch && conflict(s-1, t))
108  				continue;
109  			for(u=t-1; u>=s; u--)
110  				if(depend(t, u))
111  					goto no1;
112  			stmp = *t;
113  			memmove(s+1, s, (uchar*)t - (uchar*)s);
114  			*s = stmp;
115  			if(!(s->p.mark & LOAD))
116  				break;
117  		no1:;
118  		}
119  	}
120  
121  	for(s=se; s>=sch; s--) {
122  		if(!(s->p.mark & DELAY))
123  			continue;
124  		if(s < se)
125  			if(!conflict(s, s+1))
126  				goto out3;
127  		/*
128  		 * s is load, s+1 is immediate use of result or end of block
129  		 * t is the trial instruction to insert between s and s+1
130  		 */
131  		if(!debug['Y'])
132  		for(t=s-1; t>=sch; t--) {
133  			if(t->comp)
134  				if(s->p.mark & BRANCH)
135  					goto no2;
136  			if(t->p.mark & DELAY)
137  				if(s >= se || conflict(t, s+1))
138  					goto no2;
139  			for(u=t+1; u<=s; u++)
140  				if(depend(u, t))
141  					goto no2;
142  			goto out2;
143  		no2:;
144  		}
145  		if(debug['X'])
146  			Bprint(&bso, "?l%P\n", &s->p);
147  		s->nop = 1;
148  		if(debug['v']) {
149  			if(s->p.mark & LOAD) {
150  				nop.load.count++;
151  				nop.load.outof++;
152  			}
153  			if(s->p.mark & BRANCH) {
154  				nop.branch.count++;
155  				nop.branch.outof++;
156  			}
157  			if(s->p.mark & FCMP) {
158  				nop.fcmp.count++;
159  				nop.fcmp.outof++;
160  			}
161  		}
162  		continue;
163  
164  	out2:
165  		if(debug['X']) {
166  			Bprint(&bso, "!l%P\n", &t->p);
167  			Bprint(&bso, "%P\n", &s->p);
168  		}
169  		stmp = *t;
170  		memmove(t, t+1, (uchar*)s - (uchar*)t);
171  		*s = stmp;
172  		s--;
173  
174  	out3:
175  		if(debug['v']) {
176  			if(s->p.mark & LOAD)
177  				nop.load.outof++;
178  			if(s->p.mark & BRANCH)
179  				nop.branch.outof++;
180  			if(s->p.mark & FCMP)
181  				nop.fcmp.outof++;
182  		}
183  	}
184  
185  	/* Avoid HI/LO use->set */
186  	t = sch+1;
187  	for(s=sch; s<se-1; s++, t++) {
188  		if((s->used.cc & E_HILO) == 0)
189  			continue;
190  		if(t->set.cc & E_HILO)
191  			s->nop = 2;
192  	}
193  
194  	/*
195  	 * put it all back
196  	 */
197  	for(s=sch, p=p0; s<=se; s++, p=q) {
198  		q = p->link;
199  		if(q != s->p.link) {
200  			*p = s->p;
201  			p->link = q;
202  		}
203  		while(s->nop--)
204  			addnop(p);
205  	}
206  	if(debug['X']) {
207  		Bprint(&bso, "\n");
208  		Bflush(&bso);
209  	}
210  }
211  
212  void
213  regsused(Sch *s, Prog *realp)
214  {
215  	int c, ar, ad, ld, sz;
216  	ulong m;
217  	Prog *p;
218  
219  	p = &s->p;
220  	s->comp = compound(p);
221  	s->nop = 0;
222  	if(s->comp) {
223  		s->set.ireg |= 1<<REGTMP;
224  		s->used.ireg |= 1<<REGTMP;
225  	}
226  
227  	ar = 0;		/* dest is really reference */
228  	ad = 0;		/* source/dest is really address */
229  	ld = 0;		/* opcode is load instruction */
230  	sz = 20;	/* size of load/store for overlap computation */
231  
232  /*
233   * flags based on opcode
234   */
235  	switch(p->as) {
236  	case ATEXT:
237  		curtext = realp;
238  		autosize = p->to.offset + 4;
239  		ad = 1;
240  		break;
241  	case AJAL:
242  		c = p->reg;
243  		if(c == NREG)
244  			c = REGLINK;
245  		s->set.ireg |= 1<<c;
246  		ar = 1;
247  		ad = 1;
248  		break;
249  	case ABGEZAL:
250  	case ABLTZAL:
251  		s->set.ireg |= 1<<REGLINK;
252  	case ABEQ:
253  	case ABGEZ:
254  	case ABGTZ:
255  	case ABLEZ:
256  	case ABLTZ:
257  	case ABNE:
258  		ar = 1;
259  		ad = 1;
260  		break;
261  	case ABFPT:
262  	case ABFPF:
263  		ad = 1;
264  		s->used.cc |= E_FCR;
265  		break;
266  	case ACMPEQD:
267  	case ACMPEQF:
268  	case ACMPGED:
269  	case ACMPGEF:
270  	case ACMPGTD:
271  	case ACMPGTF:
272  		ar = 1;
273  		s->set.cc |= E_FCR;
274  		p->mark |= FCMP;
275  		break;
276  	case AJMP:
277  		ar = 1;
278  		ad = 1;
279  		break;
280  	case AMOVB:
281  	case AMOVBU:
282  		sz = 1;
283  		ld = 1;
284  		break;
285  	case AMOVH:
286  	case AMOVHU:
287  		sz = 2;
288  		ld = 1;
289  		break;
290  	case AMOVF:
291  	case AMOVW:
292  	case AMOVWL:
293  	case AMOVWR:
294  		sz = 4;
295  		ld = 1;
296  		break;
297  	case AMOVD:
298  	case AMOVV:
299  	case AMOVVL:
300  	case AMOVVR:
301  		sz = 8;
302  		ld = 1;
303  		break;
304  	case ADIV:
305  	case ADIVU:
306  	case AMUL:
307  	case AMULU:
308  	case AREM:
309  	case AREMU:
310  		s->set.cc = E_HILO;
311  	case AADD:
312  	case AADDU:
313  	case AAND:
314  	case ANOR:
315  	case AOR:
316  	case ASGT:
317  	case ASGTU:
318  	case ASLL:
319  	case ASRA:
320  	case ASRL:
321  	case ASUB:
322  	case ASUBU:
323  	case AXOR:
324  
325  	case AADDD:
326  	case AADDF:
327  	case AADDW:
328  	case ASUBD:
329  	case ASUBF:
330  	case ASUBW:
331  	case AMULF:
332  	case AMULD:
333  	case AMULW:
334  	case ADIVF:
335  	case ADIVD:
336  	case ADIVW:
337  		if(p->reg == NREG) {
338  			if(p->to.type == D_REG || p->to.type == D_FREG)
339  				p->reg = p->to.reg;
340  			if(p->reg == NREG)
341  				print("botch %P\n", p);
342  		}
343  		break;
344  	}
345  
346  /*
347   * flags based on 'to' field
348   */
349  	c = p->to.class;
350  	if(c == 0) {
351  		c = aclass(&p->to) + 1;
352  		p->to.class = c;
353  	}
354  	c--;
355  	switch(c) {
356  	default:
357  		print("unknown class %d %D\n", c, &p->to);
358  
359  	case C_ZCON:
360  	case C_SCON:
361  	case C_ADD0CON:
362  	case C_AND0CON:
363  	case C_ADDCON:
364  	case C_ANDCON:
365  	case C_UCON:
366  	case C_LCON:
367  	case C_NONE:
368  	case C_SBRA:
369  	case C_LBRA:
370  		break;
371  
372  	case C_HI:
373  	case C_LO:
374  		s->set.cc |= E_HILO;
375  		break;
376  	case C_FCREG:
377  		s->set.cc |= E_FCR;
378  		break;
379  	case C_MREG:
380  		s->set.cc |= E_MCR;
381  		break;
382  	case C_ZOREG:
383  	case C_SOREG:
384  	case C_LOREG:
385  		c = p->to.reg;
386  		s->used.ireg |= 1<<c;
387  		if(ad)
388  			break;
389  		s->size = sz;
390  		s->soffset = regoff(&p->to);
391  
392  		m = ANYMEM;
393  		if(c == REGSB)
394  			m = E_MEMSB;
395  		if(c == REGSP)
396  			m = E_MEMSP;
397  
398  		if(ar)
399  			s->used.cc |= m;
400  		else
401  			s->set.cc |= m;
402  		break;
403  	case C_SACON:
404  	case C_LACON:
405  		s->used.ireg |= 1<<REGSP;
406  		break;
407  	case C_SECON:
408  	case C_LECON:
409  		s->used.ireg |= 1<<REGSB;
410  		break;
411  	case C_REG:
412  		if(ar)
413  			s->used.ireg |= 1<<p->to.reg;
414  		else
415  			s->set.ireg |= 1<<p->to.reg;
416  		break;
417  	case C_FREG:
418  		/* do better -- determine double prec */
419  		if(ar) {
420  			s->used.freg |= 1<<p->to.reg;
421  			s->used.freg |= 1<<(p->to.reg|1);
422  		} else {
423  			s->set.freg |= 1<<p->to.reg;
424  			s->set.freg |= 1<<(p->to.reg|1);
425  		}
426  		if(ld && p->from.type == D_REG)
427  			p->mark |= LOAD;
428  		break;
429  	case C_SAUTO:
430  	case C_LAUTO:
431  		s->used.ireg |= 1<<REGSP;
432  		if(ad)
433  			break;
434  		s->size = sz;
435  		s->soffset = regoff(&p->to);
436  
437  		if(ar)
438  			s->used.cc |= E_MEMSP;
439  		else
440  			s->set.cc |= E_MEMSP;
441  		break;
442  	case C_SEXT:
443  	case C_LEXT:
444  		s->used.ireg |= 1<<REGSB;
445  		if(ad)
446  			break;
447  		s->size = sz;
448  		s->soffset = regoff(&p->to);
449  
450  		if(ar)
451  			s->used.cc |= E_MEMSB;
452  		else
453  			s->set.cc |= E_MEMSB;
454  		break;
455  	}
456  
457  /*
458   * flags based on 'from' field
459   */
460  	c = p->from.class;
461  	if(c == 0) {
462  		c = aclass(&p->from) + 1;
463  		p->from.class = c;
464  	}
465  	c--;
466  	switch(c) {
467  	default:
468  		print("unknown class %d %D\n", c, &p->from);
469  
470  	case C_ZCON:
471  	case C_SCON:
472  	case C_ADD0CON:
473  	case C_AND0CON:
474  	case C_ADDCON:
475  	case C_ANDCON:
476  	case C_UCON:
477  	case C_LCON:
478  	case C_NONE:
479  	case C_SBRA:
480  	case C_LBRA:
481  		break;
482  	case C_HI:
483  	case C_LO:
484  		s->used.cc |= E_HILO;
485  		break;
486  	case C_FCREG:
487  		s->used.cc |= E_FCR;
488  		break;
489  	case C_MREG:
490  		s->used.cc |= E_MCR;
491  		break;
492  	case C_ZOREG:
493  	case C_SOREG:
494  	case C_LOREG:
495  		c = p->from.reg;
496  		s->used.ireg |= 1<<c;
497  		if(ld)
498  			p->mark |= LOAD;
499  		s->size = sz;
500  		s->soffset = regoff(&p->from);
501  
502  		m = ANYMEM;
503  		if(c == REGSB)
504  			m = E_MEMSB;
505  		if(c == REGSP)
506  			m = E_MEMSP;
507  
508  		s->used.cc |= m;
509  		break;
510  	case C_SACON:
511  	case C_LACON:
512  		s->used.ireg |= 1<<REGSP;
513  		break;
514  	case C_SECON:
515  	case C_LECON:
516  		s->used.ireg |= 1<<REGSB;
517  		break;
518  	case C_REG:
519  		s->used.ireg |= 1<<p->from.reg;
520  		break;
521  	case C_FREG:
522  		/* do better -- determine double prec */
523  		s->used.freg |= 1<<p->from.reg;
524  		s->used.freg |= 1<<(p->from.reg|1);
525  		if(ld && p->to.type == D_REG)
526  			p->mark |= LOAD;
527  		break;
528  	case C_SAUTO:
529  	case C_LAUTO:
530  		s->used.ireg |= 1<<REGSP;
531  		if(ld)
532  			p->mark |= LOAD;
533  		if(ad)
534  			break;
535  		s->size = sz;
536  		s->soffset = regoff(&p->from);
537  
538  		s->used.cc |= E_MEMSP;
539  		break;
540  	case C_SEXT:
541  	case C_LEXT:
542  		s->used.ireg |= 1<<REGSB;
543  		if(ld)
544  			p->mark |= LOAD;
545  		if(ad)
546  			break;
547  		s->size = sz;
548  		s->soffset = regoff(&p->from);
549  
550  		s->used.cc |= E_MEMSB;
551  		break;
552  	}
553  
554  	c = p->reg;
555  	if(c != NREG) {
556  		if(p->from.type == D_FREG || p->to.type == D_FREG) {
557  			s->used.freg |= 1<<c;
558  			s->used.freg |= 1<<(c|1);
559  		} else
560  			s->used.ireg |= 1<<c;
561  	}
562  	s->set.ireg &= ~(1<<REGZERO);		/* R0 cant be set */
563  }
564  
565  /*
566   * test to see if 2 instrictions can be
567   * interchanged without changing semantics
568   */
569  int
570  depend(Sch *sa, Sch *sb)
571  {
572  	ulong x;
573  
574  	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575  		return 1;
576  	if(sb->set.ireg & sa->used.ireg)
577  		return 1;
578  
579  	if(sa->set.freg & (sb->set.freg|sb->used.freg))
580  		return 1;
581  	if(sb->set.freg & sa->used.freg)
582  		return 1;
583  
584  	/*
585  	 * special case.
586  	 * loads from same address cannot pass.
587  	 * this is for hardware fifo's and the like
588  	 */
589  	if(sa->used.cc & sb->used.cc & E_MEM)
590  		if(sa->p.reg == sb->p.reg)
591  		if(regoff(&sa->p.from) == regoff(&sb->p.from))
592  			return 1;
593  
594  	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595  		(sb->set.cc & sa->used.cc);
596  	if(x) {
597  		/*
598  		 * allow SB and SP to pass each other.
599  		 * allow SB to pass SB iff doffsets are ok
600  		 * anything else conflicts
601  		 */
602  		if(x != E_MEMSP && x != E_MEMSB)
603  			return 1;
604  		x = sa->set.cc | sb->set.cc |
605  			sa->used.cc | sb->used.cc;
606  		if(x & E_MEM)
607  			return 1;
608  		if(offoverlap(sa, sb))
609  			return 1;
610  	}
611  
612  	return 0;
613  }
614  
615  int
616  offoverlap(Sch *sa, Sch *sb)
617  {
618  
619  	if(sa->soffset < sb->soffset) {
620  		if(sa->soffset+sa->size > sb->soffset)
621  			return 1;
622  		return 0;
623  	}
624  	if(sb->soffset+sb->size > sa->soffset)
625  		return 1;
626  	return 0;
627  }
628  
629  /*
630   * test 2 adjacent instructions
631   * and find out if inserted instructions
632   * are desired to prevent stalls.
633   */
634  int
635  conflict(Sch *sa, Sch *sb)
636  {
637  
638  	if(sa->set.ireg & sb->used.ireg)
639  		return 1;
640  	if(sa->set.freg & sb->used.freg)
641  		return 1;
642  	if(sa->set.cc & sb->used.cc)
643  		return 1;
644  
645  	return 0;
646  }
647  
648  int
649  compound(Prog *p)
650  {
651  	Optab *o;
652  
653  	o = oplook(p);
654  	if(o->size != 4)
655  		return 1;
656  	if(p->to.type == D_REG && p->to.reg == REGSB)
657  		return 1;
658  	return 0;
659  }
660  
661  void
662  dumpbits(Sch *s, Dep *d)
663  {
664  	int i;
665  
666  	for(i=0; i<32; i++)
667  		if(d->ireg & (1<<i))
668  			Bprint(&bso, " R%d", i);
669  	for(i=0; i<32; i++)
670  		if(d->freg & (1<<i))
671  			Bprint(&bso, " F%d", i);
672  	for(i=0; i<32; i++)
673  		switch(d->cc & (1<<i)) {
674  		default:
675  			break;
676  		case E_HILO:
677  			Bprint(&bso, " HILO");
678  			break;
679  		case E_FCR:
680  			Bprint(&bso, " FCR");
681  			break;
682  		case E_MCR:
683  			Bprint(&bso, " MCR");
684  			break;
685  		case E_MEM:
686  			Bprint(&bso, " MEM%d", s->size);
687  			break;
688  		case E_MEMSB:
689  			Bprint(&bso, " SB%d", s->size);
690  			break;
691  		case E_MEMSP:
692  			Bprint(&bso, " SP%d", s->size);
693  			break;
694  		}
695  }
696