/* /plan9-contrib/sys/src/cmd/9l/sched.c (revision d46c239f8612929b7dbade67d0d071633df3a15d) */
#include	"l.h"

enum
{
	E_MEM	= 1<<3,
	E_MEMSP	= 1<<4,	/* uses offset and size */
	E_MEMSB	= 1<<5,	/* uses offset and size */
	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
	DELAY	= BRANCH|LOAD|FCMP,

	NRWORD	= 8,
	isdouble = 0,
};

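/*
 * Dep records everything an instruction touches: a bitmap of
 * integer registers (NRWORD*32 bits) and, in cc, the E_MEM*
 * classes for memory references.  Sch pairs a copy of the Prog
 * with the dependences it sets and uses, the offset and size of
 * its memory reference for overlap tests, the number of nops to
 * emit after it, and a flag for compound (multi-word) instructions.
 */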
typedef	struct	Sch	Sch;
typedef	struct	Dep	Dep;

struct	Dep
{
	ulong	ireg[NRWORD];
	ulong	cc;
};
struct	Sch
{
	Prog	p;
	Dep	set;
	Dep	used;
	long	offset;
	char	size;
	char	nop;
	char	comp;
};

void	bitset(ulong*, int);
int	bittest(ulong*, int);

void	regused(Sch*, Prog*);
int	depend(Sch*, Sch*);
int	conflict(Sch*, Sch*);
int	offoverlap(Sch*, Sch*);
void	dumpbits(Sch*, Dep*);

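/*
 * sched reorders the instructions between p0 and pe (one basic
 * block of at most NSCHED instructions) so that each load, branch
 * or floating compare is followed by an instruction that does not
 * depend on its result; when no such filler can be found for a
 * branch, a nop is emitted after it instead.
 */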
void
sched(Prog *p0, Prog *pe)
{
	Prog *p, *q;
	Sch sch[NSCHED], *s, *t, *u, *se, stmp;

	/*
	 * build side structure
	 */
	s = sch;
	for(p=p0;; p=p->link) {
		memset(s, 0, sizeof(*s));
		s->p = *p;
		regused(s, p);
		if(debug['X']) {
			Bprint(&bso, "%P\t\tmark %x set", &s->p, s->p.mark);
			dumpbits(s, &s->set);
			Bprint(&bso, "; used");
			dumpbits(s, &s->used);
			if(s->comp)
				Bprint(&bso, "; compound");
			if(s->p.mark & LOAD)
				Bprint(&bso, "; load");
			if(s->p.mark & BRANCH)
				Bprint(&bso, "; branch");
			if(s->p.mark & FCMP)
				Bprint(&bso, "; fcmp");
			Bprint(&bso, "\n");
		}
		if(p == pe)
			break;
		s++;
	}
	se = s;

	/*
	 * prepass to move things around
	 * does nothing, but tries to make
	 * the actual scheduler work better
	 */
	if(!debug['Y'])
	for(s=sch; s<=se; s++) {
		if(!(s->p.mark & LOAD))
			continue;
		/* always good to put nonconflict loads together */
		for(t=s+1; t<=se; t++) {
			if(!(t->p.mark & LOAD))
				continue;
			if(t->p.mark & BRANCH)
				break;
			if(conflict(s, t))
				break;
			for(u=t-1; u>s; u--)
				if(depend(u, t))
					goto no11;
			u = s+1;
			stmp = *t;
			memmove(s+2, u, (uchar*)t - (uchar*)u);
			*u = stmp;
			break;
		}
	no11:

		/* put schedule fodder above load */
		for(t=s+1; t<=se; t++) {
			if(t->p.mark & BRANCH)
				break;
			if(s > sch && conflict(s-1, t))
				continue;
			for(u=t-1; u>=s; u--)
				if(depend(t, u))
					goto no1;
			stmp = *t;
			memmove(s+1, s, (uchar*)t - (uchar*)s);
			*s = stmp;
			if(!(s->p.mark & LOAD))
				break;
		no1:;
		}
	}

	for(s=se; s>=sch; s--) {
		if(!(s->p.mark & DELAY))
			continue;
		if(s < se)
			if(!conflict(s, s+1))
				goto out3;
		/*
		 * s is load, s+1 is immediate use of result or end of block
		 * t is the trial instruction to insert between s and s+1
		 */
		if(!debug['Y'])
		for(t=s-1; t>=sch; t--) {
			if(t->comp)
				if(s->p.mark & BRANCH)
					goto no2;
			if(t->p.mark & DELAY)
				if(s >= se || conflict(t, s+1))
					goto no2;
			for(u=t+1; u<=s; u++)
				if(depend(u, t))
					goto no2;
			goto out2;
		no2:;
		}
		if(debug['X'])
			Bprint(&bso, "?l%P\n", &s->p);
		if(s->p.mark & BRANCH)
			s->nop = 1;
		if(debug['v']) {
			if(s->p.mark & LOAD) {
				nop.load.count++;
				nop.load.outof++;
			}
			if(s->p.mark & BRANCH) {
				nop.branch.count++;
				nop.branch.outof++;
			}
			if(s->p.mark & FCMP) {
				nop.fcmp.count++;
				nop.fcmp.outof++;
			}
		}
		continue;

	out2:
		if(debug['X']) {
			Bprint(&bso, "!l%P\n", &t->p);
			Bprint(&bso, "%P\n", &s->p);
		}
		stmp = *t;
		memmove(t, t+1, (uchar*)s - (uchar*)t);
		*s = stmp;
		s--;

	out3:
		if(debug['v']) {
			if(s->p.mark & LOAD)
				nop.load.outof++;
			if(s->p.mark & BRANCH)
				nop.branch.outof++;
			if(s->p.mark & FCMP)
				nop.fcmp.outof++;
		}
	}

	/*
	 * put it all back
	 */
	for(s=sch, p=p0; s<=se; s++, p=q) {
		q = p->link;
		if(q != s->p.link) {
			*p = s->p;
			p->link = q;
		}
		while(s->nop--)
			addnop(p);
	}
	if(debug['X']) {
		Bprint(&bso, "\n");
		Bflush(&bso);
	}
}

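/*
 * regused fills in s->set and s->used for the instruction in s->p:
 * the registers and memory classes it defines and references, and
 * the offset and size of any memory operand.  Compound (multi-word)
 * instructions conservatively set and use REGTMP as well.
 */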
void
regused(Sch *s, Prog *realp)
{
	int c, ar, ad, ld, sz;
	ulong m;
	Prog *p;

	p = &s->p;
	s->comp = compound(p);
	s->nop = 0;
	if(s->comp) {
		bitset(s->set.ireg, REGTMP);
		bitset(s->used.ireg, REGTMP);
	}

	ar = 0;		/* dest is really reference */
	ad = 0;		/* source/dest is really address */
	ld = 0;		/* opcode is load instruction */
	sz = 20;	/* size of load/store for overlap computation */

/*
 * flags based on opcode
 */
	switch(p->as) {
	case ATEXT:
		curtext = realp;
		autosize = p->to.offset + 4;
		ad = 1;
		break;
	case ACALL:
		c = p->reg;
		if(c == NREG)
			c = REGLINK;
		bitset(s->set.ireg, c);
		ar = 1;
		ad = 1;
		break;
	case AJMP:
		ar = 1;
		ad = 1;
		break;
	case AMOVB:
	case AMOVBU:
		sz = 1;
		ld = 1;
		break;
	case AMOVH:
	case AMOVHU:
		sz = 2;
		ld = 1;
		break;
	case AMOVF:
	case AMOVL:
		sz = 4;
		ld = 1;
		break;
	case ADIVL:
	case ADIVUL:
	case AMULL:
	case AMULUL:
	case AMULML:
	case AMULMUL:
	case AMSTEPL:
	case AMSTEPUL:
	case AMSTEPLL:
	case ADSTEPL:
	case ADSTEP0L:
	case ADSTEPLL:
	case ADSTEPRL:
		bitset(s->set.ireg, REGQ);
		bitset(s->used.ireg, REGQ);
		break;
	case AMOVD:
		sz = 8;
		ld = 1;
		if(p->from.type == D_REG && p->to.type == D_REG)
			break;
	case ALOADM:
	case ASTOREM:
		bitset(s->set.ireg, REGC);
		bitset(s->used.ireg, REGC);
		break;
	}

/*
 * flags based on 'to' field
 */
	c = p->to.class;
	if(c == 0) {
		c = aclass(&p->to) + 1;
		p->to.class = c;
	}
	c--;
	switch(c) {
	default:
		print("unknown class %d %D\n", c, &p->to);

	case C_ZCON:
	case C_SCON:
	case C_MCON:
	case C_PCON:
	case C_NCON:
	case C_LCON:
	case C_NONE:
	case C_SBRA:
	case C_LBRA:
		break;

	case C_ZOREG:
	case C_SOREG:
	case C_MOREG:
	case C_POREG:
	case C_NOREG:
	case C_LOREG:
		c = p->to.reg;
		bitset(s->used.ireg, c);
		if(ad)
			break;
		s->size = sz;
		s->offset = regoff(&p->to);

		m = ANYMEM;
		if(c == REGSP)
			m = E_MEMSP;

		if(ar)
			s->used.cc |= m;
		else
			s->set.cc |= m;
		break;

	case C_ZACON:
	case C_SACON:
	case C_MACON:
	case C_PACON:
	case C_NACON:
	case C_LACON:
		bitset(s->used.ireg, REGSP);
		break;

	case C_REG:
		if(ar)
			bitset(s->used.ireg, p->to.reg);
		else
			bitset(s->set.ireg, p->to.reg);
		break;

	case C_ZAUTO:
	case C_SAUTO:
	case C_MAUTO:
	case C_PAUTO:
	case C_NAUTO:
	case C_LAUTO:
		bitset(s->used.ireg, REGSP);
		if(ad)
			break;
		s->size = sz;
		s->offset = regoff(&p->to);

		if(ar)
			s->used.cc |= E_MEMSP;
		else
			s->set.cc |= E_MEMSP;
		break;
	case C_SEXT:
	case C_LEXT:
		if(ad)
			break;
		s->size = sz;
		s->offset = regoff(&p->to);

		if(ar)
			s->used.cc |= E_MEMSB;
		else
			s->set.cc |= E_MEMSB;
		break;
	}

/*
 * flags based on 'from' field
 */
	c = p->from.class;
	if(c == 0) {
		c = aclass(&p->from) + 1;
		p->from.class = c;
	}
	c--;
	switch(c) {
	default:
		print("unknown class %d %D\n", c, &p->from);

	case C_ZCON:
	case C_SCON:
	case C_MCON:
	case C_PCON:
	case C_NCON:
	case C_LCON:
	case C_NONE:
	case C_SBRA:
	case C_LBRA:
		break;

	case C_ZOREG:
	case C_SOREG:
	case C_MOREG:
	case C_POREG:
	case C_NOREG:
	case C_LOREG:
		c = p->from.reg;
		bitset(s->used.ireg, c);
		if(ld)
			p->mark |= LOAD;
		s->size = sz;
		s->offset = regoff(&p->from);

		m = ANYMEM;
		if(c == REGSP)
			m = E_MEMSP;

		s->used.cc |= m;
		break;
	case C_ZACON:
	case C_SACON:
	case C_MACON:
	case C_PACON:
	case C_NACON:
	case C_LACON:
		bitset(s->used.ireg, REGSP);
		break;
	case C_REG:
		bitset(s->used.ireg, p->from.reg);
		if(isdouble)
			bitset(s->used.ireg, p->from.reg+1);
		break;
	case C_ZAUTO:
	case C_SAUTO:
	case C_MAUTO:
	case C_PAUTO:
	case C_NAUTO:
	case C_LAUTO:
		bitset(s->used.ireg, REGSP);
		if(ld)
			p->mark |= LOAD;
		if(ad)
			break;
		s->size = sz;
		s->offset = regoff(&p->from);

		s->used.cc |= E_MEMSP;
		break;
	case C_SEXT:
	case C_LEXT:
		if(ld)
			p->mark |= LOAD;
		if(ad)
			break;
		s->size = sz;
		s->offset = regoff(&p->from);

		s->used.cc |= E_MEMSB;
		break;
	}

	c = p->reg;
	if(c != NREG) {
		if(isdouble) {
			bitset(s->used.ireg, c);
			bitset(s->used.ireg, c+1);
		} else
			bitset(s->used.ireg, c);
	}
}

/*
 * test to see if 2 instructions can be
 * interchanged without changing semantics
 */
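/*
 * For example, an instruction that sets R1 cannot be swapped with a
 * later one that reads or sets R1; two SP-relative references may be
 * swapped only when their offset/size ranges do not overlap, and
 * likewise for two SB-relative references.
 */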
int
depend(Sch *sa, Sch *sb)
{
	ulong x;
	int i;

	for(i=0; i<NRWORD; i++) {
		if(sa->set.ireg[i] & (sb->set.ireg[i]|sb->used.ireg[i]))
			return 1;
		if(sb->set.ireg[i] & sa->used.ireg[i])
			return 1;
	}

	/*
	 * special case.
	 * loads from same address cannot pass.
	 * this is for hardware FIFOs and the like
	 */
	if(sa->used.cc & sb->used.cc & E_MEM)
		if(sa->p.reg == sb->p.reg)
		if(regoff(&sa->p.from) == regoff(&sb->p.from))
			return 1;

	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
		(sb->set.cc & sa->used.cc);
	if(x) {
		/*
		 * allow SB and SP to pass each other.
		 * allow SB to pass SB iff offsets are ok
		 * anything else conflicts
		 */
		if(x != E_MEMSP && x != E_MEMSB)
			return 1;
		x = sa->set.cc | sb->set.cc |
			sa->used.cc | sb->used.cc;
		if(x & E_MEM)
			return 1;
		if(offoverlap(sa, sb))
			return 1;
	}

	return 0;
}

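/*
 * offoverlap reports whether the byte ranges
 * [offset, offset+size) of two memory references intersect.
 */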
int
offoverlap(Sch *sa, Sch *sb)
{

	if(sa->offset < sb->offset) {
		if(sa->offset+sa->size > sb->offset)
			return 1;
		return 0;
	}
	if(sb->offset+sb->size > sa->offset)
		return 1;
	return 0;
}

/*
 * test 2 adjacent instructions
 * and find out if inserted instructions
 * are needed to prevent stalls.
 */
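/*
 * Unlike depend, this test is one-directional: it only asks whether
 * sb reads a register or memory class that sa writes, i.e. whether
 * sb would stall immediately after sa.
 */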
int
conflict(Sch *sa, Sch *sb)
{
	int i;

	for(i=0; i<NRWORD; i++)
		if(sa->set.ireg[i] & sb->used.ireg[i])
			return 1;
	if(sa->set.cc & sb->used.cc)
		return 1;

	return 0;
}

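/*
 * compound reports whether the opcode table expands p into more
 * than one machine word (o->size != 4); regused treats such
 * instructions conservatively by marking REGTMP as both set and used.
 */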
int
compound(Prog *p)
{
	Optab *o;

	o = oplook(p);
	if(o->size != 4)
		return 1;
	return 0;
}

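/*
 * dumpbits prints a Dep for the -X debugging output: each register
 * bit as Rn, E_MEM as MEM.size, E_MEMSP and E_MEMSB as SP/SB with
 * the recorded offset and size, and any other cc bit as CCn.
 */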
void
dumpbits(Sch *s, Dep *d)
{
	int i;

	for(i=0; i<NRWORD*32; i++)
		if(bittest(d->ireg, i))
			Bprint(&bso, " R%d", i);
	for(i=0; i<32; i++)
		switch(d->cc & (1<<i)) {
		default:
			Bprint(&bso, " CC%d", i);
		case 0:
			break;
		case E_MEM:
			Bprint(&bso, " MEM.%d", s->size);
			break;
		case E_MEMSB:
			Bprint(&bso, " SB.%ld.%d", s->offset, s->size);
			break;
		case E_MEMSP:
			Bprint(&bso, " SP.%ld.%d", s->offset, s->size);
			break;
		}
}

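/*
 * bitset and bittest maintain the register bitmaps: register r is
 * bit r&31 of word r>>5, so the NRWORD words cover NRWORD*32
 * registers (e.g. bitset(d->ireg, 33) sets bit 1 of ireg[1]).
 */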
void
bitset(ulong *b, int r)
{

	b[r>>5] |= 1<<(r&31);
}

int
bittest(ulong *b, int r)
{

	if(b[r>>5] & 1<<(r&31))
		return 1;
	return 0;
}
613