1 #include "l.h"
2
3 enum
4 {
5 E_HILO = 1<<0,
6 E_FCR = 1<<1,
7 E_MCR = 1<<2,
8 E_MEM = 1<<3,
9 E_MEMSP = 1<<4, /* uses offset and size */
10 E_MEMSB = 1<<5, /* uses offset and size */
11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
12 DELAY = BRANCH|LOAD|FCMP,
13 };
14
15 typedef struct Sch Sch;
16 typedef struct Dep Dep;
17
18 struct Dep
19 {
20 ulong ireg;
21 ulong freg;
22 ulong cc;
23 };
24 struct Sch
25 {
26 Prog p;
27 Dep set;
28 Dep used;
29 long soffset;
30 char size;
31 char nop;
32 char comp;
33 };
34
35 void markregused(Sch*, Prog*);
36 int depend(Sch*, Sch*);
37 int conflict(Sch*, Sch*);
38 int offoverlap(Sch*, Sch*);
39 void dumpbits(Sch*, Dep*);
40
41 void
sched(Prog * p0,Prog * pe)42 sched(Prog *p0, Prog *pe)
43 {
44 Prog *p, *q;
45 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46
47 /*
48 * build side structure
49 */
50 s = sch;
51 for(p=p0;; p=p->link) {
52 memset(s, 0, sizeof(*s));
53 s->p = *p;
54 markregused(s, p);
55 if(debug['X']) {
56 Bprint(&bso, "%P\t\tset", &s->p);
57 dumpbits(s, &s->set);
58 Bprint(&bso, "; used");
59 dumpbits(s, &s->used);
60 if(s->comp)
61 Bprint(&bso, "; compound");
62 if(s->p.mark & LOAD)
63 Bprint(&bso, "; load");
64 if(s->p.mark & BRANCH)
65 Bprint(&bso, "; branch");
66 if(s->p.mark & FCMP)
67 Bprint(&bso, "; fcmp");
68 Bprint(&bso, "\n");
69 }
70 if(p == pe)
71 break;
72 s++;
73 }
74 se = s;
75
76 /*
77 * prepass to move things around
78 * does nothing, but tries to make
79 * the actual scheduler work better
80 */
81 for(s=sch; s<=se; s++) {
82 if(!(s->p.mark & LOAD))
83 continue;
84 /* always good to put nonconflict loads together */
85 for(t=s+1; t<=se; t++) {
86 if(!(t->p.mark & LOAD))
87 continue;
88 if(t->p.mark & BRANCH)
89 break;
90 if(conflict(s, t))
91 break;
92 for(u=t-1; u>s; u--)
93 if(depend(u, t))
94 goto no11;
95 u = s+1;
96 stmp = *t;
97 memmove(s+2, u, (uchar*)t - (uchar*)u);
98 *u = stmp;
99 break;
100 }
101 no11:
102
103 /* put schedule fodder above load */
104 for(t=s+1; t<=se; t++) {
105 if(t->p.mark & BRANCH)
106 break;
107 if(s > sch && conflict(s-1, t))
108 continue;
109 for(u=t-1; u>=s; u--)
110 if(depend(t, u))
111 goto no1;
112 stmp = *t;
113 memmove(s+1, s, (uchar*)t - (uchar*)s);
114 *s = stmp;
115 if(!(s->p.mark & LOAD))
116 break;
117 no1:;
118 }
119 }
120
121 for(s=se; s>=sch; s--) {
122 if(!(s->p.mark & DELAY))
123 continue;
124 if(s < se)
125 if(!conflict(s, s+1))
126 goto out3;
127 /*
128 * s is load, s+1 is immediate use of result or end of block
129 * t is the trial instruction to insert between s and s+1
130 */
131 if(!debug['Y'])
132 for(t=s-1; t>=sch; t--) {
133 if(t->comp)
134 if(s->p.mark & BRANCH)
135 goto no2;
136 if(t->p.mark & DELAY)
137 if(s >= se || conflict(t, s+1))
138 goto no2;
139 for(u=t+1; u<=s; u++)
140 if(depend(u, t))
141 goto no2;
142 goto out2;
143 no2:;
144 }
145 if(debug['X'])
146 Bprint(&bso, "?l%P\n", &s->p);
147 if(s->p.mark & BRANCH)
148 s->nop = 1;
149 if(debug['v']) {
150 if(s->p.mark & LOAD) {
151 nop.load.count++;
152 nop.load.outof++;
153 }
154 if(s->p.mark & BRANCH) {
155 nop.branch.count++;
156 nop.branch.outof++;
157 }
158 if(s->p.mark & FCMP) {
159 nop.fcmp.count++;
160 nop.fcmp.outof++;
161 }
162 }
163 continue;
164
165 out2:
166 if(debug['X']) {
167 Bprint(&bso, "!l%P\n", &t->p);
168 Bprint(&bso, "%P\n", &s->p);
169 }
170 stmp = *t;
171 memmove(t, t+1, (uchar*)s - (uchar*)t);
172 *s = stmp;
173 s--;
174
175 out3:
176 if(debug['v']) {
177 if(s->p.mark & LOAD)
178 nop.load.outof++;
179 if(s->p.mark & BRANCH)
180 nop.branch.outof++;
181 if(s->p.mark & FCMP)
182 nop.fcmp.outof++;
183 }
184 }
185
186 /* Avoid HI/LO use->set */
187 t = sch+1;
188 for(s=sch; s<se-1; s++, t++) {
189 if((s->used.cc & E_HILO) == 0)
190 continue;
191 if(t->set.cc & E_HILO)
192 s->nop = 2;
193 }
194
195 /*
196 * put it all back
197 */
198 for(s=sch, p=p0; s<=se; s++, p=q) {
199 q = p->link;
200 if(q != s->p.link) {
201 *p = s->p;
202 p->link = q;
203 }
204 while(s->nop--)
205 addnop(p);
206 }
207 if(debug['X']) {
208 Bprint(&bso, "\n");
209 Bflush(&bso);
210 }
211 }
212
213 void
markregused(Sch * s,Prog * realp)214 markregused(Sch *s, Prog *realp)
215 {
216 int c, ar, ad, ld, sz;
217 ulong m;
218 Prog *p;
219
220 p = &s->p;
221 s->comp = compound(p);
222 s->nop = 0;
223 if(s->comp) {
224 s->set.ireg |= 1<<REGTMP;
225 s->used.ireg |= 1<<REGTMP;
226 }
227
228 ar = 0; /* dest is really reference */
229 ad = 0; /* source/dest is really address */
230 ld = 0; /* opcode is load instruction */
231 sz = 20; /* size of load/store for overlap computation */
232
233 /*
234 * flags based on opcode
235 */
236 switch(p->as) {
237 case ATEXT:
238 curtext = realp;
239 autosize = p->to.offset + 8;
240 ad = 1;
241 break;
242 case AJAL:
243 c = p->reg;
244 if(c == NREG)
245 c = REGLINK;
246 s->set.ireg |= 1<<c;
247 ar = 1;
248 ad = 1;
249 break;
250 case ABGEZAL:
251 case ABLTZAL:
252 s->set.ireg |= 1<<REGLINK;
253 case ABEQ:
254 case ABGEZ:
255 case ABGTZ:
256 case ABLEZ:
257 case ABLTZ:
258 case ABNE:
259 ar = 1;
260 ad = 1;
261 break;
262 case ABFPT:
263 case ABFPF:
264 ad = 1;
265 s->used.cc |= E_FCR;
266 break;
267 case ACMPEQD:
268 case ACMPEQF:
269 case ACMPGED:
270 case ACMPGEF:
271 case ACMPGTD:
272 case ACMPGTF:
273 ar = 1;
274 s->set.cc |= E_FCR;
275 p->mark |= FCMP;
276 break;
277 case AJMP:
278 ar = 1;
279 ad = 1;
280 break;
281 case AMOVB:
282 case AMOVBU:
283 sz = 1;
284 ld = 1;
285 break;
286 case AMOVH:
287 case AMOVHU:
288 sz = 2;
289 ld = 1;
290 break;
291 case AMOVF:
292 case AMOVW:
293 case AMOVWL:
294 case AMOVWR:
295 sz = 4;
296 ld = 1;
297 break;
298 case AMOVD:
299 case AMOVV:
300 case AMOVVL:
301 case AMOVVR:
302 sz = 8;
303 ld = 1;
304 break;
305 case ADIV:
306 case ADIVU:
307 case AMUL:
308 case AMULU:
309 case AREM:
310 case AREMU:
311 case ADIVV:
312 case ADIVVU:
313 case AMULV:
314 case AMULVU:
315 case AREMV:
316 case AREMVU:
317 s->set.cc = E_HILO;
318 case AADD:
319 case AADDU:
320 case AADDV:
321 case AADDVU:
322 case AAND:
323 case ANOR:
324 case AOR:
325 case ASGT:
326 case ASGTU:
327 case ASLL:
328 case ASRA:
329 case ASRL:
330 case ASLLV:
331 case ASRAV:
332 case ASRLV:
333 case ASUB:
334 case ASUBU:
335 case ASUBV:
336 case ASUBVU:
337 case AXOR:
338
339 case AADDD:
340 case AADDF:
341 case AADDW:
342 case ASUBD:
343 case ASUBF:
344 case ASUBW:
345 case AMULF:
346 case AMULD:
347 case AMULW:
348 case ADIVF:
349 case ADIVD:
350 case ADIVW:
351 if(p->reg == NREG) {
352 if(p->to.type == D_REG || p->to.type == D_FREG)
353 p->reg = p->to.reg;
354 if(p->reg == NREG)
355 print("botch %P\n", p);
356 }
357 break;
358 }
359
360 /*
361 * flags based on 'to' field
362 */
363 c = p->to.class;
364 if(c == 0) {
365 c = aclass(&p->to) + 1;
366 p->to.class = c;
367 }
368 c--;
369 switch(c) {
370 default:
371 print("unknown class %d %D\n", c, &p->to);
372
373 case C_ZCON:
374 case C_SCON:
375 case C_ADD0CON:
376 case C_AND0CON:
377 case C_ADDCON:
378 case C_ANDCON:
379 case C_UCON:
380 case C_LCON:
381 case C_NONE:
382 case C_SBRA:
383 case C_LBRA:
384 break;
385
386 case C_HI:
387 case C_LO:
388 s->set.cc |= E_HILO;
389 break;
390 case C_FCREG:
391 s->set.cc |= E_FCR;
392 break;
393 case C_MREG:
394 s->set.cc |= E_MCR;
395 break;
396 case C_ZOREG:
397 case C_SOREG:
398 case C_LOREG:
399 c = p->to.reg;
400 s->used.ireg |= 1<<c;
401 if(ad)
402 break;
403 s->size = sz;
404 s->soffset = regoff(&p->to);
405
406 m = ANYMEM;
407 if(c == REGSB)
408 m = E_MEMSB;
409 if(c == REGSP)
410 m = E_MEMSP;
411
412 if(ar)
413 s->used.cc |= m;
414 else
415 s->set.cc |= m;
416 break;
417 case C_SACON:
418 case C_LACON:
419 s->used.ireg |= 1<<REGSP;
420 break;
421 case C_SECON:
422 case C_LECON:
423 s->used.ireg |= 1<<REGSB;
424 break;
425 case C_REG:
426 if(ar)
427 s->used.ireg |= 1<<p->to.reg;
428 else
429 s->set.ireg |= 1<<p->to.reg;
430 break;
431 case C_FREG:
432 /* do better -- determine double prec */
433 if(ar) {
434 s->used.freg |= 1<<p->to.reg;
435 s->used.freg |= 1<<(p->to.reg|1);
436 } else {
437 s->set.freg |= 1<<p->to.reg;
438 s->set.freg |= 1<<(p->to.reg|1);
439 }
440 if(ld && p->from.type == D_REG)
441 p->mark |= LOAD;
442 break;
443 case C_SAUTO:
444 case C_LAUTO:
445 s->used.ireg |= 1<<REGSP;
446 if(ad)
447 break;
448 s->size = sz;
449 s->soffset = regoff(&p->to);
450
451 if(ar)
452 s->used.cc |= E_MEMSP;
453 else
454 s->set.cc |= E_MEMSP;
455 break;
456 case C_SEXT:
457 case C_LEXT:
458 s->used.ireg |= 1<<REGSB;
459 if(ad)
460 break;
461 s->size = sz;
462 s->soffset = regoff(&p->to);
463
464 if(ar)
465 s->used.cc |= E_MEMSB;
466 else
467 s->set.cc |= E_MEMSB;
468 break;
469 }
470
471 /*
472 * flags based on 'from' field
473 */
474 c = p->from.class;
475 if(c == 0) {
476 c = aclass(&p->from) + 1;
477 p->from.class = c;
478 }
479 c--;
480 switch(c) {
481 default:
482 print("unknown class %d %D\n", c, &p->from);
483
484 case C_ZCON:
485 case C_SCON:
486 case C_ADD0CON:
487 case C_AND0CON:
488 case C_ADDCON:
489 case C_ANDCON:
490 case C_UCON:
491 case C_LCON:
492 case C_NONE:
493 case C_SBRA:
494 case C_LBRA:
495 break;
496 case C_HI:
497 case C_LO:
498 s->used.cc |= E_HILO;
499 break;
500 case C_FCREG:
501 s->used.cc |= E_FCR;
502 break;
503 case C_MREG:
504 s->used.cc |= E_MCR;
505 break;
506 case C_ZOREG:
507 case C_SOREG:
508 case C_LOREG:
509 c = p->from.reg;
510 s->used.ireg |= 1<<c;
511 if(ld)
512 p->mark |= LOAD;
513 s->size = sz;
514 s->soffset = regoff(&p->from);
515
516 m = ANYMEM;
517 if(c == REGSB)
518 m = E_MEMSB;
519 if(c == REGSP)
520 m = E_MEMSP;
521
522 s->used.cc |= m;
523 break;
524 case C_SACON:
525 case C_LACON:
526 s->used.ireg |= 1<<REGSP;
527 break;
528 case C_SECON:
529 case C_LECON:
530 s->used.ireg |= 1<<REGSB;
531 break;
532 case C_REG:
533 s->used.ireg |= 1<<p->from.reg;
534 break;
535 case C_FREG:
536 /* do better -- determine double prec */
537 s->used.freg |= 1<<p->from.reg;
538 s->used.freg |= 1<<(p->from.reg|1);
539 if(ld && p->to.type == D_REG)
540 p->mark |= LOAD;
541 break;
542 case C_SAUTO:
543 case C_LAUTO:
544 s->used.ireg |= 1<<REGSP;
545 if(ld)
546 p->mark |= LOAD;
547 if(ad)
548 break;
549 s->size = sz;
550 s->soffset = regoff(&p->from);
551
552 s->used.cc |= E_MEMSP;
553 break;
554 case C_SEXT:
555 case C_LEXT:
556 s->used.ireg |= 1<<REGSB;
557 if(ld)
558 p->mark |= LOAD;
559 if(ad)
560 break;
561 s->size = sz;
562 s->soffset = regoff(&p->from);
563
564 s->used.cc |= E_MEMSB;
565 break;
566 }
567
568 c = p->reg;
569 if(c != NREG) {
570 if(p->from.type == D_FREG || p->to.type == D_FREG) {
571 s->used.freg |= 1<<c;
572 s->used.freg |= 1<<(c|1);
573 } else
574 s->used.ireg |= 1<<c;
575 }
576 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */
577 }
578
579 /*
580 * test to see if 2 instrictions can be
581 * interchanged without changing semantics
582 */
583 int
depend(Sch * sa,Sch * sb)584 depend(Sch *sa, Sch *sb)
585 {
586 ulong x;
587
588 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
589 return 1;
590 if(sb->set.ireg & sa->used.ireg)
591 return 1;
592
593 if(sa->set.freg & (sb->set.freg|sb->used.freg))
594 return 1;
595 if(sb->set.freg & sa->used.freg)
596 return 1;
597
598 /*
599 * special case.
600 * loads from same address cannot pass.
601 * this is for hardware fifo's and the like
602 */
603 if(sa->used.cc & sb->used.cc & E_MEM)
604 if(sa->p.reg == sb->p.reg)
605 if(regoff(&sa->p.from) == regoff(&sb->p.from))
606 return 1;
607
608 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
609 (sb->set.cc & sa->used.cc);
610 if(x) {
611 /*
612 * allow SB and SP to pass each other.
613 * allow SB to pass SB iff doffsets are ok
614 * anything else conflicts
615 */
616 if(x != E_MEMSP && x != E_MEMSB)
617 return 1;
618 x = sa->set.cc | sb->set.cc |
619 sa->used.cc | sb->used.cc;
620 if(x & E_MEM)
621 return 1;
622 if(offoverlap(sa, sb))
623 return 1;
624 }
625
626 return 0;
627 }
628
629 int
offoverlap(Sch * sa,Sch * sb)630 offoverlap(Sch *sa, Sch *sb)
631 {
632
633 if(sa->soffset < sb->soffset) {
634 if(sa->soffset+sa->size > sb->soffset)
635 return 1;
636 return 0;
637 }
638 if(sb->soffset+sb->size > sa->soffset)
639 return 1;
640 return 0;
641 }
642
643 /*
644 * test 2 adjacent instructions
645 * and find out if inserted instructions
646 * are desired to prevent stalls.
647 */
648 int
conflict(Sch * sa,Sch * sb)649 conflict(Sch *sa, Sch *sb)
650 {
651
652 if(sa->set.ireg & sb->used.ireg)
653 return 1;
654 if(sa->set.freg & sb->used.freg)
655 return 1;
656 if(sa->set.cc & sb->used.cc)
657 return 1;
658
659 return 0;
660 }
661
662 int
compound(Prog * p)663 compound(Prog *p)
664 {
665 Optab *o;
666
667 o = oplook(p);
668 if(o->size != 4)
669 return 1;
670 if(p->to.type == D_REG && p->to.reg == REGSB)
671 return 1;
672 return 0;
673 }
674
675 void
dumpbits(Sch * s,Dep * d)676 dumpbits(Sch *s, Dep *d)
677 {
678 int i;
679
680 for(i=0; i<32; i++)
681 if(d->ireg & (1<<i))
682 Bprint(&bso, " R%d", i);
683 for(i=0; i<32; i++)
684 if(d->freg & (1<<i))
685 Bprint(&bso, " F%d", i);
686 for(i=0; i<32; i++)
687 switch(d->cc & (1<<i)) {
688 default:
689 break;
690 case E_HILO:
691 Bprint(&bso, " HILO");
692 break;
693 case E_FCR:
694 Bprint(&bso, " FCR");
695 break;
696 case E_MCR:
697 Bprint(&bso, " MCR");
698 break;
699 case E_MEM:
700 Bprint(&bso, " MEM%d", s->size);
701 break;
702 case E_MEMSB:
703 Bprint(&bso, " SB%d", s->size);
704 break;
705 case E_MEMSP:
706 Bprint(&bso, " SP%d", s->size);
707 break;
708 }
709 }
710