1 #include "l.h"
2
3 enum
4 {
5 E_HILO = 1<<0,
6 E_FCR = 1<<1,
7 E_MCR = 1<<2,
8 E_MEM = 1<<3,
9 E_MEMSP = 1<<4, /* uses offset and size */
10 E_MEMSB = 1<<5, /* uses offset and size */
11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
12 DELAY = BRANCH|LOAD|FCMP,
13 };
14
15 typedef struct Sch Sch;
16 typedef struct Dep Dep;
17
18 struct Dep
19 {
20 ulong ireg;
21 ulong freg;
22 ulong cc;
23 };
24 struct Sch
25 {
26 Prog p;
27 Dep set;
28 Dep used;
29 long soffset;
30 char size;
31 char nop;
32 char comp;
33 };
34
35 void regsused(Sch*, Prog*);
36 int depend(Sch*, Sch*);
37 int conflict(Sch*, Sch*);
38 int offoverlap(Sch*, Sch*);
39 void dumpbits(Sch*, Dep*);
40
41 void
sched(Prog * p0,Prog * pe)42 sched(Prog *p0, Prog *pe)
43 {
44 Prog *p, *q;
45 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46
47 /*
48 * build side structure
49 */
50 s = sch;
51 for(p=p0;; p=p->link) {
52 memset(s, 0, sizeof(*s));
53 s->p = *p;
54 regsused(s, p);
55 if(debug['X']) {
56 Bprint(&bso, "%P\t\tset", &s->p);
57 dumpbits(s, &s->set);
58 Bprint(&bso, "; used");
59 dumpbits(s, &s->used);
60 if(s->comp)
61 Bprint(&bso, "; compound");
62 if(s->p.mark & LOAD)
63 Bprint(&bso, "; load");
64 if(s->p.mark & BRANCH)
65 Bprint(&bso, "; branch");
66 if(s->p.mark & FCMP)
67 Bprint(&bso, "; fcmp");
68 Bprint(&bso, "\n");
69 }
70 if(p == pe)
71 break;
72 s++;
73 }
74 se = s;
75
76 /*
77 * prepass to move things around
78 * does nothing, but tries to make
79 * the actual scheduler work better
80 */
81 for(s=sch; s<=se; s++) {
82 if(!(s->p.mark & LOAD))
83 continue;
84 /* always good to put nonconflict loads together */
85 for(t=s+1; t<=se; t++) {
86 if(!(t->p.mark & LOAD))
87 continue;
88 if(t->p.mark & BRANCH)
89 break;
90 if(conflict(s, t))
91 break;
92 for(u=t-1; u>s; u--)
93 if(depend(u, t))
94 goto no11;
95 u = s+1;
96 stmp = *t;
97 memmove(s+2, u, (uchar*)t - (uchar*)u);
98 *u = stmp;
99 break;
100 }
101 no11:
102
103 /* put schedule fodder above load */
104 for(t=s+1; t<=se; t++) {
105 if(t->p.mark & BRANCH)
106 break;
107 if(s > sch && conflict(s-1, t))
108 continue;
109 for(u=t-1; u>=s; u--)
110 if(depend(t, u))
111 goto no1;
112 stmp = *t;
113 memmove(s+1, s, (uchar*)t - (uchar*)s);
114 *s = stmp;
115 if(!(s->p.mark & LOAD))
116 break;
117 no1:;
118 }
119 }
120
121 for(s=se; s>=sch; s--) {
122 if(!(s->p.mark & DELAY))
123 continue;
124 if(s < se)
125 if(!conflict(s, s+1))
126 goto out3;
127 /*
128 * s is load, s+1 is immediate use of result or end of block
129 * t is the trial instruction to insert between s and s+1
130 */
131 if(!debug['Y'])
132 for(t=s-1; t>=sch; t--) {
133 if(t->comp)
134 if(s->p.mark & BRANCH)
135 goto no2;
136 if(t->p.mark & DELAY)
137 if(s >= se || conflict(t, s+1))
138 goto no2;
139 for(u=t+1; u<=s; u++)
140 if(depend(u, t))
141 goto no2;
142 goto out2;
143 no2:;
144 }
145 if(debug['X'])
146 Bprint(&bso, "?l%P\n", &s->p);
147 s->nop = 1;
148 if(debug['v']) {
149 if(s->p.mark & LOAD) {
150 nop.load.count++;
151 nop.load.outof++;
152 }
153 if(s->p.mark & BRANCH) {
154 nop.branch.count++;
155 nop.branch.outof++;
156 }
157 if(s->p.mark & FCMP) {
158 nop.fcmp.count++;
159 nop.fcmp.outof++;
160 }
161 }
162 continue;
163
164 out2:
165 if(debug['X']) {
166 Bprint(&bso, "!l%P\n", &t->p);
167 Bprint(&bso, "%P\n", &s->p);
168 }
169 stmp = *t;
170 memmove(t, t+1, (uchar*)s - (uchar*)t);
171 *s = stmp;
172 s--;
173
174 out3:
175 if(debug['v']) {
176 if(s->p.mark & LOAD)
177 nop.load.outof++;
178 if(s->p.mark & BRANCH)
179 nop.branch.outof++;
180 if(s->p.mark & FCMP)
181 nop.fcmp.outof++;
182 }
183 }
184
185 /* Avoid HI/LO use->set */
186 t = sch+1;
187 for(s=sch; s<se-1; s++, t++) {
188 if((s->used.cc & E_HILO) == 0)
189 continue;
190 if(t->set.cc & E_HILO)
191 s->nop = 2;
192 }
193
194 /*
195 * put it all back
196 */
197 for(s=sch, p=p0; s<=se; s++, p=q) {
198 q = p->link;
199 if(q != s->p.link) {
200 *p = s->p;
201 p->link = q;
202 }
203 while(s->nop--)
204 addnop(p);
205 }
206 if(debug['X']) {
207 Bprint(&bso, "\n");
208 Bflush(&bso);
209 }
210 }
211
212 void
regsused(Sch * s,Prog * realp)213 regsused(Sch *s, Prog *realp)
214 {
215 int c, ar, ad, ld, sz;
216 ulong m;
217 Prog *p;
218
219 p = &s->p;
220 s->comp = compound(p);
221 s->nop = 0;
222 if(s->comp) {
223 s->set.ireg |= 1<<REGTMP;
224 s->used.ireg |= 1<<REGTMP;
225 }
226
227 ar = 0; /* dest is really reference */
228 ad = 0; /* source/dest is really address */
229 ld = 0; /* opcode is load instruction */
230 sz = 20; /* size of load/store for overlap computation */
231
232 /*
233 * flags based on opcode
234 */
235 switch(p->as) {
236 case ATEXT:
237 curtext = realp;
238 autosize = p->to.offset + 8;
239 ad = 1;
240 break;
241 case AJAL:
242 c = p->reg;
243 if(c == NREG)
244 c = REGLINK;
245 s->set.ireg |= 1<<c;
246 ar = 1;
247 ad = 1;
248 break;
249 case ABGEZAL:
250 case ABLTZAL:
251 s->set.ireg |= 1<<REGLINK;
252 case ABEQ:
253 case ABGEZ:
254 case ABGTZ:
255 case ABLEZ:
256 case ABLTZ:
257 case ABNE:
258 ar = 1;
259 ad = 1;
260 break;
261 case ABFPT:
262 case ABFPF:
263 ad = 1;
264 s->used.cc |= E_FCR;
265 break;
266 case ACMPEQD:
267 case ACMPEQF:
268 case ACMPGED:
269 case ACMPGEF:
270 case ACMPGTD:
271 case ACMPGTF:
272 ar = 1;
273 s->set.cc |= E_FCR;
274 p->mark |= FCMP;
275 break;
276 case AJMP:
277 ar = 1;
278 ad = 1;
279 break;
280 case AMOVB:
281 case AMOVBU:
282 sz = 1;
283 ld = 1;
284 break;
285 case AMOVH:
286 case AMOVHU:
287 sz = 2;
288 ld = 1;
289 break;
290 case AMOVF:
291 case AMOVW:
292 case AMOVWL:
293 case AMOVWR:
294 sz = 4;
295 ld = 1;
296 break;
297 case AMOVD:
298 case AMOVV:
299 case AMOVVL:
300 case AMOVVR:
301 sz = 8;
302 ld = 1;
303 break;
304 case ADIV:
305 case ADIVU:
306 case AMUL:
307 case AMULU:
308 case AREM:
309 case AREMU:
310 case ADIVV:
311 case ADIVVU:
312 case AMULV:
313 case AMULVU:
314 case AREMV:
315 case AREMVU:
316 s->set.cc = E_HILO;
317 case AADD:
318 case AADDU:
319 case AADDV:
320 case AADDVU:
321 case AAND:
322 case ANOR:
323 case AOR:
324 case ASGT:
325 case ASGTU:
326 case ASLL:
327 case ASRA:
328 case ASRL:
329 case ASLLV:
330 case ASRAV:
331 case ASRLV:
332 case ASUB:
333 case ASUBU:
334 case ASUBV:
335 case ASUBVU:
336 case AXOR:
337
338 case AADDD:
339 case AADDF:
340 case AADDW:
341 case ASUBD:
342 case ASUBF:
343 case ASUBW:
344 case AMULF:
345 case AMULD:
346 case AMULW:
347 case ADIVF:
348 case ADIVD:
349 case ADIVW:
350 if(p->reg == NREG) {
351 if(p->to.type == D_REG || p->to.type == D_FREG)
352 p->reg = p->to.reg;
353 if(p->reg == NREG)
354 print("botch %P\n", p);
355 }
356 break;
357 }
358
359 /*
360 * flags based on 'to' field
361 */
362 c = p->to.class;
363 if(c == 0) {
364 c = aclass(&p->to) + 1;
365 p->to.class = c;
366 }
367 c--;
368 switch(c) {
369 default:
370 print("unknown class %d %D\n", c, &p->to);
371
372 case C_ZCON:
373 case C_SCON:
374 case C_ADD0CON:
375 case C_AND0CON:
376 case C_ADDCON:
377 case C_ANDCON:
378 case C_UCON:
379 case C_LCON:
380 case C_NONE:
381 case C_SBRA:
382 case C_LBRA:
383 break;
384
385 case C_HI:
386 case C_LO:
387 s->set.cc |= E_HILO;
388 break;
389 case C_FCREG:
390 s->set.cc |= E_FCR;
391 break;
392 case C_MREG:
393 s->set.cc |= E_MCR;
394 break;
395 case C_ZOREG:
396 case C_SOREG:
397 case C_LOREG:
398 c = p->to.reg;
399 s->used.ireg |= 1<<c;
400 if(ad)
401 break;
402 s->size = sz;
403 s->soffset = regoff(&p->to);
404
405 m = ANYMEM;
406 if(c == REGSB)
407 m = E_MEMSB;
408 if(c == REGSP)
409 m = E_MEMSP;
410
411 if(ar)
412 s->used.cc |= m;
413 else
414 s->set.cc |= m;
415 break;
416 case C_SACON:
417 case C_LACON:
418 s->used.ireg |= 1<<REGSP;
419 break;
420 case C_SECON:
421 case C_LECON:
422 s->used.ireg |= 1<<REGSB;
423 break;
424 case C_REG:
425 if(ar)
426 s->used.ireg |= 1<<p->to.reg;
427 else
428 s->set.ireg |= 1<<p->to.reg;
429 break;
430 case C_FREG:
431 /* do better -- determine double prec */
432 if(ar) {
433 s->used.freg |= 1<<p->to.reg;
434 s->used.freg |= 1<<(p->to.reg|1);
435 } else {
436 s->set.freg |= 1<<p->to.reg;
437 s->set.freg |= 1<<(p->to.reg|1);
438 }
439 if(ld && p->from.type == D_REG)
440 p->mark |= LOAD;
441 break;
442 case C_SAUTO:
443 case C_LAUTO:
444 s->used.ireg |= 1<<REGSP;
445 if(ad)
446 break;
447 s->size = sz;
448 s->soffset = regoff(&p->to);
449
450 if(ar)
451 s->used.cc |= E_MEMSP;
452 else
453 s->set.cc |= E_MEMSP;
454 break;
455 case C_SEXT:
456 case C_LEXT:
457 s->used.ireg |= 1<<REGSB;
458 if(ad)
459 break;
460 s->size = sz;
461 s->soffset = regoff(&p->to);
462
463 if(ar)
464 s->used.cc |= E_MEMSB;
465 else
466 s->set.cc |= E_MEMSB;
467 break;
468 }
469
470 /*
471 * flags based on 'from' field
472 */
473 c = p->from.class;
474 if(c == 0) {
475 c = aclass(&p->from) + 1;
476 p->from.class = c;
477 }
478 c--;
479 switch(c) {
480 default:
481 print("unknown class %d %D\n", c, &p->from);
482
483 case C_ZCON:
484 case C_SCON:
485 case C_ADD0CON:
486 case C_AND0CON:
487 case C_ADDCON:
488 case C_ANDCON:
489 case C_UCON:
490 case C_LCON:
491 case C_NONE:
492 case C_SBRA:
493 case C_LBRA:
494 break;
495 case C_HI:
496 case C_LO:
497 s->used.cc |= E_HILO;
498 break;
499 case C_FCREG:
500 s->used.cc |= E_FCR;
501 break;
502 case C_MREG:
503 s->used.cc |= E_MCR;
504 break;
505 case C_ZOREG:
506 case C_SOREG:
507 case C_LOREG:
508 c = p->from.reg;
509 s->used.ireg |= 1<<c;
510 if(ld)
511 p->mark |= LOAD;
512 s->size = sz;
513 s->soffset = regoff(&p->from);
514
515 m = ANYMEM;
516 if(c == REGSB)
517 m = E_MEMSB;
518 if(c == REGSP)
519 m = E_MEMSP;
520
521 s->used.cc |= m;
522 break;
523 case C_SACON:
524 case C_LACON:
525 s->used.ireg |= 1<<REGSP;
526 break;
527 case C_SECON:
528 case C_LECON:
529 s->used.ireg |= 1<<REGSB;
530 break;
531 case C_REG:
532 s->used.ireg |= 1<<p->from.reg;
533 break;
534 case C_FREG:
535 /* do better -- determine double prec */
536 s->used.freg |= 1<<p->from.reg;
537 s->used.freg |= 1<<(p->from.reg|1);
538 if(ld && p->to.type == D_REG)
539 p->mark |= LOAD;
540 break;
541 case C_SAUTO:
542 case C_LAUTO:
543 s->used.ireg |= 1<<REGSP;
544 if(ld)
545 p->mark |= LOAD;
546 if(ad)
547 break;
548 s->size = sz;
549 s->soffset = regoff(&p->from);
550
551 s->used.cc |= E_MEMSP;
552 break;
553 case C_SEXT:
554 case C_LEXT:
555 s->used.ireg |= 1<<REGSB;
556 if(ld)
557 p->mark |= LOAD;
558 if(ad)
559 break;
560 s->size = sz;
561 s->soffset = regoff(&p->from);
562
563 s->used.cc |= E_MEMSB;
564 break;
565 }
566
567 c = p->reg;
568 if(c != NREG) {
569 if(p->from.type == D_FREG || p->to.type == D_FREG) {
570 s->used.freg |= 1<<c;
571 s->used.freg |= 1<<(c|1);
572 } else
573 s->used.ireg |= 1<<c;
574 }
575 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */
576 }
577
578 /*
579 * test to see if 2 instrictions can be
580 * interchanged without changing semantics
581 */
582 int
depend(Sch * sa,Sch * sb)583 depend(Sch *sa, Sch *sb)
584 {
585 ulong x;
586
587 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
588 return 1;
589 if(sb->set.ireg & sa->used.ireg)
590 return 1;
591
592 if(sa->set.freg & (sb->set.freg|sb->used.freg))
593 return 1;
594 if(sb->set.freg & sa->used.freg)
595 return 1;
596
597 /*
598 * special case.
599 * loads from same address cannot pass.
600 * this is for hardware fifo's and the like
601 */
602 if(sa->used.cc & sb->used.cc & E_MEM)
603 if(sa->p.reg == sb->p.reg)
604 if(regoff(&sa->p.from) == regoff(&sb->p.from))
605 return 1;
606
607 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
608 (sb->set.cc & sa->used.cc);
609 if(x) {
610 /*
611 * allow SB and SP to pass each other.
612 * allow SB to pass SB iff doffsets are ok
613 * anything else conflicts
614 */
615 if(x != E_MEMSP && x != E_MEMSB)
616 return 1;
617 x = sa->set.cc | sb->set.cc |
618 sa->used.cc | sb->used.cc;
619 if(x & E_MEM)
620 return 1;
621 if(offoverlap(sa, sb))
622 return 1;
623 }
624
625 return 0;
626 }
627
628 int
offoverlap(Sch * sa,Sch * sb)629 offoverlap(Sch *sa, Sch *sb)
630 {
631
632 if(sa->soffset < sb->soffset) {
633 if(sa->soffset+sa->size > sb->soffset)
634 return 1;
635 return 0;
636 }
637 if(sb->soffset+sb->size > sa->soffset)
638 return 1;
639 return 0;
640 }
641
642 /*
643 * test 2 adjacent instructions
644 * and find out if inserted instructions
645 * are desired to prevent stalls.
646 */
647 int
conflict(Sch * sa,Sch * sb)648 conflict(Sch *sa, Sch *sb)
649 {
650
651 if(sa->set.ireg & sb->used.ireg)
652 return 1;
653 if(sa->set.freg & sb->used.freg)
654 return 1;
655 if(sa->set.cc & sb->used.cc)
656 return 1;
657
658 return 0;
659 }
660
661 int
compound(Prog * p)662 compound(Prog *p)
663 {
664 Optab *o;
665
666 o = oplook(p);
667 if(o->size != 4)
668 return 1;
669 if(p->to.type == D_REG && p->to.reg == REGSB)
670 return 1;
671 return 0;
672 }
673
674 void
dumpbits(Sch * s,Dep * d)675 dumpbits(Sch *s, Dep *d)
676 {
677 int i;
678
679 for(i=0; i<32; i++)
680 if(d->ireg & (1<<i))
681 Bprint(&bso, " R%d", i);
682 for(i=0; i<32; i++)
683 if(d->freg & (1<<i))
684 Bprint(&bso, " F%d", i);
685 for(i=0; i<32; i++)
686 switch(d->cc & (1<<i)) {
687 default:
688 break;
689 case E_HILO:
690 Bprint(&bso, " HILO");
691 break;
692 case E_FCR:
693 Bprint(&bso, " FCR");
694 break;
695 case E_MCR:
696 Bprint(&bso, " MCR");
697 break;
698 case E_MEM:
699 Bprint(&bso, " MEM%d", s->size);
700 break;
701 case E_MEMSB:
702 Bprint(&bso, " SB%d", s->size);
703 break;
704 case E_MEMSP:
705 Bprint(&bso, " SP%d", s->size);
706 break;
707 }
708 }
709