1 #include "l.h"
2
3 enum
4 {
5 E_HILO = 1<<0,
6 E_FCR = 1<<1,
7 E_MCR = 1<<2,
8 E_MEM = 1<<3,
9 E_MEMSP = 1<<4, /* uses offset and size */
10 E_MEMSB = 1<<5, /* uses offset and size */
11 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
12 DELAY = BRANCH|LOAD|FCMP,
13 };
14
15 typedef struct Sch Sch;
16 typedef struct Dep Dep;
17
18 struct Dep
19 {
20 ulong ireg;
21 ulong freg;
22 ulong cc;
23 };
24 struct Sch
25 {
26 Prog p;
27 Dep set;
28 Dep used;
29 long soffset;
30 char size;
31 char nop;
32 char comp;
33 };
34
35 void regsused(Sch*, Prog*);
36 int depend(Sch*, Sch*);
37 int conflict(Sch*, Sch*);
38 int offoverlap(Sch*, Sch*);
39 void dumpbits(Sch*, Dep*);
40
41 void
sched(Prog * p0,Prog * pe)42 sched(Prog *p0, Prog *pe)
43 {
44 Prog *p, *q;
45 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46
47 /*
48 * build side structure
49 */
50 s = sch;
51 for(p=p0;; p=p->link) {
52 memset(s, 0, sizeof(*s));
53 s->p = *p;
54 regsused(s, p);
55 if(debug['X']) {
56 Bprint(&bso, "%P\t\tset", &s->p);
57 dumpbits(s, &s->set);
58 Bprint(&bso, "; used");
59 dumpbits(s, &s->used);
60 if(s->comp)
61 Bprint(&bso, "; compound");
62 if(s->p.mark & LOAD)
63 Bprint(&bso, "; load");
64 if(s->p.mark & BRANCH)
65 Bprint(&bso, "; branch");
66 if(s->p.mark & FCMP)
67 Bprint(&bso, "; fcmp");
68 Bprint(&bso, "\n");
69 }
70 if(p == pe)
71 break;
72 s++;
73 }
74 se = s;
75
76 /*
77 * prepass to move things around
78 * does nothing, but tries to make
79 * the actual scheduler work better
80 */
81 for(s=sch; s<=se; s++) {
82 if(!(s->p.mark & LOAD))
83 continue;
84 /* always good to put nonconflict loads together */
85 for(t=s+1; t<=se; t++) {
86 if(!(t->p.mark & LOAD))
87 continue;
88 if(t->p.mark & BRANCH)
89 break;
90 if(conflict(s, t))
91 break;
92 for(u=t-1; u>s; u--)
93 if(depend(u, t))
94 goto no11;
95 u = s+1;
96 stmp = *t;
97 memmove(s+2, u, (uchar*)t - (uchar*)u);
98 *u = stmp;
99 break;
100 }
101 no11:
102
103 /* put schedule fodder above load */
104 for(t=s+1; t<=se; t++) {
105 if(t->p.mark & BRANCH)
106 break;
107 if(s > sch && conflict(s-1, t))
108 continue;
109 for(u=t-1; u>=s; u--)
110 if(depend(t, u))
111 goto no1;
112 stmp = *t;
113 memmove(s+1, s, (uchar*)t - (uchar*)s);
114 *s = stmp;
115 if(!(s->p.mark & LOAD))
116 break;
117 no1:;
118 }
119 }
120
121 for(s=se; s>=sch; s--) {
122 if(!(s->p.mark & DELAY))
123 continue;
124 if(s < se)
125 if(!conflict(s, s+1))
126 goto out3;
127 /*
128 * s is load, s+1 is immediate use of result or end of block
129 * t is the trial instruction to insert between s and s+1
130 */
131 if(!debug['Y'])
132 for(t=s-1; t>=sch; t--) {
133 if(t->comp)
134 if(s->p.mark & BRANCH)
135 goto no2;
136 if(t->p.mark & DELAY)
137 if(s >= se || conflict(t, s+1))
138 goto no2;
139 for(u=t+1; u<=s; u++)
140 if(depend(u, t))
141 goto no2;
142 goto out2;
143 no2:;
144 }
145 if(debug['X'])
146 Bprint(&bso, "?l%P\n", &s->p);
147 s->nop = 1;
148 if(debug['v']) {
149 if(s->p.mark & LOAD) {
150 nop.load.count++;
151 nop.load.outof++;
152 }
153 if(s->p.mark & BRANCH) {
154 nop.branch.count++;
155 nop.branch.outof++;
156 }
157 if(s->p.mark & FCMP) {
158 nop.fcmp.count++;
159 nop.fcmp.outof++;
160 }
161 }
162 continue;
163
164 out2:
165 if(debug['X']) {
166 Bprint(&bso, "!l%P\n", &t->p);
167 Bprint(&bso, "%P\n", &s->p);
168 }
169 stmp = *t;
170 memmove(t, t+1, (uchar*)s - (uchar*)t);
171 *s = stmp;
172 s--;
173
174 out3:
175 if(debug['v']) {
176 if(s->p.mark & LOAD)
177 nop.load.outof++;
178 if(s->p.mark & BRANCH)
179 nop.branch.outof++;
180 if(s->p.mark & FCMP)
181 nop.fcmp.outof++;
182 }
183 }
184
185 /* Avoid HI/LO use->set */
186 t = sch+1;
187 for(s=sch; s<se-1; s++, t++) {
188 if((s->used.cc & E_HILO) == 0)
189 continue;
190 if(t->set.cc & E_HILO)
191 s->nop = 2;
192 }
193
194 /*
195 * put it all back
196 */
197 for(s=sch, p=p0; s<=se; s++, p=q) {
198 q = p->link;
199 if(q != s->p.link) {
200 *p = s->p;
201 p->link = q;
202 }
203 while(s->nop--)
204 addnop(p);
205 }
206 if(debug['X']) {
207 Bprint(&bso, "\n");
208 Bflush(&bso);
209 }
210 }
211
212 void
regsused(Sch * s,Prog * realp)213 regsused(Sch *s, Prog *realp)
214 {
215 int c, ar, ad, ld, sz;
216 ulong m;
217 Prog *p;
218
219 p = &s->p;
220 s->comp = compound(p);
221 s->nop = 0;
222 if(s->comp) {
223 s->set.ireg |= 1<<REGTMP;
224 s->used.ireg |= 1<<REGTMP;
225 }
226
227 ar = 0; /* dest is really reference */
228 ad = 0; /* source/dest is really address */
229 ld = 0; /* opcode is load instruction */
230 sz = 20; /* size of load/store for overlap computation */
231
232 /*
233 * flags based on opcode
234 */
235 switch(p->as) {
236 case ATEXT:
237 curtext = realp;
238 autosize = p->to.offset + 4;
239 ad = 1;
240 break;
241 case AJAL:
242 c = p->reg;
243 if(c == NREG)
244 c = REGLINK;
245 s->set.ireg |= 1<<c;
246 ar = 1;
247 ad = 1;
248 break;
249 case ABGEZAL:
250 case ABLTZAL:
251 s->set.ireg |= 1<<REGLINK;
252 case ABEQ:
253 case ABGEZ:
254 case ABGTZ:
255 case ABLEZ:
256 case ABLTZ:
257 case ABNE:
258 ar = 1;
259 ad = 1;
260 break;
261 case ABFPT:
262 case ABFPF:
263 ad = 1;
264 s->used.cc |= E_FCR;
265 break;
266 case ACMPEQD:
267 case ACMPEQF:
268 case ACMPGED:
269 case ACMPGEF:
270 case ACMPGTD:
271 case ACMPGTF:
272 ar = 1;
273 s->set.cc |= E_FCR;
274 p->mark |= FCMP;
275 break;
276 case AJMP:
277 ar = 1;
278 ad = 1;
279 break;
280 case AMOVB:
281 case AMOVBU:
282 sz = 1;
283 ld = 1;
284 break;
285 case AMOVH:
286 case AMOVHU:
287 sz = 2;
288 ld = 1;
289 break;
290 case AMOVF:
291 case AMOVW:
292 case AMOVWL:
293 case AMOVWR:
294 sz = 4;
295 ld = 1;
296 break;
297 case AMOVD:
298 case AMOVV:
299 case AMOVVL:
300 case AMOVVR:
301 sz = 8;
302 ld = 1;
303 break;
304 case ADIV:
305 case ADIVU:
306 case AMUL:
307 case AMULU:
308 case AREM:
309 case AREMU:
310 s->set.cc = E_HILO;
311 case AADD:
312 case AADDU:
313 case AAND:
314 case ANOR:
315 case AOR:
316 case ASGT:
317 case ASGTU:
318 case ASLL:
319 case ASRA:
320 case ASRL:
321 case ASUB:
322 case ASUBU:
323 case AXOR:
324
325 case AADDD:
326 case AADDF:
327 case AADDW:
328 case ASUBD:
329 case ASUBF:
330 case ASUBW:
331 case AMULF:
332 case AMULD:
333 case AMULW:
334 case ADIVF:
335 case ADIVD:
336 case ADIVW:
337 if(p->reg == NREG) {
338 if(p->to.type == D_REG || p->to.type == D_FREG)
339 p->reg = p->to.reg;
340 if(p->reg == NREG)
341 print("botch %P\n", p);
342 }
343 break;
344 }
345
346 /*
347 * flags based on 'to' field
348 */
349 c = p->to.class;
350 if(c == 0) {
351 c = aclass(&p->to) + 1;
352 p->to.class = c;
353 }
354 c--;
355 switch(c) {
356 default:
357 print("unknown class %d %D\n", c, &p->to);
358
359 case C_ZCON:
360 case C_SCON:
361 case C_ADD0CON:
362 case C_AND0CON:
363 case C_ADDCON:
364 case C_ANDCON:
365 case C_UCON:
366 case C_LCON:
367 case C_NONE:
368 case C_SBRA:
369 case C_LBRA:
370 break;
371
372 case C_HI:
373 case C_LO:
374 s->set.cc |= E_HILO;
375 break;
376 case C_FCREG:
377 s->set.cc |= E_FCR;
378 break;
379 case C_MREG:
380 s->set.cc |= E_MCR;
381 break;
382 case C_ZOREG:
383 case C_SOREG:
384 case C_LOREG:
385 c = p->to.reg;
386 s->used.ireg |= 1<<c;
387 if(ad)
388 break;
389 s->size = sz;
390 s->soffset = regoff(&p->to);
391
392 m = ANYMEM;
393 if(c == REGSB)
394 m = E_MEMSB;
395 if(c == REGSP)
396 m = E_MEMSP;
397
398 if(ar)
399 s->used.cc |= m;
400 else
401 s->set.cc |= m;
402 break;
403 case C_SACON:
404 case C_LACON:
405 s->used.ireg |= 1<<REGSP;
406 break;
407 case C_SECON:
408 case C_LECON:
409 s->used.ireg |= 1<<REGSB;
410 break;
411 case C_REG:
412 if(ar)
413 s->used.ireg |= 1<<p->to.reg;
414 else
415 s->set.ireg |= 1<<p->to.reg;
416 break;
417 case C_FREG:
418 /* do better -- determine double prec */
419 if(ar) {
420 s->used.freg |= 1<<p->to.reg;
421 s->used.freg |= 1<<(p->to.reg|1);
422 } else {
423 s->set.freg |= 1<<p->to.reg;
424 s->set.freg |= 1<<(p->to.reg|1);
425 }
426 if(ld && p->from.type == D_REG)
427 p->mark |= LOAD;
428 break;
429 case C_SAUTO:
430 case C_LAUTO:
431 s->used.ireg |= 1<<REGSP;
432 if(ad)
433 break;
434 s->size = sz;
435 s->soffset = regoff(&p->to);
436
437 if(ar)
438 s->used.cc |= E_MEMSP;
439 else
440 s->set.cc |= E_MEMSP;
441 break;
442 case C_SEXT:
443 case C_LEXT:
444 s->used.ireg |= 1<<REGSB;
445 if(ad)
446 break;
447 s->size = sz;
448 s->soffset = regoff(&p->to);
449
450 if(ar)
451 s->used.cc |= E_MEMSB;
452 else
453 s->set.cc |= E_MEMSB;
454 break;
455 }
456
457 /*
458 * flags based on 'from' field
459 */
460 c = p->from.class;
461 if(c == 0) {
462 c = aclass(&p->from) + 1;
463 p->from.class = c;
464 }
465 c--;
466 switch(c) {
467 default:
468 print("unknown class %d %D\n", c, &p->from);
469
470 case C_ZCON:
471 case C_SCON:
472 case C_ADD0CON:
473 case C_AND0CON:
474 case C_ADDCON:
475 case C_ANDCON:
476 case C_UCON:
477 case C_LCON:
478 case C_NONE:
479 case C_SBRA:
480 case C_LBRA:
481 break;
482 case C_HI:
483 case C_LO:
484 s->used.cc |= E_HILO;
485 break;
486 case C_FCREG:
487 s->used.cc |= E_FCR;
488 break;
489 case C_MREG:
490 s->used.cc |= E_MCR;
491 break;
492 case C_ZOREG:
493 case C_SOREG:
494 case C_LOREG:
495 c = p->from.reg;
496 s->used.ireg |= 1<<c;
497 if(ld)
498 p->mark |= LOAD;
499 s->size = sz;
500 s->soffset = regoff(&p->from);
501
502 m = ANYMEM;
503 if(c == REGSB)
504 m = E_MEMSB;
505 if(c == REGSP)
506 m = E_MEMSP;
507
508 s->used.cc |= m;
509 break;
510 case C_SACON:
511 case C_LACON:
512 s->used.ireg |= 1<<REGSP;
513 break;
514 case C_SECON:
515 case C_LECON:
516 s->used.ireg |= 1<<REGSB;
517 break;
518 case C_REG:
519 s->used.ireg |= 1<<p->from.reg;
520 break;
521 case C_FREG:
522 /* do better -- determine double prec */
523 s->used.freg |= 1<<p->from.reg;
524 s->used.freg |= 1<<(p->from.reg|1);
525 if(ld && p->to.type == D_REG)
526 p->mark |= LOAD;
527 break;
528 case C_SAUTO:
529 case C_LAUTO:
530 s->used.ireg |= 1<<REGSP;
531 if(ld)
532 p->mark |= LOAD;
533 if(ad)
534 break;
535 s->size = sz;
536 s->soffset = regoff(&p->from);
537
538 s->used.cc |= E_MEMSP;
539 break;
540 case C_SEXT:
541 case C_LEXT:
542 s->used.ireg |= 1<<REGSB;
543 if(ld)
544 p->mark |= LOAD;
545 if(ad)
546 break;
547 s->size = sz;
548 s->soffset = regoff(&p->from);
549
550 s->used.cc |= E_MEMSB;
551 break;
552 }
553
554 c = p->reg;
555 if(c != NREG) {
556 if(p->from.type == D_FREG || p->to.type == D_FREG) {
557 s->used.freg |= 1<<c;
558 s->used.freg |= 1<<(c|1);
559 } else
560 s->used.ireg |= 1<<c;
561 }
562 s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */
563 }
564
565 /*
566 * test to see if 2 instrictions can be
567 * interchanged without changing semantics
568 */
569 int
depend(Sch * sa,Sch * sb)570 depend(Sch *sa, Sch *sb)
571 {
572 ulong x;
573
574 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575 return 1;
576 if(sb->set.ireg & sa->used.ireg)
577 return 1;
578
579 if(sa->set.freg & (sb->set.freg|sb->used.freg))
580 return 1;
581 if(sb->set.freg & sa->used.freg)
582 return 1;
583
584 /*
585 * special case.
586 * loads from same address cannot pass.
587 * this is for hardware fifo's and the like
588 */
589 if(sa->used.cc & sb->used.cc & E_MEM)
590 if(sa->p.reg == sb->p.reg)
591 if(regoff(&sa->p.from) == regoff(&sb->p.from))
592 return 1;
593
594 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595 (sb->set.cc & sa->used.cc);
596 if(x) {
597 /*
598 * allow SB and SP to pass each other.
599 * allow SB to pass SB iff doffsets are ok
600 * anything else conflicts
601 */
602 if(x != E_MEMSP && x != E_MEMSB)
603 return 1;
604 x = sa->set.cc | sb->set.cc |
605 sa->used.cc | sb->used.cc;
606 if(x & E_MEM)
607 return 1;
608 if(offoverlap(sa, sb))
609 return 1;
610 }
611
612 return 0;
613 }
614
615 int
offoverlap(Sch * sa,Sch * sb)616 offoverlap(Sch *sa, Sch *sb)
617 {
618
619 if(sa->soffset < sb->soffset) {
620 if(sa->soffset+sa->size > sb->soffset)
621 return 1;
622 return 0;
623 }
624 if(sb->soffset+sb->size > sa->soffset)
625 return 1;
626 return 0;
627 }
628
629 /*
630 * test 2 adjacent instructions
631 * and find out if inserted instructions
632 * are desired to prevent stalls.
633 */
634 int
conflict(Sch * sa,Sch * sb)635 conflict(Sch *sa, Sch *sb)
636 {
637
638 if(sa->set.ireg & sb->used.ireg)
639 return 1;
640 if(sa->set.freg & sb->used.freg)
641 return 1;
642 if(sa->set.cc & sb->used.cc)
643 return 1;
644
645 return 0;
646 }
647
648 int
compound(Prog * p)649 compound(Prog *p)
650 {
651 Optab *o;
652
653 o = oplook(p);
654 if(o->size != 4)
655 return 1;
656 if(p->to.type == D_REG && p->to.reg == REGSB)
657 return 1;
658 return 0;
659 }
660
661 void
dumpbits(Sch * s,Dep * d)662 dumpbits(Sch *s, Dep *d)
663 {
664 int i;
665
666 for(i=0; i<32; i++)
667 if(d->ireg & (1<<i))
668 Bprint(&bso, " R%d", i);
669 for(i=0; i<32; i++)
670 if(d->freg & (1<<i))
671 Bprint(&bso, " F%d", i);
672 for(i=0; i<32; i++)
673 switch(d->cc & (1<<i)) {
674 default:
675 break;
676 case E_HILO:
677 Bprint(&bso, " HILO");
678 break;
679 case E_FCR:
680 Bprint(&bso, " FCR");
681 break;
682 case E_MCR:
683 Bprint(&bso, " MCR");
684 break;
685 case E_MEM:
686 Bprint(&bso, " MEM%d", s->size);
687 break;
688 case E_MEMSB:
689 Bprint(&bso, " SB%d", s->size);
690 break;
691 case E_MEMSP:
692 Bprint(&bso, " SP%d", s->size);
693 break;
694 }
695 }
696