1 #include "l.h"
2
3 enum
4 {
5 E_ICC = 1<<0,
6 E_FCC = 1<<1,
7 E_MEM = 1<<2,
8 E_MEMSP = 1<<3, /* uses offset and size */
9 E_MEMSB = 1<<4, /* uses offset and size */
10 E_LR = 1<<5,
11 E_CR = 1<<6,
12 E_CTR = 1<<7,
13 E_XER = 1<<8,
14
15 E_CR0 = 0xF<<0,
16 E_CR1 = 0xF<<4,
17
18 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
19 ALL = ~0,
20 };
21
22 typedef struct Sch Sch;
23 typedef struct Dep Dep;
24
25 struct Dep
26 {
27 ulong ireg;
28 ulong freg;
29 ulong cc;
30 ulong cr;
31 };
32 struct Sch
33 {
34 Prog p;
35 Dep set;
36 Dep used;
37 long soffset;
38 char size;
39 char comp;
40 };
41
42 void regused(Sch*, Prog*);
43 int depend(Sch*, Sch*);
44 int conflict(Sch*, Sch*);
45 int offoverlap(Sch*, Sch*);
46 void dumpbits(Sch*, Dep*);
47
48 void
sched(Prog * p0,Prog * pe)49 sched(Prog *p0, Prog *pe)
50 {
51 Prog *p, *q;
52 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
53
54 if(!debug['Q'])
55 return;
56 /*
57 * build side structure
58 */
59 s = sch;
60 for(p=p0;; p=p->link) {
61 memset(s, 0, sizeof(*s));
62 s->p = *p;
63 regused(s, p);
64 if(debug['X']) {
65 Bprint(&bso, "%P\tset", &s->p);
66 dumpbits(s, &s->set);
67 Bprint(&bso, "; used");
68 dumpbits(s, &s->used);
69 if(s->comp)
70 Bprint(&bso, "; compound");
71 if(s->p.mark & LOAD)
72 Bprint(&bso, "; load");
73 if(s->p.mark & BRANCH)
74 Bprint(&bso, "; branch");
75 if(s->p.mark & FCMP)
76 Bprint(&bso, "; fcmp");
77 Bprint(&bso, "\n");
78 }
79 s++;
80 if(p == pe)
81 break;
82 }
83 se = s;
84
85 for(s=se-1; s>=sch; s--) {
86
87 /*
88 * load delay. interlocked.
89 */
90 if(s->p.mark & LOAD) {
91 if(s >= se-1)
92 continue;
93 if(!conflict(s, (s+1)))
94 continue;
95 /*
96 * s is load, s+1 is immediate use of result
97 * t is the trial instruction to insert between s and s+1
98 */
99 for(t=s-1; t>=sch; t--) {
100 if(t->p.mark & BRANCH)
101 goto no2;
102 if(t->p.mark & FCMP)
103 if((s+1)->p.mark & BRANCH)
104 goto no2;
105 if(t->p.mark & LOAD)
106 if(conflict(t, (s+1)))
107 goto no2;
108 for(u=t+1; u<=s; u++)
109 if(depend(u, t))
110 goto no2;
111 goto out2;
112 no2:;
113 }
114 if(debug['X'])
115 Bprint(&bso, "?l%P\n", &s->p);
116 continue;
117 out2:
118 if(debug['X']) {
119 Bprint(&bso, "!l%P\n", &t->p);
120 Bprint(&bso, "%P\n", &s->p);
121 }
122 stmp = *t;
123 memmove(t, t+1, (uchar*)s - (uchar*)t);
124 *s = stmp;
125 s--;
126 continue;
127 }
128
129 /*
130 * fop2 delay.
131 */
132 if(s->p.mark & FCMP) {
133 if(s >= se-1)
134 continue;
135 if(!((s+1)->p.mark & BRANCH))
136 continue;
137 /* t is the trial instruction to use */
138 for(t=s-1; t>=sch; t--) {
139 for(u=t+1; u<=s; u++)
140 if(depend(u, t))
141 goto no3;
142 goto out3;
143 no3:;
144 }
145 if(debug['X'])
146 Bprint(&bso, "?f%P\n", &s->p);
147 continue;
148 out3:
149 if(debug['X']) {
150 Bprint(&bso, "!f%P\n", &t->p);
151 Bprint(&bso, "%P\n", &s->p);
152 }
153 stmp = *t;
154 memmove(t, t+1, (uchar*)s - (uchar*)t);
155 *s = stmp;
156 s--;
157 continue;
158 }
159 }
160
161 /*
162 * put it all back
163 */
164 for(s=sch, p=p0; s<se; s++, p=q) {
165 q = p->link;
166 if(q != s->p.link) {
167 *p = s->p;
168 p->link = q;
169 }
170 }
171 if(debug['X'])
172 Bprint(&bso, "\n");
173 }
174
175 void
regused(Sch * s,Prog * realp)176 regused(Sch *s, Prog *realp)
177 {
178 int c, ar, ad, ld, sz, nr, upd;
179 ulong m;
180 Prog *p;
181
182 p = &s->p;
183 s->comp = compound(p);
184 if(s->comp) {
185 s->set.ireg |= 1<<REGTMP;
186 s->used.ireg |= 1<<REGTMP;
187 }
188 ar = 0; /* dest is really reference */
189 ad = 0; /* source/dest is really address */
190 ld = 0; /* opcode is load instruction */
191 sz = 32*4; /* size of load/store for overlap computation */
192 nr = 0; /* source/dest is not really reg */
193 upd = 0; /* move with update; changes reg */
194
195 /*
196 * flags based on opcode
197 */
198 switch(p->as) {
199 case ATEXT:
200 curtext = realp;
201 autosize = p->to.offset + 8;
202 ad = 1;
203 break;
204 case ABL:
205 s->set.cc |= E_LR;
206 ar = 1;
207 ad = 1;
208 break;
209 case ABR:
210 ar = 1;
211 ad = 1;
212 break;
213 case ACMP:
214 case ACMPU:
215 case ACMPW:
216 case ACMPWU:
217 s->set.cc |= E_ICC;
218 if(p->reg == 0)
219 s->set.cr |= E_CR0;
220 else
221 s->set.cr |= (0xF<<((p->reg&7)*4));
222 ar = 1;
223 break;
224 case AFCMPO:
225 case AFCMPU:
226 s->set.cc |= E_FCC;
227 if(p->reg == 0)
228 s->set.cr |= E_CR0;
229 else
230 s->set.cr |= (0xF<<((p->reg&7)*4));
231 ar = 1;
232 break;
233 case ACRAND:
234 case ACRANDN:
235 case ACREQV:
236 case ACRNAND:
237 case ACRNOR:
238 case ACROR:
239 case ACRORN:
240 case ACRXOR:
241 s->used.cr |= 1<<p->from.reg;
242 s->set.cr |= 1<<p->to.reg;
243 nr = 1;
244 break;
245 case ABCL: /* tricky */
246 s->used.cc |= E_FCC|E_ICC;
247 s->used.cr = ALL;
248 s->set.cc |= E_LR;
249 ar = 1;
250 break;
251 case ABC: /* tricky */
252 s->used.cc |= E_FCC|E_ICC;
253 s->used.cr = ALL;
254 ar = 1;
255 break;
256 case ABEQ:
257 case ABGE:
258 case ABGT:
259 case ABLE:
260 case ABLT:
261 case ABNE:
262 case ABVC:
263 case ABVS:
264 s->used.cc |= E_ICC;
265 s->used.cr |= E_CR0;
266 ar = 1;
267 break;
268 case ALSW:
269 case AMOVMW:
270 /* could do better */
271 sz = 32*4;
272 ld = 1;
273 break;
274 case AMOVBU:
275 case AMOVBZU:
276 upd = 1;
277 sz = 1;
278 ld = 1;
279 break;
280 case AMOVB:
281 case AMOVBZ:
282 sz = 1;
283 ld = 1;
284 break;
285 case AMOVHU:
286 case AMOVHZU:
287 upd = 1;
288 sz = 2;
289 ld = 1;
290 break;
291 case AMOVH:
292 case AMOVHBR:
293 case AMOVHZ:
294 sz = 2;
295 ld = 1;
296 break;
297 case AFMOVSU:
298 case AMOVWU:
299 case AMOVWZU:
300 upd = 1;
301 sz = 4;
302 ld = 1;
303 break;
304 case AFMOVS:
305 case AMOVW:
306 case AMOVWZ:
307 case AMOVWBR:
308 case ALWAR:
309 sz = 4;
310 ld = 1;
311 break;
312 case AFMOVDU:
313 upd = 1;
314 sz = 8;
315 ld = 1;
316 break;
317 case AFMOVD:
318 sz = 8;
319 ld = 1;
320 break;
321 case AFMOVDCC:
322 sz = 8;
323 ld = 1;
324 s->set.cc |= E_FCC;
325 s->set.cr |= E_CR1;
326 break;
327 case AMOVFL:
328 case AMOVCRFS:
329 case AMTFSB0:
330 case AMTFSB0CC:
331 case AMTFSB1:
332 case AMTFSB1CC:
333 s->set.ireg = ALL;
334 s->set.freg = ALL;
335 s->set.cc = ALL;
336 s->set.cr = ALL;
337 break;
338 case AADDCC:
339 case AADDVCC:
340 case AADDCCC:
341 case AADDCVCC:
342 case AADDMECC:
343 case AADDMEVCC:
344 case AADDECC:
345 case AADDEVCC:
346 case AADDZECC:
347 case AADDZEVCC:
348 case AANDCC:
349 case AANDNCC:
350 case ACNTLZWCC:
351 case ADIVWCC:
352 case ADIVWVCC:
353 case ADIVWUCC:
354 case ADIVWUVCC:
355 case AEQVCC:
356 case AEXTSBCC:
357 case AEXTSHCC:
358 case AMULHWCC:
359 case AMULHWUCC:
360 case AMULLWCC:
361 case AMULLWVCC:
362 case ANANDCC:
363 case ANEGCC:
364 case ANEGVCC:
365 case ANORCC:
366 case AORCC:
367 case AORNCC:
368 case AREMCC:
369 case AREMVCC:
370 case AREMUCC:
371 case AREMUVCC:
372 case ARLWMICC:
373 case ARLWNMCC:
374 case ASLWCC:
375 case ASRAWCC:
376 case ASRWCC:
377 case ASTWCCC:
378 case ASUBCC:
379 case ASUBVCC:
380 case ASUBCCC:
381 case ASUBCVCC:
382 case ASUBMECC:
383 case ASUBMEVCC:
384 case ASUBECC:
385 case ASUBEVCC:
386 case ASUBZECC:
387 case ASUBZEVCC:
388 case AXORCC:
389 s->set.cc |= E_ICC;
390 s->set.cr |= E_CR0;
391 break;
392 case AFABSCC:
393 case AFADDCC:
394 case AFADDSCC:
395 case AFCTIWCC:
396 case AFCTIWZCC:
397 case AFDIVCC:
398 case AFDIVSCC:
399 case AFMADDCC:
400 case AFMADDSCC:
401 case AFMSUBCC:
402 case AFMSUBSCC:
403 case AFMULCC:
404 case AFMULSCC:
405 case AFNABSCC:
406 case AFNEGCC:
407 case AFNMADDCC:
408 case AFNMADDSCC:
409 case AFNMSUBCC:
410 case AFNMSUBSCC:
411 case AFRSPCC:
412 case AFSUBCC:
413 case AFSUBSCC:
414 s->set.cc |= E_FCC;
415 s->set.cr |= E_CR1;
416 break;
417 }
418
419 /*
420 * flags based on 'to' field
421 */
422 c = p->to.class;
423 if(c == 0) {
424 c = aclass(&p->to) + 1;
425 p->to.class = c;
426 }
427 c--;
428 switch(c) {
429 default:
430 print("unknown class %d %D\n", c, &p->to);
431
432 case C_NONE:
433 case C_ZCON:
434 case C_SCON:
435 case C_UCON:
436 case C_LCON:
437 case C_ADDCON:
438 case C_ANDCON:
439 case C_SBRA:
440 case C_LBRA:
441 break;
442 case C_CREG:
443 c = p->to.reg;
444 if(c == NREG)
445 s->set.cr = ALL;
446 else
447 s->set.cr |= (0xF << ((p->from.reg&7)*4));
448 s->set.cc = ALL;
449 break;
450 case C_SPR:
451 case C_FPSCR:
452 case C_MSR:
453 case C_XER:
454 s->set.ireg = ALL;
455 s->set.freg = ALL;
456 s->set.cc = ALL;
457 s->set.cr = ALL;
458 break;
459 case C_LR:
460 s->set.cc |= E_LR;
461 break;
462 case C_CTR:
463 s->set.cc |= E_CTR;
464 break;
465 case C_ZOREG:
466 case C_SOREG:
467 case C_LOREG:
468 c = p->to.reg;
469 s->used.ireg |= 1<<c;
470 if(upd)
471 s->set.ireg |= 1<<c;
472 if(ad)
473 break;
474 s->size = sz;
475 s->soffset = regoff(&p->to);
476
477 m = ANYMEM;
478 if(c == REGSB)
479 m = E_MEMSB;
480 if(c == REGSP)
481 m = E_MEMSP;
482
483 if(ar)
484 s->used.cc |= m;
485 else
486 s->set.cc |= m;
487 break;
488 case C_SACON:
489 case C_LACON:
490 s->used.ireg |= 1<<REGSP;
491 if(upd)
492 s->set.ireg |= 1<<c;
493 break;
494 case C_SECON:
495 case C_LECON:
496 s->used.ireg |= 1<<REGSB;
497 if(upd)
498 s->set.ireg |= 1<<c;
499 break;
500 case C_REG:
501 if(nr)
502 break;
503 if(ar)
504 s->used.ireg |= 1<<p->to.reg;
505 else
506 s->set.ireg |= 1<<p->to.reg;
507 break;
508 case C_FREG:
509 if(ar)
510 s->used.freg |= 1<<p->to.reg;
511 else
512 s->set.freg |= 1<<p->to.reg;
513 break;
514 case C_SAUTO:
515 case C_LAUTO:
516 s->used.ireg |= 1<<REGSP;
517 if(upd)
518 s->set.ireg |= 1<<c;
519 if(ad)
520 break;
521 s->size = sz;
522 s->soffset = regoff(&p->to);
523
524 if(ar)
525 s->used.cc |= E_MEMSP;
526 else
527 s->set.cc |= E_MEMSP;
528 break;
529 case C_SEXT:
530 case C_LEXT:
531 s->used.ireg |= 1<<REGSB;
532 if(upd)
533 s->set.ireg |= 1<<c;
534 if(ad)
535 break;
536 s->size = sz;
537 s->soffset = regoff(&p->to);
538
539 if(ar)
540 s->used.cc |= E_MEMSB;
541 else
542 s->set.cc |= E_MEMSB;
543 break;
544 }
545
546 /*
547 * flags based on 'from' field
548 */
549 c = p->from.class;
550 if(c == 0) {
551 c = aclass(&p->from) + 1;
552 p->from.class = c;
553 }
554 c--;
555 switch(c) {
556 default:
557 print("unknown class %d %D\n", c, &p->from);
558
559 case C_NONE:
560 case C_ZCON:
561 case C_SCON:
562 case C_UCON:
563 case C_LCON:
564 case C_ADDCON:
565 case C_ANDCON:
566 case C_SBRA:
567 case C_LBRA:
568 c = p->from.reg;
569 if(c != NREG)
570 s->used.ireg |= 1<<c;
571 break;
572 case C_CREG:
573 c = p->from.reg;
574 if(c == NREG)
575 s->used.cr = ALL;
576 else
577 s->used.cr |= (0xF << ((p->from.reg&7)*4));
578 s->used.cc = ALL;
579 break;
580 case C_SPR:
581 case C_FPSCR:
582 case C_MSR:
583 case C_XER:
584 s->set.ireg = ALL;
585 s->set.freg = ALL;
586 s->set.cc = ALL;
587 s->set.cr = ALL;
588 break;
589 case C_LR:
590 s->used.cc |= E_LR;
591 break;
592 case C_CTR:
593 s->used.cc |= E_CTR;
594 break;
595 case C_ZOREG:
596 case C_SOREG:
597 case C_LOREG:
598 c = p->from.reg;
599 s->used.ireg |= 1<<c;
600 if(ld)
601 p->mark |= LOAD;
602 if(ad)
603 break;
604 s->size = sz;
605 s->soffset = regoff(&p->from);
606
607 m = ANYMEM;
608 if(c == REGSB)
609 m = E_MEMSB;
610 if(c == REGSP)
611 m = E_MEMSP;
612
613 s->used.cc |= m;
614 break;
615 case C_SACON:
616 case C_LACON:
617 s->used.ireg |= 1<<REGSP;
618 break;
619 case C_SECON:
620 case C_LECON:
621 s->used.ireg |= 1<<REGSB;
622 break;
623 case C_REG:
624 if(nr)
625 break;
626 s->used.ireg |= 1<<p->from.reg;
627 break;
628 case C_FREG:
629 s->used.freg |= 1<<p->from.reg;
630 break;
631 case C_SAUTO:
632 case C_LAUTO:
633 s->used.ireg |= 1<<REGSP;
634 if(ld)
635 p->mark |= LOAD;
636 if(ad)
637 break;
638 s->size = sz;
639 s->soffset = regoff(&p->from);
640
641 s->used.cc |= E_MEMSP;
642 break;
643 case C_SEXT:
644 case C_LEXT:
645 s->used.ireg |= 1<<REGSB;
646 if(ld)
647 p->mark |= LOAD;
648 if(ad)
649 break;
650 s->size = sz;
651 s->soffset = regoff(&p->from);
652
653 s->used.cc |= E_MEMSB;
654 break;
655 }
656
657 c = p->reg;
658 if(c != NREG) {
659 if(p->from.type == D_FREG || p->to.type == D_FREG)
660 s->used.freg |= 1<<c;
661 else
662 s->used.ireg |= 1<<c;
663 }
664 }
665
666 /*
667 * test to see if 2 instrictions can be
668 * interchanged without changing semantics
669 */
670 int
depend(Sch * sa,Sch * sb)671 depend(Sch *sa, Sch *sb)
672 {
673 ulong x;
674
675 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
676 return 1;
677 if(sb->set.ireg & sa->used.ireg)
678 return 1;
679
680 if(sa->set.freg & (sb->set.freg|sb->used.freg))
681 return 1;
682 if(sb->set.freg & sa->used.freg)
683 return 1;
684
685 if(sa->set.cr & (sb->set.cr|sb->used.cr))
686 return 1;
687 if(sb->set.cr & sa->used.cr)
688 return 1;
689
690
691 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
692 (sb->set.cc & sa->used.cc);
693 if(x) {
694 /*
695 * allow SB and SP to pass each other.
696 * allow SB to pass SB iff doffsets are ok
697 * anything else conflicts
698 */
699 if(x != E_MEMSP && x != E_MEMSB)
700 return 1;
701 x = sa->set.cc | sb->set.cc |
702 sa->used.cc | sb->used.cc;
703 if(x & E_MEM)
704 return 1;
705 if(offoverlap(sa, sb))
706 return 1;
707 }
708
709 return 0;
710 }
711
712 int
offoverlap(Sch * sa,Sch * sb)713 offoverlap(Sch *sa, Sch *sb)
714 {
715
716 if(sa->soffset < sb->soffset) {
717 if(sa->soffset+sa->size > sb->soffset)
718 return 1;
719 return 0;
720 }
721 if(sb->soffset+sb->size > sa->soffset)
722 return 1;
723 return 0;
724 }
725
726 /*
727 * test 2 adjacent instructions
728 * and find out if inserted instructions
729 * are desired to prevent stalls.
730 * first instruction is a load instruction.
731 */
732 int
conflict(Sch * sa,Sch * sb)733 conflict(Sch *sa, Sch *sb)
734 {
735
736 if(sa->set.ireg & sb->used.ireg)
737 return 1;
738 if(sa->set.freg & sb->used.freg)
739 return 1;
740 if(sa->set.cr & sb->used.cr)
741 return 1;
742 return 0;
743 }
744
745 int
compound(Prog * p)746 compound(Prog *p)
747 {
748 Optab *o;
749
750 o = oplook(p);
751 if(o->size != 4)
752 return 1;
753 if(p->to.type == D_REG && p->to.reg == REGSB)
754 return 1;
755 return 0;
756 }
757
758 void
dumpbits(Sch * s,Dep * d)759 dumpbits(Sch *s, Dep *d)
760 {
761 int i;
762
763 for(i=0; i<32; i++)
764 if(d->ireg & (1<<i))
765 Bprint(&bso, " R%d", i);
766 for(i=0; i<32; i++)
767 if(d->freg & (1<<i))
768 Bprint(&bso, " F%d", i);
769 for(i=0; i<32; i++)
770 if(d->cr & (1<<i))
771 Bprint(&bso, " C%d", i);
772 for(i=0; i<32; i++)
773 switch(d->cc & (1<<i)) {
774 default:
775 break;
776 case E_ICC:
777 Bprint(&bso, " ICC");
778 break;
779 case E_FCC:
780 Bprint(&bso, " FCC");
781 break;
782 case E_LR:
783 Bprint(&bso, " LR");
784 break;
785 case E_CR:
786 Bprint(&bso, " CR");
787 break;
788 case E_CTR:
789 Bprint(&bso, " CTR");
790 break;
791 case E_XER:
792 Bprint(&bso, " XER");
793 break;
794 case E_MEM:
795 Bprint(&bso, " MEM%d", s->size);
796 break;
797 case E_MEMSB:
798 Bprint(&bso, " SB%d", s->size);
799 break;
800 case E_MEMSP:
801 Bprint(&bso, " SP%d", s->size);
802 break;
803 }
804 }
805