1 #include "l.h"
2
3 enum
4 {
5 E_ICC = 1<<0,
6 E_FCC = 1<<1,
7 E_MEM = 1<<2,
8 E_MEMSP = 1<<3, /* uses offset and size */
9 E_MEMSB = 1<<4, /* uses offset and size */
10 E_LR = 1<<5,
11 E_CR = 1<<6,
12 E_CTR = 1<<7,
13 E_XER = 1<<8,
14
15 E_CR0 = 0xF<<0,
16 E_CR1 = 0xF<<4,
17
18 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
19 ALL = ~0,
20 };
21
22 typedef struct Sch Sch;
23 typedef struct Dep Dep;
24
25 struct Dep
26 {
27 ulong ireg;
28 ulong freg;
29 ulong cc;
30 ulong cr;
31 };
32 struct Sch
33 {
34 Prog p;
35 Dep set;
36 Dep used;
37 long soffset;
38 char size;
39 char comp;
40 };
41
42 void regused(Sch*, Prog*);
43 int depend(Sch*, Sch*);
44 int conflict(Sch*, Sch*);
45 int offoverlap(Sch*, Sch*);
46 void dumpbits(Sch*, Dep*);
47
48 void
sched(Prog * p0,Prog * pe)49 sched(Prog *p0, Prog *pe)
50 {
51 Prog *p, *q;
52 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
53
54 if(!debug['Q'])
55 return;
56 /*
57 * build side structure
58 */
59 s = sch;
60 for(p=p0;; p=p->link) {
61 memset(s, 0, sizeof(*s));
62 s->p = *p;
63 regused(s, p);
64 if(debug['X']) {
65 Bprint(&bso, "%P\tset", &s->p);
66 dumpbits(s, &s->set);
67 Bprint(&bso, "; used");
68 dumpbits(s, &s->used);
69 if(s->comp)
70 Bprint(&bso, "; compound");
71 if(s->p.mark & LOAD)
72 Bprint(&bso, "; load");
73 if(s->p.mark & BRANCH)
74 Bprint(&bso, "; branch");
75 if(s->p.mark & FCMP)
76 Bprint(&bso, "; fcmp");
77 Bprint(&bso, "\n");
78 }
79 s++;
80 if(p == pe)
81 break;
82 }
83 se = s;
84
85 for(s=se-1; s>=sch; s--) {
86
87 /*
88 * load delay. interlocked.
89 */
90 if(s->p.mark & LOAD) {
91 if(s >= se-1)
92 continue;
93 if(!conflict(s, (s+1)))
94 continue;
95 /*
96 * s is load, s+1 is immediate use of result
97 * t is the trial instruction to insert between s and s+1
98 */
99 for(t=s-1; t>=sch; t--) {
100 if(t->p.mark & BRANCH)
101 goto no2;
102 if(t->p.mark & FCMP)
103 if((s+1)->p.mark & BRANCH)
104 goto no2;
105 if(t->p.mark & LOAD)
106 if(conflict(t, (s+1)))
107 goto no2;
108 for(u=t+1; u<=s; u++)
109 if(depend(u, t))
110 goto no2;
111 goto out2;
112 no2:;
113 }
114 if(debug['X'])
115 Bprint(&bso, "?l%P\n", &s->p);
116 continue;
117 out2:
118 if(debug['X']) {
119 Bprint(&bso, "!l%P\n", &t->p);
120 Bprint(&bso, "%P\n", &s->p);
121 }
122 stmp = *t;
123 memmove(t, t+1, (uchar*)s - (uchar*)t);
124 *s = stmp;
125 s--;
126 continue;
127 }
128
129 /*
130 * fop2 delay.
131 */
132 if(s->p.mark & FCMP) {
133 if(s >= se-1)
134 continue;
135 if(!((s+1)->p.mark & BRANCH))
136 continue;
137 /* t is the trial instruction to use */
138 for(t=s-1; t>=sch; t--) {
139 for(u=t+1; u<=s; u++)
140 if(depend(u, t))
141 goto no3;
142 goto out3;
143 no3:;
144 }
145 if(debug['X'])
146 Bprint(&bso, "?f%P\n", &s->p);
147 continue;
148 out3:
149 if(debug['X']) {
150 Bprint(&bso, "!f%P\n", &t->p);
151 Bprint(&bso, "%P\n", &s->p);
152 }
153 stmp = *t;
154 memmove(t, t+1, (uchar*)s - (uchar*)t);
155 *s = stmp;
156 s--;
157 continue;
158 }
159 }
160
161 /*
162 * put it all back
163 */
164 for(s=sch, p=p0; s<se; s++, p=q) {
165 q = p->link;
166 if(q != s->p.link) {
167 *p = s->p;
168 p->link = q;
169 }
170 }
171 if(debug['X'])
172 Bprint(&bso, "\n");
173 }
174
175 void
regused(Sch * s,Prog * realp)176 regused(Sch *s, Prog *realp)
177 {
178 int c, ar, ad, ld, sz, nr, upd;
179 ulong m;
180 Prog *p;
181
182 p = &s->p;
183 s->comp = compound(p);
184 if(s->comp) {
185 s->set.ireg |= 1<<REGTMP;
186 s->used.ireg |= 1<<REGTMP;
187 }
188 ar = 0; /* dest is really reference */
189 ad = 0; /* source/dest is really address */
190 ld = 0; /* opcode is load instruction */
191 sz = 32*4; /* size of load/store for overlap computation */
192 nr = 0; /* source/dest is not really reg */
193 upd = 0; /* move with update; changes reg */
194
195 /*
196 * flags based on opcode
197 */
198 switch(p->as) {
199 case ATEXT:
200 curtext = realp;
201 autosize = p->to.offset + 4;
202 ad = 1;
203 break;
204 case ABL:
205 s->set.cc |= E_LR;
206 ar = 1;
207 ad = 1;
208 break;
209 case ABR:
210 ar = 1;
211 ad = 1;
212 break;
213 case ACMP:
214 s->set.cc |= E_ICC;
215 if(p->reg == 0)
216 s->set.cr |= E_CR0;
217 else
218 s->set.cr |= (0xF<<((p->reg&7)*4));
219 ar = 1;
220 break;
221 case AFCMPO:
222 case AFCMPU:
223 s->set.cc |= E_FCC;
224 if(p->reg == 0)
225 s->set.cr |= E_CR0;
226 else
227 s->set.cr |= (0xF<<((p->reg&7)*4));
228 ar = 1;
229 break;
230 case ACRAND:
231 case ACRANDN:
232 case ACREQV:
233 case ACRNAND:
234 case ACRNOR:
235 case ACROR:
236 case ACRORN:
237 case ACRXOR:
238 s->used.cr |= 1<<p->from.reg;
239 s->set.cr |= 1<<p->to.reg;
240 nr = 1;
241 break;
242 case ABCL: /* tricky */
243 s->used.cc |= E_FCC|E_ICC;
244 s->used.cr = ALL;
245 s->set.cc |= E_LR;
246 ar = 1;
247 break;
248 case ABC: /* tricky */
249 s->used.cc |= E_FCC|E_ICC;
250 s->used.cr = ALL;
251 ar = 1;
252 break;
253 case ABEQ:
254 case ABGE:
255 case ABGT:
256 case ABLE:
257 case ABLT:
258 case ABNE:
259 case ABVC:
260 case ABVS:
261 s->used.cc |= E_ICC;
262 s->used.cr |= E_CR0;
263 ar = 1;
264 break;
265 case ALSW:
266 case AMOVMW:
267 /* could do better */
268 sz = 32*4;
269 ld = 1;
270 break;
271 case AMOVBU:
272 case AMOVBZU:
273 upd = 1;
274 sz = 1;
275 ld = 1;
276 break;
277 case AMOVB:
278 case AMOVBZ:
279 sz = 1;
280 ld = 1;
281 break;
282 case AMOVHU:
283 case AMOVHZU:
284 upd = 1;
285 sz = 2;
286 ld = 1;
287 break;
288 case AMOVH:
289 case AMOVHBR:
290 case AMOVHZ:
291 sz = 2;
292 ld = 1;
293 break;
294 case AFMOVSU:
295 case AMOVWU:
296 upd = 1;
297 sz = 4;
298 ld = 1;
299 break;
300 case AFMOVS:
301 case AMOVW:
302 case AMOVWBR:
303 case ALWAR:
304 sz = 4;
305 ld = 1;
306 break;
307 case AFMOVDU:
308 upd = 1;
309 sz = 8;
310 ld = 1;
311 break;
312 case AFMOVD:
313 sz = 8;
314 ld = 1;
315 break;
316 case AFMOVDCC:
317 sz = 8;
318 ld = 1;
319 s->set.cc |= E_FCC;
320 s->set.cr |= E_CR1;
321 break;
322 case AMOVFL:
323 case AMOVCRFS:
324 case AMTFSB0:
325 case AMTFSB0CC:
326 case AMTFSB1:
327 case AMTFSB1CC:
328 s->set.ireg = ALL;
329 s->set.freg = ALL;
330 s->set.cc = ALL;
331 s->set.cr = ALL;
332 break;
333 case AADDCC:
334 case AADDVCC:
335 case AADDCCC:
336 case AADDCVCC:
337 case AADDMECC:
338 case AADDMEVCC:
339 case AADDECC:
340 case AADDEVCC:
341 case AADDZECC:
342 case AADDZEVCC:
343 case AANDCC:
344 case AANDNCC:
345 case ACNTLZWCC:
346 case ADIVWCC:
347 case ADIVWVCC:
348 case ADIVWUCC:
349 case ADIVWUVCC:
350 case AEQVCC:
351 case AEXTSBCC:
352 case AEXTSHCC:
353 case AMULHWCC:
354 case AMULHWUCC:
355 case AMULLWCC:
356 case AMULLWVCC:
357 case ANANDCC:
358 case ANEGCC:
359 case ANEGVCC:
360 case ANORCC:
361 case AORCC:
362 case AORNCC:
363 case AREMCC:
364 case AREMVCC:
365 case AREMUCC:
366 case AREMUVCC:
367 case ARLWMICC:
368 case ARLWNMCC:
369 case ASLWCC:
370 case ASRAWCC:
371 case ASRWCC:
372 case ASTWCCC:
373 case ASUBCC:
374 case ASUBVCC:
375 case ASUBCCC:
376 case ASUBCVCC:
377 case ASUBMECC:
378 case ASUBMEVCC:
379 case ASUBECC:
380 case ASUBEVCC:
381 case ASUBZECC:
382 case ASUBZEVCC:
383 case AXORCC:
384 s->set.cc |= E_ICC;
385 s->set.cr |= E_CR0;
386 break;
387 case AFABSCC:
388 case AFADDCC:
389 case AFADDSCC:
390 case AFCTIWCC:
391 case AFCTIWZCC:
392 case AFDIVCC:
393 case AFDIVSCC:
394 case AFMADDCC:
395 case AFMADDSCC:
396 case AFMSUBCC:
397 case AFMSUBSCC:
398 case AFMULCC:
399 case AFMULSCC:
400 case AFNABSCC:
401 case AFNEGCC:
402 case AFNMADDCC:
403 case AFNMADDSCC:
404 case AFNMSUBCC:
405 case AFNMSUBSCC:
406 case AFRSPCC:
407 case AFSUBCC:
408 case AFSUBSCC:
409 s->set.cc |= E_FCC;
410 s->set.cr |= E_CR1;
411 break;
412 }
413
414 /*
415 * flags based on 'to' field
416 */
417 c = p->to.class;
418 if(c == 0) {
419 c = aclass(&p->to) + 1;
420 p->to.class = c;
421 }
422 c--;
423 switch(c) {
424 default:
425 print("unknown class %d %D\n", c, &p->to);
426
427 case C_NONE:
428 case C_ZCON:
429 case C_SCON:
430 case C_UCON:
431 case C_LCON:
432 case C_ADDCON:
433 case C_ANDCON:
434 case C_SBRA:
435 case C_LBRA:
436 break;
437 case C_CREG:
438 c = p->to.reg;
439 if(c == NREG)
440 s->set.cr = ALL;
441 else
442 s->set.cr |= (0xF << ((p->from.reg&7)*4));
443 s->set.cc = ALL;
444 break;
445 case C_SPR:
446 case C_SREG:
447 case C_FPSCR:
448 case C_MSR:
449 case C_XER:
450 s->set.ireg = ALL;
451 s->set.freg = ALL;
452 s->set.cc = ALL;
453 s->set.cr = ALL;
454 break;
455 case C_LR:
456 s->set.cc |= E_LR;
457 break;
458 case C_CTR:
459 s->set.cc |= E_CTR;
460 break;
461 case C_ZOREG:
462 case C_SOREG:
463 case C_LOREG:
464 c = p->to.reg;
465 s->used.ireg |= 1<<c;
466 if(upd)
467 s->set.ireg |= 1<<c;
468 if(ad)
469 break;
470 s->size = sz;
471 s->soffset = regoff(&p->to);
472
473 m = ANYMEM;
474 if(c == REGSB)
475 m = E_MEMSB;
476 if(c == REGSP)
477 m = E_MEMSP;
478
479 if(ar)
480 s->used.cc |= m;
481 else
482 s->set.cc |= m;
483 break;
484 case C_SACON:
485 case C_LACON:
486 s->used.ireg |= 1<<REGSP;
487 if(upd)
488 s->set.ireg |= 1<<c;
489 break;
490 case C_SECON:
491 case C_LECON:
492 s->used.ireg |= 1<<REGSB;
493 if(upd)
494 s->set.ireg |= 1<<c;
495 break;
496 case C_REG:
497 if(nr)
498 break;
499 if(ar)
500 s->used.ireg |= 1<<p->to.reg;
501 else
502 s->set.ireg |= 1<<p->to.reg;
503 break;
504 case C_FREG:
505 if(ar)
506 s->used.freg |= 1<<p->to.reg;
507 else
508 s->set.freg |= 1<<p->to.reg;
509 break;
510 case C_SAUTO:
511 case C_LAUTO:
512 s->used.ireg |= 1<<REGSP;
513 if(upd)
514 s->set.ireg |= 1<<c;
515 if(ad)
516 break;
517 s->size = sz;
518 s->soffset = regoff(&p->to);
519
520 if(ar)
521 s->used.cc |= E_MEMSP;
522 else
523 s->set.cc |= E_MEMSP;
524 break;
525 case C_SEXT:
526 case C_LEXT:
527 s->used.ireg |= 1<<REGSB;
528 if(upd)
529 s->set.ireg |= 1<<c;
530 if(ad)
531 break;
532 s->size = sz;
533 s->soffset = regoff(&p->to);
534
535 if(ar)
536 s->used.cc |= E_MEMSB;
537 else
538 s->set.cc |= E_MEMSB;
539 break;
540 }
541
542 /*
543 * flags based on 'from' field
544 */
545 c = p->from.class;
546 if(c == 0) {
547 c = aclass(&p->from) + 1;
548 p->from.class = c;
549 }
550 c--;
551 switch(c) {
552 default:
553 print("unknown class %d %D\n", c, &p->from);
554
555 case C_NONE:
556 case C_ZCON:
557 case C_SCON:
558 case C_UCON:
559 case C_LCON:
560 case C_ADDCON:
561 case C_ANDCON:
562 case C_SBRA:
563 case C_LBRA:
564 c = p->from.reg;
565 if(c != NREG)
566 s->used.ireg |= 1<<c;
567 break;
568 case C_CREG:
569 c = p->from.reg;
570 if(c == NREG)
571 s->used.cr = ALL;
572 else
573 s->used.cr |= (0xF << ((p->from.reg&7)*4));
574 s->used.cc = ALL;
575 break;
576 case C_SPR:
577 case C_SREG:
578 case C_FPSCR:
579 case C_MSR:
580 case C_XER:
581 s->set.ireg = ALL;
582 s->set.freg = ALL;
583 s->set.cc = ALL;
584 s->set.cr = ALL;
585 break;
586 case C_LR:
587 s->used.cc |= E_LR;
588 break;
589 case C_CTR:
590 s->used.cc |= E_CTR;
591 break;
592 case C_ZOREG:
593 case C_SOREG:
594 case C_LOREG:
595 c = p->from.reg;
596 s->used.ireg |= 1<<c;
597 if(ld)
598 p->mark |= LOAD;
599 if(ad)
600 break;
601 s->size = sz;
602 s->soffset = regoff(&p->from);
603
604 m = ANYMEM;
605 if(c == REGSB)
606 m = E_MEMSB;
607 if(c == REGSP)
608 m = E_MEMSP;
609
610 s->used.cc |= m;
611 break;
612 case C_SACON:
613 case C_LACON:
614 s->used.ireg |= 1<<REGSP;
615 break;
616 case C_SECON:
617 case C_LECON:
618 s->used.ireg |= 1<<REGSB;
619 break;
620 case C_REG:
621 if(nr)
622 break;
623 s->used.ireg |= 1<<p->from.reg;
624 break;
625 case C_FREG:
626 s->used.freg |= 1<<p->from.reg;
627 break;
628 case C_SAUTO:
629 case C_LAUTO:
630 s->used.ireg |= 1<<REGSP;
631 if(ld)
632 p->mark |= LOAD;
633 if(ad)
634 break;
635 s->size = sz;
636 s->soffset = regoff(&p->from);
637
638 s->used.cc |= E_MEMSP;
639 break;
640 case C_SEXT:
641 case C_LEXT:
642 s->used.ireg |= 1<<REGSB;
643 if(ld)
644 p->mark |= LOAD;
645 if(ad)
646 break;
647 s->size = sz;
648 s->soffset = regoff(&p->from);
649
650 s->used.cc |= E_MEMSB;
651 break;
652 }
653
654 c = p->reg;
655 if(c != NREG) {
656 if(p->from.type == D_FREG || p->to.type == D_FREG)
657 s->used.freg |= 1<<c;
658 else
659 s->used.ireg |= 1<<c;
660 }
661 }
662
663 /*
664 * test to see if 2 instrictions can be
665 * interchanged without changing semantics
666 */
667 int
depend(Sch * sa,Sch * sb)668 depend(Sch *sa, Sch *sb)
669 {
670 ulong x;
671
672 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
673 return 1;
674 if(sb->set.ireg & sa->used.ireg)
675 return 1;
676
677 if(sa->set.freg & (sb->set.freg|sb->used.freg))
678 return 1;
679 if(sb->set.freg & sa->used.freg)
680 return 1;
681
682 if(sa->set.cr & (sb->set.cr|sb->used.cr))
683 return 1;
684 if(sb->set.cr & sa->used.cr)
685 return 1;
686
687
688 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
689 (sb->set.cc & sa->used.cc);
690 if(x) {
691 /*
692 * allow SB and SP to pass each other.
693 * allow SB to pass SB iff doffsets are ok
694 * anything else conflicts
695 */
696 if(x != E_MEMSP && x != E_MEMSB)
697 return 1;
698 x = sa->set.cc | sb->set.cc |
699 sa->used.cc | sb->used.cc;
700 if(x & E_MEM)
701 return 1;
702 if(offoverlap(sa, sb))
703 return 1;
704 }
705
706 return 0;
707 }
708
709 int
offoverlap(Sch * sa,Sch * sb)710 offoverlap(Sch *sa, Sch *sb)
711 {
712
713 if(sa->soffset < sb->soffset) {
714 if(sa->soffset+sa->size > sb->soffset)
715 return 1;
716 return 0;
717 }
718 if(sb->soffset+sb->size > sa->soffset)
719 return 1;
720 return 0;
721 }
722
723 /*
724 * test 2 adjacent instructions
725 * and find out if inserted instructions
726 * are desired to prevent stalls.
727 * first instruction is a load instruction.
728 */
729 int
conflict(Sch * sa,Sch * sb)730 conflict(Sch *sa, Sch *sb)
731 {
732
733 if(sa->set.ireg & sb->used.ireg)
734 return 1;
735 if(sa->set.freg & sb->used.freg)
736 return 1;
737 if(sa->set.cr & sb->used.cr)
738 return 1;
739 return 0;
740 }
741
742 int
compound(Prog * p)743 compound(Prog *p)
744 {
745 Optab *o;
746
747 o = oplook(p);
748 if(o->size != 4)
749 return 1;
750 if(p->to.type == D_REG && p->to.reg == REGSB)
751 return 1;
752 return 0;
753 }
754
755 void
dumpbits(Sch * s,Dep * d)756 dumpbits(Sch *s, Dep *d)
757 {
758 int i;
759
760 for(i=0; i<32; i++)
761 if(d->ireg & (1<<i))
762 Bprint(&bso, " R%d", i);
763 for(i=0; i<32; i++)
764 if(d->freg & (1<<i))
765 Bprint(&bso, " F%d", i);
766 for(i=0; i<32; i++)
767 if(d->cr & (1<<i))
768 Bprint(&bso, " C%d", i);
769 for(i=0; i<32; i++)
770 switch(d->cc & (1<<i)) {
771 default:
772 break;
773 case E_ICC:
774 Bprint(&bso, " ICC");
775 break;
776 case E_FCC:
777 Bprint(&bso, " FCC");
778 break;
779 case E_LR:
780 Bprint(&bso, " LR");
781 break;
782 case E_CR:
783 Bprint(&bso, " CR");
784 break;
785 case E_CTR:
786 Bprint(&bso, " CTR");
787 break;
788 case E_XER:
789 Bprint(&bso, " XER");
790 break;
791 case E_MEM:
792 Bprint(&bso, " MEM%d", s->size);
793 break;
794 case E_MEMSB:
795 Bprint(&bso, " SB%d", s->size);
796 break;
797 case E_MEMSP:
798 Bprint(&bso, " SP%d", s->size);
799 break;
800 }
801 }
802