1 #include "l.h"
2
3 enum
4 {
5 E_ICC = 1<<0,
6 E_FCC = 1<<1,
7 E_MEM = 1<<2,
8 E_MEMSP = 1<<3, /* uses offset and size */
9 E_MEMSB = 1<<4, /* uses offset and size */
10 ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
11 ALL = ~0
12 };
13
14 typedef struct Sch Sch;
15 typedef struct Dep Dep;
16
17 struct Dep
18 {
19 ulong ireg;
20 ulong freg;
21 ulong cc;
22 };
23 struct Sch
24 {
25 Prog p;
26 Dep set;
27 Dep used;
28 long soffset;
29 char size;
30 char nop;
31 char comp;
32 };
33
34 void regsused(Sch*, Prog*);
35 int depend(Sch*, Sch*);
36 int conflict(Sch*, Sch*);
37 int offoverlap(Sch*, Sch*);
38 void dumpbits(Sch*, Dep*);
39
40 void
sched(Prog * p0,Prog * pe)41 sched(Prog *p0, Prog *pe)
42 {
43 Prog *p, *q;
44 Sch sch[NSCHED], *s, *t, *u, *se, stmp;
45
46 /*
47 * build side structure
48 */
49 s = sch;
50 for(p=p0;; p=p->link) {
51 memset(s, 0, sizeof(*s));
52 s->p = *p;
53 regsused(s, p);
54 if(debug['X']) {
55 Bprint(&bso, "%P\tset", &s->p);
56 dumpbits(s, &s->set);
57 Bprint(&bso, "; used");
58 dumpbits(s, &s->used);
59 if(s->comp)
60 Bprint(&bso, "; compound");
61 if(s->p.mark & LOAD)
62 Bprint(&bso, "; load");
63 if(s->p.mark & BRANCH)
64 Bprint(&bso, "; branch");
65 if(s->p.mark & FCMP)
66 Bprint(&bso, "; fcmp");
67 Bprint(&bso, "\n");
68 }
69 s++;
70 if(p == pe)
71 break;
72 }
73 se = s;
74
75 for(s=se-1; s>=sch; s--) {
76 /*
77 * branch delay slot
78 */
79 if(s->p.mark & BRANCH) {
80 /* t is the trial instruction to use */
81 for(t=s-1; t>=sch; t--) {
82 if(t->comp || (t->p.mark & FCMP))
83 goto no1;
84 for(u=t+1; u<=s; u++)
85 if(depend(u, t))
86 goto no1;
87 goto out1;
88 no1:;
89 }
90 if(debug['X'])
91 Bprint(&bso, "?b%P\n", &s->p);
92 s->nop = 1;
93 continue;
94
95 out1:
96 if(debug['X']) {
97 Bprint(&bso, "!b%P\n", &t->p);
98 Bprint(&bso, "%P\n", &s->p);
99 }
100 stmp = *t;
101 memmove(t, t+1, (uchar*)s - (uchar*)t);
102 *s = stmp;
103 s--;
104 continue;
105 }
106
107 /*
108 * load delay. interlocked.
109 */
110 if(s->p.mark & LOAD) {
111 if(s >= se-1)
112 continue;
113 if(!conflict(s, (s+1)))
114 continue;
115 /*
116 * s is load, s+1 is immediate use of result
117 * t is the trial instruction to insert between s and s+1
118 */
119 for(t=s-1; t>=sch; t--) {
120 if(t->p.mark & BRANCH)
121 goto no2;
122 if(t->p.mark & FCMP)
123 if((s+1)->p.mark & BRANCH)
124 goto no2;
125 if(t->p.mark & LOAD)
126 if(conflict(t, (s+1)))
127 goto no2;
128 for(u=t+1; u<=s; u++)
129 if(depend(u, t))
130 goto no2;
131 goto out2;
132 no2:;
133 }
134 if(debug['X'])
135 Bprint(&bso, "?l%P\n", &s->p);
136 continue;
137 out2:
138 if(debug['X']) {
139 Bprint(&bso, "!l%P\n", &t->p);
140 Bprint(&bso, "%P\n", &s->p);
141 }
142 stmp = *t;
143 memmove(t, t+1, (uchar*)s - (uchar*)t);
144 *s = stmp;
145 s--;
146 continue;
147 }
148
149 /*
150 * fop2 delay.
151 */
152 if(s->p.mark & FCMP) {
153 if(s >= se-1) {
154 s->nop = 1;
155 continue;
156 }
157 if(!((s+1)->p.mark & BRANCH))
158 continue;
159 /* t is the trial instruction to use */
160 for(t=s-1; t>=sch; t--) {
161 for(u=t+1; u<=s; u++)
162 if(depend(u, t))
163 goto no3;
164 goto out3;
165 no3:;
166 }
167 if(debug['X'])
168 Bprint(&bso, "?f%P\n", &s->p);
169 s->nop = 1;
170 continue;
171 out3:
172 if(debug['X']) {
173 Bprint(&bso, "!f%P\n", &t->p);
174 Bprint(&bso, "%P\n", &s->p);
175 }
176 stmp = *t;
177 memmove(t, t+1, (uchar*)s - (uchar*)t);
178 *s = stmp;
179 s--;
180 continue;
181 }
182 }
183
184 /*
185 * put it all back
186 */
187 for(s=sch, p=p0; s<se; s++, p=q) {
188 q = p->link;
189 if(q != s->p.link) {
190 *p = s->p;
191 p->link = q;
192 }
193 if(s->nop)
194 addnop(p);
195 }
196 if(debug['X'])
197 Bprint(&bso, "\n");
198 }
199
200 void
regsused(Sch * s,Prog * realp)201 regsused(Sch *s, Prog *realp)
202 {
203 int c, ar, ad, ld, sz;
204 ulong m;
205 Prog *p;
206
207 p = &s->p;
208 s->comp = compound(p);
209 s->nop = 0;
210 if(s->comp) {
211 s->set.ireg |= 1<<REGTMP;
212 s->used.ireg |= 1<<REGTMP;
213 }
214 ar = 0; /* dest is really reference */
215 ad = 0; /* source/dest is really address */
216 ld = 0; /* opcode is load instruction */
217 sz = 20; /* size of load/store for overlap computation */
218
219 /*
220 * flags based on opcode
221 */
222 switch(p->as) {
223 case ATEXT:
224 curtext = realp;
225 autosize = p->to.offset + 4;
226 ad = 1;
227 break;
228 case AJMPL:
229 c = p->reg;
230 if(c == NREG)
231 c = REGLINK;
232 s->set.ireg |= 1<<c;
233 ar = 1;
234 ad = 1;
235 break;
236 case AJMP:
237 ar = 1;
238 ad = 1;
239 break;
240 case ACMP:
241 s->set.cc |= E_ICC;
242 ar = 1;
243 break;
244 case AFCMPD:
245 case AFCMPED:
246 case AFCMPEF:
247 case AFCMPEX:
248 case AFCMPF:
249 case AFCMPX:
250 s->set.cc |= E_FCC;
251 ar = 1;
252 break;
253 case ABE:
254 case ABNE:
255 case ABLE:
256 case ABG:
257 case ABL:
258 case ABGE:
259 case ABLEU:
260 case ABGU:
261 case ABCS:
262 case ABCC:
263 case ABNEG:
264 case ABPOS:
265 case ABVC:
266 case ABVS:
267 s->used.cc |= E_ICC;
268 ar = 1;
269 break;
270 case AFBE:
271 case AFBLG:
272 case AFBG:
273 case AFBLE:
274 case AFBGE:
275 case AFBL:
276 s->used.cc |= E_FCC;
277 ar = 1;
278 break;
279 case AMOVB:
280 case AMOVBU:
281 sz = 1;
282 ld = 1;
283 break;
284 case AMOVH:
285 case AMOVHU:
286 sz = 2;
287 ld = 1;
288 break;
289 case AFMOVF:
290 case AMOVW:
291 sz = 4;
292 ld = 1;
293 break;
294 case AMOVD:
295 case AFMOVD:
296 sz = 8;
297 ld = 1;
298 break;
299 case AFMOVX: /* gok */
300 sz = 16;
301 ld = 1;
302 break;
303 case AADDCC:
304 case AADDXCC:
305 case AANDCC:
306 case AANDNCC:
307 case AORCC:
308 case AORNCC:
309 case ASUBCC:
310 case ASUBXCC:
311 case ATADDCC:
312 case ATADDCCTV:
313 case ATSUBCC:
314 case ATSUBCCTV:
315 case AXNORCC:
316 case AXORCC:
317 s->set.cc |= E_ICC;
318 break;
319 case ADIV:
320 case ADIVL:
321 case AMOD:
322 case AMODL:
323 case AMUL:
324 case AMULSCC:
325 case ATAS:
326 s->set.ireg = ALL;
327 s->set.freg = ALL;
328 s->set.cc = ALL;
329 break;
330 }
331
332 /*
333 * flags based on 'to' field
334 */
335 c = p->to.class;
336 if(c == 0) {
337 c = aclass(&p->to) + 1;
338 p->to.class = c;
339 }
340 c--;
341 switch(c) {
342 default:
343 print("unknown class %d %D\n", c, &p->to);
344
345 case C_ZCON:
346 case C_SCON:
347 case C_UCON:
348 case C_LCON:
349 case C_NONE:
350 case C_SBRA:
351 case C_LBRA:
352 break;
353 case C_CREG:
354 case C_FSR:
355 case C_FQ:
356 case C_PREG:
357 s->set.ireg = ALL;
358 s->set.freg = ALL;
359 s->set.cc = ALL;
360 break;
361 case C_ZOREG:
362 case C_SOREG:
363 case C_LOREG:
364 case C_ASI:
365 c = p->to.reg;
366 s->used.ireg |= 1<<c;
367 if(ad)
368 break;
369 s->size = sz;
370 s->soffset = regoff(&p->to);
371
372 m = ANYMEM;
373 if(c == REGSB)
374 m = E_MEMSB;
375 if(c == REGSP)
376 m = E_MEMSP;
377
378 if(ar)
379 s->used.cc |= m;
380 else
381 s->set.cc |= m;
382 break;
383 case C_SACON:
384 case C_LACON:
385 s->used.ireg |= 1<<REGSP;
386 break;
387 case C_SECON:
388 case C_LECON:
389 s->used.ireg |= 1<<REGSB;
390 break;
391 case C_REG:
392 if(ar)
393 s->used.ireg |= 1<<p->to.reg;
394 else
395 s->set.ireg |= 1<<p->to.reg;
396 break;
397 case C_FREG:
398 /* do better -- determine double prec */
399 if(ar) {
400 s->used.freg |= 1<<p->to.reg;
401 s->used.freg |= 1<<(p->to.reg|1);
402 } else {
403 s->set.freg |= 1<<p->to.reg;
404 s->set.freg |= 1<<(p->to.reg|1);
405 }
406 break;
407 case C_SAUTO:
408 case C_LAUTO:
409 case C_ESAUTO:
410 case C_OSAUTO:
411 case C_ELAUTO:
412 case C_OLAUTO:
413 s->used.ireg |= 1<<REGSP;
414 if(ad)
415 break;
416 s->size = sz;
417 s->soffset = regoff(&p->to);
418
419 if(ar)
420 s->used.cc |= E_MEMSP;
421 else
422 s->set.cc |= E_MEMSP;
423 break;
424 case C_SEXT:
425 case C_LEXT:
426 case C_ESEXT:
427 case C_OSEXT:
428 case C_ELEXT:
429 case C_OLEXT:
430 s->used.ireg |= 1<<REGSB;
431 if(ad)
432 break;
433 s->size = sz;
434 s->soffset = regoff(&p->to);
435
436 if(ar)
437 s->used.cc |= E_MEMSB;
438 else
439 s->set.cc |= E_MEMSB;
440 break;
441 }
442
443 /*
444 * flags based on 'from' field
445 */
446 c = p->from.class;
447 if(c == 0) {
448 c = aclass(&p->from) + 1;
449 p->from.class = c;
450 }
451 c--;
452 switch(c) {
453 default:
454 print("unknown class %d %D\n", c, &p->from);
455
456 case C_ZCON:
457 case C_SCON:
458 case C_UCON:
459 case C_LCON:
460 case C_NONE:
461 case C_SBRA:
462 case C_LBRA:
463 c = p->from.reg;
464 if(c != NREG)
465 s->used.ireg |= 1<<c;
466 break;
467 case C_CREG:
468 case C_FSR:
469 case C_FQ:
470 case C_PREG:
471 s->set.ireg = ALL;
472 s->set.freg = ALL;
473 s->set.cc = ALL;
474 break;
475 case C_ZOREG:
476 case C_SOREG:
477 case C_LOREG:
478 case C_ASI:
479 c = p->from.reg;
480 s->used.ireg |= 1<<c;
481 if(ld)
482 p->mark |= LOAD;
483 if(ad)
484 break;
485 s->size = sz;
486 s->soffset = regoff(&p->from);
487
488 m = ANYMEM;
489 if(c == REGSB)
490 m = E_MEMSB;
491 if(c == REGSP)
492 m = E_MEMSP;
493
494 s->used.cc |= m;
495 break;
496 case C_SACON:
497 case C_LACON:
498 s->used.ireg |= 1<<REGSP;
499 break;
500 case C_SECON:
501 case C_LECON:
502 s->used.ireg |= 1<<REGSB;
503 break;
504 case C_REG:
505 s->used.ireg |= 1<<p->from.reg;
506 break;
507 case C_FREG:
508 /* do better -- determine double prec */
509 s->used.freg |= 1<<p->from.reg;
510 s->used.freg |= 1<<(p->from.reg|1);
511 break;
512 case C_SAUTO:
513 case C_LAUTO:
514 case C_ESAUTO:
515 case C_ELAUTO:
516 case C_OSAUTO:
517 case C_OLAUTO:
518 s->used.ireg |= 1<<REGSP;
519 if(ld)
520 p->mark |= LOAD;
521 if(ad)
522 break;
523 s->size = sz;
524 s->soffset = regoff(&p->from);
525
526 s->used.cc |= E_MEMSP;
527 break;
528 case C_SEXT:
529 case C_LEXT:
530 case C_ESEXT:
531 case C_ELEXT:
532 case C_OSEXT:
533 case C_OLEXT:
534 s->used.ireg |= 1<<REGSB;
535 if(ld)
536 p->mark |= LOAD;
537 if(ad)
538 break;
539 s->size = sz;
540 s->soffset = regoff(&p->from);
541
542 s->used.cc |= E_MEMSB;
543 break;
544 }
545
546 c = p->reg;
547 if(c != NREG) {
548 if(p->from.type == D_FREG || p->to.type == D_FREG) {
549 s->used.freg |= 1<<c;
550 s->used.freg |= 1<<(c|1);
551 } else
552 s->used.ireg |= 1<<c;
553 }
554 s->set.ireg &= ~(1<<0); /* R0 cant be set */
555 }
556
557 /*
558 * test to see if 2 instrictions can be
559 * interchanged without changing semantics
560 */
561 int
depend(Sch * sa,Sch * sb)562 depend(Sch *sa, Sch *sb)
563 {
564 ulong x;
565
566 if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
567 return 1;
568 if(sb->set.ireg & sa->used.ireg)
569 return 1;
570
571 if(sa->set.freg & (sb->set.freg|sb->used.freg))
572 return 1;
573 if(sb->set.freg & sa->used.freg)
574 return 1;
575
576 x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
577 (sb->set.cc & sa->used.cc);
578 if(x) {
579 /*
580 * allow SB and SP to pass each other.
581 * allow SB to pass SB iff doffsets are ok
582 * anything else conflicts
583 */
584 if(x != E_MEMSP && x != E_MEMSB)
585 return 1;
586 x = sa->set.cc | sb->set.cc |
587 sa->used.cc | sb->used.cc;
588 if(x & E_MEM)
589 return 1;
590 if(offoverlap(sa, sb))
591 return 1;
592 }
593
594 return 0;
595 }
596
597 int
offoverlap(Sch * sa,Sch * sb)598 offoverlap(Sch *sa, Sch *sb)
599 {
600
601 if(sa->soffset < sb->soffset) {
602 if(sa->soffset+sa->size > sb->soffset)
603 return 1;
604 return 0;
605 }
606 if(sb->soffset+sb->size > sa->soffset)
607 return 1;
608 return 0;
609 }
610
611 /*
612 * test 2 adjacent instructions
613 * and find out if inserted instructions
614 * are desired to prevent stalls.
615 * first instruction is a load instruction.
616 */
617 int
conflict(Sch * sa,Sch * sb)618 conflict(Sch *sa, Sch *sb)
619 {
620
621 if(sa->set.ireg & sb->used.ireg)
622 return 1;
623 if(sa->set.freg & sb->used.freg)
624 return 1;
625 return 0;
626 }
627
628 int
compound(Prog * p)629 compound(Prog *p)
630 {
631 Optab *o;
632
633 o = oplook(p);
634 if(o->size != 4)
635 return 1;
636 if(p->to.type == D_REG && p->to.reg == REGSB)
637 return 1;
638 return 0;
639 }
640
641 void
dumpbits(Sch * s,Dep * d)642 dumpbits(Sch *s, Dep *d)
643 {
644 int i;
645
646 for(i=0; i<32; i++)
647 if(d->ireg & (1<<i))
648 Bprint(&bso, " R%d", i);
649 for(i=0; i<32; i++)
650 if(d->freg & (1<<i))
651 Bprint(&bso, " F%d", i);
652 for(i=0; i<32; i++)
653 switch(d->cc & (1<<i)) {
654 default:
655 break;
656 case E_ICC:
657 Bprint(&bso, " ICC");
658 break;
659 case E_FCC:
660 Bprint(&bso, " FCC");
661 break;
662 case E_MEM:
663 Bprint(&bso, " MEM%d", s->size);
664 break;
665 case E_MEMSB:
666 Bprint(&bso, " SB%d", s->size);
667 break;
668 case E_MEMSP:
669 Bprint(&bso, " SP%d", s->size);
670 break;
671 }
672 }
673