1*74a4d8c2SCharles.Forsyth #include "l.h"
2*74a4d8c2SCharles.Forsyth
3*74a4d8c2SCharles.Forsyth enum
4*74a4d8c2SCharles.Forsyth {
5*74a4d8c2SCharles.Forsyth E_HILO = 1<<0,
6*74a4d8c2SCharles.Forsyth E_FCR = 1<<1,
7*74a4d8c2SCharles.Forsyth E_MCR = 1<<2,
8*74a4d8c2SCharles.Forsyth E_MEM = 1<<3,
9*74a4d8c2SCharles.Forsyth E_MEMSP = 1<<4, /* uses offset and size */
10*74a4d8c2SCharles.Forsyth E_MEMSB = 1<<5, /* uses offset and size */
11*74a4d8c2SCharles.Forsyth ANYMEM = E_MEM|E_MEMSP|E_MEMSB,
12*74a4d8c2SCharles.Forsyth DELAY = BRANCH|LOAD|FCMP,
13*74a4d8c2SCharles.Forsyth };
14*74a4d8c2SCharles.Forsyth
15*74a4d8c2SCharles.Forsyth typedef struct Sch Sch;
16*74a4d8c2SCharles.Forsyth typedef struct Dep Dep;
17*74a4d8c2SCharles.Forsyth
18*74a4d8c2SCharles.Forsyth struct Dep
19*74a4d8c2SCharles.Forsyth {
20*74a4d8c2SCharles.Forsyth ulong ireg;
21*74a4d8c2SCharles.Forsyth ulong freg;
22*74a4d8c2SCharles.Forsyth ulong cc;
23*74a4d8c2SCharles.Forsyth };
24*74a4d8c2SCharles.Forsyth struct Sch
25*74a4d8c2SCharles.Forsyth {
26*74a4d8c2SCharles.Forsyth Prog p;
27*74a4d8c2SCharles.Forsyth Dep set;
28*74a4d8c2SCharles.Forsyth Dep used;
29*74a4d8c2SCharles.Forsyth long soffset;
30*74a4d8c2SCharles.Forsyth char size;
31*74a4d8c2SCharles.Forsyth char nop;
32*74a4d8c2SCharles.Forsyth char comp;
33*74a4d8c2SCharles.Forsyth };
34*74a4d8c2SCharles.Forsyth
35*74a4d8c2SCharles.Forsyth void regsused(Sch*, Prog*);
36*74a4d8c2SCharles.Forsyth int depend(Sch*, Sch*);
37*74a4d8c2SCharles.Forsyth int conflict(Sch*, Sch*);
38*74a4d8c2SCharles.Forsyth int offoverlap(Sch*, Sch*);
39*74a4d8c2SCharles.Forsyth void dumpbits(Sch*, Dep*);
40*74a4d8c2SCharles.Forsyth
41*74a4d8c2SCharles.Forsyth void
sched(Prog * p0,Prog * pe)42*74a4d8c2SCharles.Forsyth sched(Prog *p0, Prog *pe)
43*74a4d8c2SCharles.Forsyth {
44*74a4d8c2SCharles.Forsyth Prog *p, *q;
45*74a4d8c2SCharles.Forsyth Sch sch[NSCHED], *s, *t, *u, *se, stmp;
46*74a4d8c2SCharles.Forsyth
47*74a4d8c2SCharles.Forsyth /*
48*74a4d8c2SCharles.Forsyth * build side structure
49*74a4d8c2SCharles.Forsyth */
50*74a4d8c2SCharles.Forsyth s = sch;
51*74a4d8c2SCharles.Forsyth for(p=p0;; p=p->link) {
52*74a4d8c2SCharles.Forsyth memset(s, 0, sizeof(*s));
53*74a4d8c2SCharles.Forsyth s->p = *p;
54*74a4d8c2SCharles.Forsyth regsused(s, p);
55*74a4d8c2SCharles.Forsyth if(debug['X']) {
56*74a4d8c2SCharles.Forsyth Bprint(&bso, "%P\t\tset", &s->p);
57*74a4d8c2SCharles.Forsyth dumpbits(s, &s->set);
58*74a4d8c2SCharles.Forsyth Bprint(&bso, "; used");
59*74a4d8c2SCharles.Forsyth dumpbits(s, &s->used);
60*74a4d8c2SCharles.Forsyth if(s->comp)
61*74a4d8c2SCharles.Forsyth Bprint(&bso, "; compound");
62*74a4d8c2SCharles.Forsyth if(s->p.mark & LOAD)
63*74a4d8c2SCharles.Forsyth Bprint(&bso, "; load");
64*74a4d8c2SCharles.Forsyth if(s->p.mark & BRANCH)
65*74a4d8c2SCharles.Forsyth Bprint(&bso, "; branch");
66*74a4d8c2SCharles.Forsyth if(s->p.mark & FCMP)
67*74a4d8c2SCharles.Forsyth Bprint(&bso, "; fcmp");
68*74a4d8c2SCharles.Forsyth Bprint(&bso, "\n");
69*74a4d8c2SCharles.Forsyth }
70*74a4d8c2SCharles.Forsyth if(p == pe)
71*74a4d8c2SCharles.Forsyth break;
72*74a4d8c2SCharles.Forsyth s++;
73*74a4d8c2SCharles.Forsyth }
74*74a4d8c2SCharles.Forsyth se = s;
75*74a4d8c2SCharles.Forsyth
76*74a4d8c2SCharles.Forsyth /*
77*74a4d8c2SCharles.Forsyth * prepass to move things around
78*74a4d8c2SCharles.Forsyth * does nothing, but tries to make
79*74a4d8c2SCharles.Forsyth * the actual scheduler work better
80*74a4d8c2SCharles.Forsyth */
81*74a4d8c2SCharles.Forsyth for(s=sch; s<=se; s++) {
82*74a4d8c2SCharles.Forsyth if(!(s->p.mark & LOAD))
83*74a4d8c2SCharles.Forsyth continue;
84*74a4d8c2SCharles.Forsyth /* always good to put nonconflict loads together */
85*74a4d8c2SCharles.Forsyth for(t=s+1; t<=se; t++) {
86*74a4d8c2SCharles.Forsyth if(!(t->p.mark & LOAD))
87*74a4d8c2SCharles.Forsyth continue;
88*74a4d8c2SCharles.Forsyth if(t->p.mark & BRANCH)
89*74a4d8c2SCharles.Forsyth break;
90*74a4d8c2SCharles.Forsyth if(conflict(s, t))
91*74a4d8c2SCharles.Forsyth break;
92*74a4d8c2SCharles.Forsyth for(u=t-1; u>s; u--)
93*74a4d8c2SCharles.Forsyth if(depend(u, t))
94*74a4d8c2SCharles.Forsyth goto no11;
95*74a4d8c2SCharles.Forsyth u = s+1;
96*74a4d8c2SCharles.Forsyth stmp = *t;
97*74a4d8c2SCharles.Forsyth memmove(s+2, u, (uchar*)t - (uchar*)u);
98*74a4d8c2SCharles.Forsyth *u = stmp;
99*74a4d8c2SCharles.Forsyth break;
100*74a4d8c2SCharles.Forsyth }
101*74a4d8c2SCharles.Forsyth no11:
102*74a4d8c2SCharles.Forsyth
103*74a4d8c2SCharles.Forsyth /* put schedule fodder above load */
104*74a4d8c2SCharles.Forsyth for(t=s+1; t<=se; t++) {
105*74a4d8c2SCharles.Forsyth if(t->p.mark & BRANCH)
106*74a4d8c2SCharles.Forsyth break;
107*74a4d8c2SCharles.Forsyth if(s > sch && conflict(s-1, t))
108*74a4d8c2SCharles.Forsyth continue;
109*74a4d8c2SCharles.Forsyth for(u=t-1; u>=s; u--)
110*74a4d8c2SCharles.Forsyth if(depend(t, u))
111*74a4d8c2SCharles.Forsyth goto no1;
112*74a4d8c2SCharles.Forsyth stmp = *t;
113*74a4d8c2SCharles.Forsyth memmove(s+1, s, (uchar*)t - (uchar*)s);
114*74a4d8c2SCharles.Forsyth *s = stmp;
115*74a4d8c2SCharles.Forsyth if(!(s->p.mark & LOAD))
116*74a4d8c2SCharles.Forsyth break;
117*74a4d8c2SCharles.Forsyth no1:;
118*74a4d8c2SCharles.Forsyth }
119*74a4d8c2SCharles.Forsyth }
120*74a4d8c2SCharles.Forsyth
121*74a4d8c2SCharles.Forsyth for(s=se; s>=sch; s--) {
122*74a4d8c2SCharles.Forsyth if(!(s->p.mark & DELAY))
123*74a4d8c2SCharles.Forsyth continue;
124*74a4d8c2SCharles.Forsyth if(s < se)
125*74a4d8c2SCharles.Forsyth if(!conflict(s, s+1))
126*74a4d8c2SCharles.Forsyth goto out3;
127*74a4d8c2SCharles.Forsyth /*
128*74a4d8c2SCharles.Forsyth * s is load, s+1 is immediate use of result or end of block
129*74a4d8c2SCharles.Forsyth * t is the trial instruction to insert between s and s+1
130*74a4d8c2SCharles.Forsyth */
131*74a4d8c2SCharles.Forsyth if(!debug['Y'])
132*74a4d8c2SCharles.Forsyth for(t=s-1; t>=sch; t--) {
133*74a4d8c2SCharles.Forsyth if(t->comp)
134*74a4d8c2SCharles.Forsyth if(s->p.mark & BRANCH)
135*74a4d8c2SCharles.Forsyth goto no2;
136*74a4d8c2SCharles.Forsyth if(t->p.mark & DELAY)
137*74a4d8c2SCharles.Forsyth if(s >= se || conflict(t, s+1))
138*74a4d8c2SCharles.Forsyth goto no2;
139*74a4d8c2SCharles.Forsyth for(u=t+1; u<=s; u++)
140*74a4d8c2SCharles.Forsyth if(depend(u, t))
141*74a4d8c2SCharles.Forsyth goto no2;
142*74a4d8c2SCharles.Forsyth goto out2;
143*74a4d8c2SCharles.Forsyth no2:;
144*74a4d8c2SCharles.Forsyth }
145*74a4d8c2SCharles.Forsyth if(debug['X'])
146*74a4d8c2SCharles.Forsyth Bprint(&bso, "?l%P\n", &s->p);
147*74a4d8c2SCharles.Forsyth s->nop = 1;
148*74a4d8c2SCharles.Forsyth if(debug['v']) {
149*74a4d8c2SCharles.Forsyth if(s->p.mark & LOAD) {
150*74a4d8c2SCharles.Forsyth nop.load.count++;
151*74a4d8c2SCharles.Forsyth nop.load.outof++;
152*74a4d8c2SCharles.Forsyth }
153*74a4d8c2SCharles.Forsyth if(s->p.mark & BRANCH) {
154*74a4d8c2SCharles.Forsyth nop.branch.count++;
155*74a4d8c2SCharles.Forsyth nop.branch.outof++;
156*74a4d8c2SCharles.Forsyth }
157*74a4d8c2SCharles.Forsyth if(s->p.mark & FCMP) {
158*74a4d8c2SCharles.Forsyth nop.fcmp.count++;
159*74a4d8c2SCharles.Forsyth nop.fcmp.outof++;
160*74a4d8c2SCharles.Forsyth }
161*74a4d8c2SCharles.Forsyth }
162*74a4d8c2SCharles.Forsyth continue;
163*74a4d8c2SCharles.Forsyth
164*74a4d8c2SCharles.Forsyth out2:
165*74a4d8c2SCharles.Forsyth if(debug['X']) {
166*74a4d8c2SCharles.Forsyth Bprint(&bso, "!l%P\n", &t->p);
167*74a4d8c2SCharles.Forsyth Bprint(&bso, "%P\n", &s->p);
168*74a4d8c2SCharles.Forsyth }
169*74a4d8c2SCharles.Forsyth stmp = *t;
170*74a4d8c2SCharles.Forsyth memmove(t, t+1, (uchar*)s - (uchar*)t);
171*74a4d8c2SCharles.Forsyth *s = stmp;
172*74a4d8c2SCharles.Forsyth s--;
173*74a4d8c2SCharles.Forsyth
174*74a4d8c2SCharles.Forsyth out3:
175*74a4d8c2SCharles.Forsyth if(debug['v']) {
176*74a4d8c2SCharles.Forsyth if(s->p.mark & LOAD)
177*74a4d8c2SCharles.Forsyth nop.load.outof++;
178*74a4d8c2SCharles.Forsyth if(s->p.mark & BRANCH)
179*74a4d8c2SCharles.Forsyth nop.branch.outof++;
180*74a4d8c2SCharles.Forsyth if(s->p.mark & FCMP)
181*74a4d8c2SCharles.Forsyth nop.fcmp.outof++;
182*74a4d8c2SCharles.Forsyth }
183*74a4d8c2SCharles.Forsyth }
184*74a4d8c2SCharles.Forsyth
185*74a4d8c2SCharles.Forsyth /* Avoid HI/LO use->set */
186*74a4d8c2SCharles.Forsyth t = sch+1;
187*74a4d8c2SCharles.Forsyth for(s=sch; s<se-1; s++, t++) {
188*74a4d8c2SCharles.Forsyth if((s->used.cc & E_HILO) == 0)
189*74a4d8c2SCharles.Forsyth continue;
190*74a4d8c2SCharles.Forsyth if(t->set.cc & E_HILO)
191*74a4d8c2SCharles.Forsyth s->nop = 2;
192*74a4d8c2SCharles.Forsyth }
193*74a4d8c2SCharles.Forsyth
194*74a4d8c2SCharles.Forsyth /*
195*74a4d8c2SCharles.Forsyth * put it all back
196*74a4d8c2SCharles.Forsyth */
197*74a4d8c2SCharles.Forsyth for(s=sch, p=p0; s<=se; s++, p=q) {
198*74a4d8c2SCharles.Forsyth q = p->link;
199*74a4d8c2SCharles.Forsyth if(q != s->p.link) {
200*74a4d8c2SCharles.Forsyth *p = s->p;
201*74a4d8c2SCharles.Forsyth p->link = q;
202*74a4d8c2SCharles.Forsyth }
203*74a4d8c2SCharles.Forsyth while(s->nop--)
204*74a4d8c2SCharles.Forsyth addnop(p);
205*74a4d8c2SCharles.Forsyth }
206*74a4d8c2SCharles.Forsyth if(debug['X']) {
207*74a4d8c2SCharles.Forsyth Bprint(&bso, "\n");
208*74a4d8c2SCharles.Forsyth Bflush(&bso);
209*74a4d8c2SCharles.Forsyth }
210*74a4d8c2SCharles.Forsyth }
211*74a4d8c2SCharles.Forsyth
212*74a4d8c2SCharles.Forsyth void
regsused(Sch * s,Prog * realp)213*74a4d8c2SCharles.Forsyth regsused(Sch *s, Prog *realp)
214*74a4d8c2SCharles.Forsyth {
215*74a4d8c2SCharles.Forsyth int c, ar, ad, ld, sz;
216*74a4d8c2SCharles.Forsyth ulong m;
217*74a4d8c2SCharles.Forsyth Prog *p;
218*74a4d8c2SCharles.Forsyth
219*74a4d8c2SCharles.Forsyth p = &s->p;
220*74a4d8c2SCharles.Forsyth s->comp = compound(p);
221*74a4d8c2SCharles.Forsyth s->nop = 0;
222*74a4d8c2SCharles.Forsyth if(s->comp) {
223*74a4d8c2SCharles.Forsyth s->set.ireg |= 1<<REGTMP;
224*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGTMP;
225*74a4d8c2SCharles.Forsyth }
226*74a4d8c2SCharles.Forsyth
227*74a4d8c2SCharles.Forsyth ar = 0; /* dest is really reference */
228*74a4d8c2SCharles.Forsyth ad = 0; /* source/dest is really address */
229*74a4d8c2SCharles.Forsyth ld = 0; /* opcode is load instruction */
230*74a4d8c2SCharles.Forsyth sz = 20; /* size of load/store for overlap computation */
231*74a4d8c2SCharles.Forsyth
232*74a4d8c2SCharles.Forsyth /*
233*74a4d8c2SCharles.Forsyth * flags based on opcode
234*74a4d8c2SCharles.Forsyth */
235*74a4d8c2SCharles.Forsyth switch(p->as) {
236*74a4d8c2SCharles.Forsyth case ATEXT:
237*74a4d8c2SCharles.Forsyth curtext = realp;
238*74a4d8c2SCharles.Forsyth autosize = p->to.offset + 4;
239*74a4d8c2SCharles.Forsyth ad = 1;
240*74a4d8c2SCharles.Forsyth break;
241*74a4d8c2SCharles.Forsyth case AJAL:
242*74a4d8c2SCharles.Forsyth c = p->reg;
243*74a4d8c2SCharles.Forsyth if(c == NREG)
244*74a4d8c2SCharles.Forsyth c = REGLINK;
245*74a4d8c2SCharles.Forsyth s->set.ireg |= 1<<c;
246*74a4d8c2SCharles.Forsyth ar = 1;
247*74a4d8c2SCharles.Forsyth ad = 1;
248*74a4d8c2SCharles.Forsyth break;
249*74a4d8c2SCharles.Forsyth case ABGEZAL:
250*74a4d8c2SCharles.Forsyth case ABLTZAL:
251*74a4d8c2SCharles.Forsyth s->set.ireg |= 1<<REGLINK;
252*74a4d8c2SCharles.Forsyth case ABEQ:
253*74a4d8c2SCharles.Forsyth case ABGEZ:
254*74a4d8c2SCharles.Forsyth case ABGTZ:
255*74a4d8c2SCharles.Forsyth case ABLEZ:
256*74a4d8c2SCharles.Forsyth case ABLTZ:
257*74a4d8c2SCharles.Forsyth case ABNE:
258*74a4d8c2SCharles.Forsyth ar = 1;
259*74a4d8c2SCharles.Forsyth ad = 1;
260*74a4d8c2SCharles.Forsyth break;
261*74a4d8c2SCharles.Forsyth case ABFPT:
262*74a4d8c2SCharles.Forsyth case ABFPF:
263*74a4d8c2SCharles.Forsyth ad = 1;
264*74a4d8c2SCharles.Forsyth s->used.cc |= E_FCR;
265*74a4d8c2SCharles.Forsyth break;
266*74a4d8c2SCharles.Forsyth case ACMPEQD:
267*74a4d8c2SCharles.Forsyth case ACMPEQF:
268*74a4d8c2SCharles.Forsyth case ACMPGED:
269*74a4d8c2SCharles.Forsyth case ACMPGEF:
270*74a4d8c2SCharles.Forsyth case ACMPGTD:
271*74a4d8c2SCharles.Forsyth case ACMPGTF:
272*74a4d8c2SCharles.Forsyth ar = 1;
273*74a4d8c2SCharles.Forsyth s->set.cc |= E_FCR;
274*74a4d8c2SCharles.Forsyth p->mark |= FCMP;
275*74a4d8c2SCharles.Forsyth break;
276*74a4d8c2SCharles.Forsyth case AJMP:
277*74a4d8c2SCharles.Forsyth ar = 1;
278*74a4d8c2SCharles.Forsyth ad = 1;
279*74a4d8c2SCharles.Forsyth break;
280*74a4d8c2SCharles.Forsyth case AMOVB:
281*74a4d8c2SCharles.Forsyth case AMOVBU:
282*74a4d8c2SCharles.Forsyth sz = 1;
283*74a4d8c2SCharles.Forsyth ld = 1;
284*74a4d8c2SCharles.Forsyth break;
285*74a4d8c2SCharles.Forsyth case AMOVH:
286*74a4d8c2SCharles.Forsyth case AMOVHU:
287*74a4d8c2SCharles.Forsyth sz = 2;
288*74a4d8c2SCharles.Forsyth ld = 1;
289*74a4d8c2SCharles.Forsyth break;
290*74a4d8c2SCharles.Forsyth case AMOVF:
291*74a4d8c2SCharles.Forsyth case AMOVW:
292*74a4d8c2SCharles.Forsyth case AMOVWL:
293*74a4d8c2SCharles.Forsyth case AMOVWR:
294*74a4d8c2SCharles.Forsyth sz = 4;
295*74a4d8c2SCharles.Forsyth ld = 1;
296*74a4d8c2SCharles.Forsyth break;
297*74a4d8c2SCharles.Forsyth case AMOVD:
298*74a4d8c2SCharles.Forsyth case AMOVV:
299*74a4d8c2SCharles.Forsyth case AMOVVL:
300*74a4d8c2SCharles.Forsyth case AMOVVR:
301*74a4d8c2SCharles.Forsyth sz = 8;
302*74a4d8c2SCharles.Forsyth ld = 1;
303*74a4d8c2SCharles.Forsyth break;
304*74a4d8c2SCharles.Forsyth case ADIV:
305*74a4d8c2SCharles.Forsyth case ADIVU:
306*74a4d8c2SCharles.Forsyth case AMUL:
307*74a4d8c2SCharles.Forsyth case AMULU:
308*74a4d8c2SCharles.Forsyth case AREM:
309*74a4d8c2SCharles.Forsyth case AREMU:
310*74a4d8c2SCharles.Forsyth s->set.cc = E_HILO;
311*74a4d8c2SCharles.Forsyth case AADD:
312*74a4d8c2SCharles.Forsyth case AADDU:
313*74a4d8c2SCharles.Forsyth case AAND:
314*74a4d8c2SCharles.Forsyth case ANOR:
315*74a4d8c2SCharles.Forsyth case AOR:
316*74a4d8c2SCharles.Forsyth case ASGT:
317*74a4d8c2SCharles.Forsyth case ASGTU:
318*74a4d8c2SCharles.Forsyth case ASLL:
319*74a4d8c2SCharles.Forsyth case ASRA:
320*74a4d8c2SCharles.Forsyth case ASRL:
321*74a4d8c2SCharles.Forsyth case ASUB:
322*74a4d8c2SCharles.Forsyth case ASUBU:
323*74a4d8c2SCharles.Forsyth case AXOR:
324*74a4d8c2SCharles.Forsyth
325*74a4d8c2SCharles.Forsyth case AADDD:
326*74a4d8c2SCharles.Forsyth case AADDF:
327*74a4d8c2SCharles.Forsyth case AADDW:
328*74a4d8c2SCharles.Forsyth case ASUBD:
329*74a4d8c2SCharles.Forsyth case ASUBF:
330*74a4d8c2SCharles.Forsyth case ASUBW:
331*74a4d8c2SCharles.Forsyth case AMULF:
332*74a4d8c2SCharles.Forsyth case AMULD:
333*74a4d8c2SCharles.Forsyth case AMULW:
334*74a4d8c2SCharles.Forsyth case ADIVF:
335*74a4d8c2SCharles.Forsyth case ADIVD:
336*74a4d8c2SCharles.Forsyth case ADIVW:
337*74a4d8c2SCharles.Forsyth if(p->reg == NREG) {
338*74a4d8c2SCharles.Forsyth if(p->to.type == D_REG || p->to.type == D_FREG)
339*74a4d8c2SCharles.Forsyth p->reg = p->to.reg;
340*74a4d8c2SCharles.Forsyth if(p->reg == NREG)
341*74a4d8c2SCharles.Forsyth print("botch %P\n", p);
342*74a4d8c2SCharles.Forsyth }
343*74a4d8c2SCharles.Forsyth break;
344*74a4d8c2SCharles.Forsyth }
345*74a4d8c2SCharles.Forsyth
346*74a4d8c2SCharles.Forsyth /*
347*74a4d8c2SCharles.Forsyth * flags based on 'to' field
348*74a4d8c2SCharles.Forsyth */
349*74a4d8c2SCharles.Forsyth c = p->to.class;
350*74a4d8c2SCharles.Forsyth if(c == 0) {
351*74a4d8c2SCharles.Forsyth c = aclass(&p->to) + 1;
352*74a4d8c2SCharles.Forsyth p->to.class = c;
353*74a4d8c2SCharles.Forsyth }
354*74a4d8c2SCharles.Forsyth c--;
355*74a4d8c2SCharles.Forsyth switch(c) {
356*74a4d8c2SCharles.Forsyth default:
357*74a4d8c2SCharles.Forsyth print("unknown class %d %D\n", c, &p->to);
358*74a4d8c2SCharles.Forsyth
359*74a4d8c2SCharles.Forsyth case C_ZCON:
360*74a4d8c2SCharles.Forsyth case C_SCON:
361*74a4d8c2SCharles.Forsyth case C_ADD0CON:
362*74a4d8c2SCharles.Forsyth case C_AND0CON:
363*74a4d8c2SCharles.Forsyth case C_ADDCON:
364*74a4d8c2SCharles.Forsyth case C_ANDCON:
365*74a4d8c2SCharles.Forsyth case C_UCON:
366*74a4d8c2SCharles.Forsyth case C_LCON:
367*74a4d8c2SCharles.Forsyth case C_NONE:
368*74a4d8c2SCharles.Forsyth case C_SBRA:
369*74a4d8c2SCharles.Forsyth case C_LBRA:
370*74a4d8c2SCharles.Forsyth break;
371*74a4d8c2SCharles.Forsyth
372*74a4d8c2SCharles.Forsyth case C_HI:
373*74a4d8c2SCharles.Forsyth case C_LO:
374*74a4d8c2SCharles.Forsyth s->set.cc |= E_HILO;
375*74a4d8c2SCharles.Forsyth break;
376*74a4d8c2SCharles.Forsyth case C_FCREG:
377*74a4d8c2SCharles.Forsyth s->set.cc |= E_FCR;
378*74a4d8c2SCharles.Forsyth break;
379*74a4d8c2SCharles.Forsyth case C_MREG:
380*74a4d8c2SCharles.Forsyth s->set.cc |= E_MCR;
381*74a4d8c2SCharles.Forsyth break;
382*74a4d8c2SCharles.Forsyth case C_ZOREG:
383*74a4d8c2SCharles.Forsyth case C_SOREG:
384*74a4d8c2SCharles.Forsyth case C_LOREG:
385*74a4d8c2SCharles.Forsyth c = p->to.reg;
386*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<c;
387*74a4d8c2SCharles.Forsyth if(ad)
388*74a4d8c2SCharles.Forsyth break;
389*74a4d8c2SCharles.Forsyth s->size = sz;
390*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->to);
391*74a4d8c2SCharles.Forsyth
392*74a4d8c2SCharles.Forsyth m = ANYMEM;
393*74a4d8c2SCharles.Forsyth if(c == REGSB)
394*74a4d8c2SCharles.Forsyth m = E_MEMSB;
395*74a4d8c2SCharles.Forsyth if(c == REGSP)
396*74a4d8c2SCharles.Forsyth m = E_MEMSP;
397*74a4d8c2SCharles.Forsyth
398*74a4d8c2SCharles.Forsyth if(ar)
399*74a4d8c2SCharles.Forsyth s->used.cc |= m;
400*74a4d8c2SCharles.Forsyth else
401*74a4d8c2SCharles.Forsyth s->set.cc |= m;
402*74a4d8c2SCharles.Forsyth break;
403*74a4d8c2SCharles.Forsyth case C_SACON:
404*74a4d8c2SCharles.Forsyth case C_LACON:
405*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSP;
406*74a4d8c2SCharles.Forsyth break;
407*74a4d8c2SCharles.Forsyth case C_SECON:
408*74a4d8c2SCharles.Forsyth case C_LECON:
409*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSB;
410*74a4d8c2SCharles.Forsyth break;
411*74a4d8c2SCharles.Forsyth case C_REG:
412*74a4d8c2SCharles.Forsyth if(ar)
413*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<p->to.reg;
414*74a4d8c2SCharles.Forsyth else
415*74a4d8c2SCharles.Forsyth s->set.ireg |= 1<<p->to.reg;
416*74a4d8c2SCharles.Forsyth break;
417*74a4d8c2SCharles.Forsyth case C_FREG:
418*74a4d8c2SCharles.Forsyth /* do better -- determine double prec */
419*74a4d8c2SCharles.Forsyth if(ar) {
420*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<p->to.reg;
421*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<(p->to.reg|1);
422*74a4d8c2SCharles.Forsyth } else {
423*74a4d8c2SCharles.Forsyth s->set.freg |= 1<<p->to.reg;
424*74a4d8c2SCharles.Forsyth s->set.freg |= 1<<(p->to.reg|1);
425*74a4d8c2SCharles.Forsyth }
426*74a4d8c2SCharles.Forsyth if(ld && p->from.type == D_REG)
427*74a4d8c2SCharles.Forsyth p->mark |= LOAD;
428*74a4d8c2SCharles.Forsyth break;
429*74a4d8c2SCharles.Forsyth case C_SAUTO:
430*74a4d8c2SCharles.Forsyth case C_LAUTO:
431*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSP;
432*74a4d8c2SCharles.Forsyth if(ad)
433*74a4d8c2SCharles.Forsyth break;
434*74a4d8c2SCharles.Forsyth s->size = sz;
435*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->to);
436*74a4d8c2SCharles.Forsyth
437*74a4d8c2SCharles.Forsyth if(ar)
438*74a4d8c2SCharles.Forsyth s->used.cc |= E_MEMSP;
439*74a4d8c2SCharles.Forsyth else
440*74a4d8c2SCharles.Forsyth s->set.cc |= E_MEMSP;
441*74a4d8c2SCharles.Forsyth break;
442*74a4d8c2SCharles.Forsyth case C_SEXT:
443*74a4d8c2SCharles.Forsyth case C_LEXT:
444*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSB;
445*74a4d8c2SCharles.Forsyth if(ad)
446*74a4d8c2SCharles.Forsyth break;
447*74a4d8c2SCharles.Forsyth s->size = sz;
448*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->to);
449*74a4d8c2SCharles.Forsyth
450*74a4d8c2SCharles.Forsyth if(ar)
451*74a4d8c2SCharles.Forsyth s->used.cc |= E_MEMSB;
452*74a4d8c2SCharles.Forsyth else
453*74a4d8c2SCharles.Forsyth s->set.cc |= E_MEMSB;
454*74a4d8c2SCharles.Forsyth break;
455*74a4d8c2SCharles.Forsyth }
456*74a4d8c2SCharles.Forsyth
457*74a4d8c2SCharles.Forsyth /*
458*74a4d8c2SCharles.Forsyth * flags based on 'from' field
459*74a4d8c2SCharles.Forsyth */
460*74a4d8c2SCharles.Forsyth c = p->from.class;
461*74a4d8c2SCharles.Forsyth if(c == 0) {
462*74a4d8c2SCharles.Forsyth c = aclass(&p->from) + 1;
463*74a4d8c2SCharles.Forsyth p->from.class = c;
464*74a4d8c2SCharles.Forsyth }
465*74a4d8c2SCharles.Forsyth c--;
466*74a4d8c2SCharles.Forsyth switch(c) {
467*74a4d8c2SCharles.Forsyth default:
468*74a4d8c2SCharles.Forsyth print("unknown class %d %D\n", c, &p->from);
469*74a4d8c2SCharles.Forsyth
470*74a4d8c2SCharles.Forsyth case C_ZCON:
471*74a4d8c2SCharles.Forsyth case C_SCON:
472*74a4d8c2SCharles.Forsyth case C_ADD0CON:
473*74a4d8c2SCharles.Forsyth case C_AND0CON:
474*74a4d8c2SCharles.Forsyth case C_ADDCON:
475*74a4d8c2SCharles.Forsyth case C_ANDCON:
476*74a4d8c2SCharles.Forsyth case C_UCON:
477*74a4d8c2SCharles.Forsyth case C_LCON:
478*74a4d8c2SCharles.Forsyth case C_NONE:
479*74a4d8c2SCharles.Forsyth case C_SBRA:
480*74a4d8c2SCharles.Forsyth case C_LBRA:
481*74a4d8c2SCharles.Forsyth break;
482*74a4d8c2SCharles.Forsyth case C_HI:
483*74a4d8c2SCharles.Forsyth case C_LO:
484*74a4d8c2SCharles.Forsyth s->used.cc |= E_HILO;
485*74a4d8c2SCharles.Forsyth break;
486*74a4d8c2SCharles.Forsyth case C_FCREG:
487*74a4d8c2SCharles.Forsyth s->used.cc |= E_FCR;
488*74a4d8c2SCharles.Forsyth break;
489*74a4d8c2SCharles.Forsyth case C_MREG:
490*74a4d8c2SCharles.Forsyth s->used.cc |= E_MCR;
491*74a4d8c2SCharles.Forsyth break;
492*74a4d8c2SCharles.Forsyth case C_ZOREG:
493*74a4d8c2SCharles.Forsyth case C_SOREG:
494*74a4d8c2SCharles.Forsyth case C_LOREG:
495*74a4d8c2SCharles.Forsyth c = p->from.reg;
496*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<c;
497*74a4d8c2SCharles.Forsyth if(ld)
498*74a4d8c2SCharles.Forsyth p->mark |= LOAD;
499*74a4d8c2SCharles.Forsyth s->size = sz;
500*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->from);
501*74a4d8c2SCharles.Forsyth
502*74a4d8c2SCharles.Forsyth m = ANYMEM;
503*74a4d8c2SCharles.Forsyth if(c == REGSB)
504*74a4d8c2SCharles.Forsyth m = E_MEMSB;
505*74a4d8c2SCharles.Forsyth if(c == REGSP)
506*74a4d8c2SCharles.Forsyth m = E_MEMSP;
507*74a4d8c2SCharles.Forsyth
508*74a4d8c2SCharles.Forsyth s->used.cc |= m;
509*74a4d8c2SCharles.Forsyth break;
510*74a4d8c2SCharles.Forsyth case C_SACON:
511*74a4d8c2SCharles.Forsyth case C_LACON:
512*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSP;
513*74a4d8c2SCharles.Forsyth break;
514*74a4d8c2SCharles.Forsyth case C_SECON:
515*74a4d8c2SCharles.Forsyth case C_LECON:
516*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSB;
517*74a4d8c2SCharles.Forsyth break;
518*74a4d8c2SCharles.Forsyth case C_REG:
519*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<p->from.reg;
520*74a4d8c2SCharles.Forsyth break;
521*74a4d8c2SCharles.Forsyth case C_FREG:
522*74a4d8c2SCharles.Forsyth /* do better -- determine double prec */
523*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<p->from.reg;
524*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<(p->from.reg|1);
525*74a4d8c2SCharles.Forsyth if(ld && p->to.type == D_REG)
526*74a4d8c2SCharles.Forsyth p->mark |= LOAD;
527*74a4d8c2SCharles.Forsyth break;
528*74a4d8c2SCharles.Forsyth case C_SAUTO:
529*74a4d8c2SCharles.Forsyth case C_LAUTO:
530*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSP;
531*74a4d8c2SCharles.Forsyth if(ld)
532*74a4d8c2SCharles.Forsyth p->mark |= LOAD;
533*74a4d8c2SCharles.Forsyth if(ad)
534*74a4d8c2SCharles.Forsyth break;
535*74a4d8c2SCharles.Forsyth s->size = sz;
536*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->from);
537*74a4d8c2SCharles.Forsyth
538*74a4d8c2SCharles.Forsyth s->used.cc |= E_MEMSP;
539*74a4d8c2SCharles.Forsyth break;
540*74a4d8c2SCharles.Forsyth case C_SEXT:
541*74a4d8c2SCharles.Forsyth case C_LEXT:
542*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<REGSB;
543*74a4d8c2SCharles.Forsyth if(ld)
544*74a4d8c2SCharles.Forsyth p->mark |= LOAD;
545*74a4d8c2SCharles.Forsyth if(ad)
546*74a4d8c2SCharles.Forsyth break;
547*74a4d8c2SCharles.Forsyth s->size = sz;
548*74a4d8c2SCharles.Forsyth s->soffset = regoff(&p->from);
549*74a4d8c2SCharles.Forsyth
550*74a4d8c2SCharles.Forsyth s->used.cc |= E_MEMSB;
551*74a4d8c2SCharles.Forsyth break;
552*74a4d8c2SCharles.Forsyth }
553*74a4d8c2SCharles.Forsyth
554*74a4d8c2SCharles.Forsyth c = p->reg;
555*74a4d8c2SCharles.Forsyth if(c != NREG) {
556*74a4d8c2SCharles.Forsyth if(p->from.type == D_FREG || p->to.type == D_FREG) {
557*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<c;
558*74a4d8c2SCharles.Forsyth s->used.freg |= 1<<(c|1);
559*74a4d8c2SCharles.Forsyth } else
560*74a4d8c2SCharles.Forsyth s->used.ireg |= 1<<c;
561*74a4d8c2SCharles.Forsyth }
562*74a4d8c2SCharles.Forsyth s->set.ireg &= ~(1<<REGZERO); /* R0 cant be set */
563*74a4d8c2SCharles.Forsyth }
564*74a4d8c2SCharles.Forsyth
565*74a4d8c2SCharles.Forsyth /*
566*74a4d8c2SCharles.Forsyth * test to see if 2 instrictions can be
567*74a4d8c2SCharles.Forsyth * interchanged without changing semantics
568*74a4d8c2SCharles.Forsyth */
569*74a4d8c2SCharles.Forsyth int
depend(Sch * sa,Sch * sb)570*74a4d8c2SCharles.Forsyth depend(Sch *sa, Sch *sb)
571*74a4d8c2SCharles.Forsyth {
572*74a4d8c2SCharles.Forsyth ulong x;
573*74a4d8c2SCharles.Forsyth
574*74a4d8c2SCharles.Forsyth if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
575*74a4d8c2SCharles.Forsyth return 1;
576*74a4d8c2SCharles.Forsyth if(sb->set.ireg & sa->used.ireg)
577*74a4d8c2SCharles.Forsyth return 1;
578*74a4d8c2SCharles.Forsyth
579*74a4d8c2SCharles.Forsyth if(sa->set.freg & (sb->set.freg|sb->used.freg))
580*74a4d8c2SCharles.Forsyth return 1;
581*74a4d8c2SCharles.Forsyth if(sb->set.freg & sa->used.freg)
582*74a4d8c2SCharles.Forsyth return 1;
583*74a4d8c2SCharles.Forsyth
584*74a4d8c2SCharles.Forsyth /*
585*74a4d8c2SCharles.Forsyth * special case.
586*74a4d8c2SCharles.Forsyth * loads from same address cannot pass.
587*74a4d8c2SCharles.Forsyth * this is for hardware fifo's and the like
588*74a4d8c2SCharles.Forsyth */
589*74a4d8c2SCharles.Forsyth if(sa->used.cc & sb->used.cc & E_MEM)
590*74a4d8c2SCharles.Forsyth if(sa->p.reg == sb->p.reg)
591*74a4d8c2SCharles.Forsyth if(regoff(&sa->p.from) == regoff(&sb->p.from))
592*74a4d8c2SCharles.Forsyth return 1;
593*74a4d8c2SCharles.Forsyth
594*74a4d8c2SCharles.Forsyth x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
595*74a4d8c2SCharles.Forsyth (sb->set.cc & sa->used.cc);
596*74a4d8c2SCharles.Forsyth if(x) {
597*74a4d8c2SCharles.Forsyth /*
598*74a4d8c2SCharles.Forsyth * allow SB and SP to pass each other.
599*74a4d8c2SCharles.Forsyth * allow SB to pass SB iff doffsets are ok
600*74a4d8c2SCharles.Forsyth * anything else conflicts
601*74a4d8c2SCharles.Forsyth */
602*74a4d8c2SCharles.Forsyth if(x != E_MEMSP && x != E_MEMSB)
603*74a4d8c2SCharles.Forsyth return 1;
604*74a4d8c2SCharles.Forsyth x = sa->set.cc | sb->set.cc |
605*74a4d8c2SCharles.Forsyth sa->used.cc | sb->used.cc;
606*74a4d8c2SCharles.Forsyth if(x & E_MEM)
607*74a4d8c2SCharles.Forsyth return 1;
608*74a4d8c2SCharles.Forsyth if(offoverlap(sa, sb))
609*74a4d8c2SCharles.Forsyth return 1;
610*74a4d8c2SCharles.Forsyth }
611*74a4d8c2SCharles.Forsyth
612*74a4d8c2SCharles.Forsyth return 0;
613*74a4d8c2SCharles.Forsyth }
614*74a4d8c2SCharles.Forsyth
615*74a4d8c2SCharles.Forsyth int
offoverlap(Sch * sa,Sch * sb)616*74a4d8c2SCharles.Forsyth offoverlap(Sch *sa, Sch *sb)
617*74a4d8c2SCharles.Forsyth {
618*74a4d8c2SCharles.Forsyth
619*74a4d8c2SCharles.Forsyth if(sa->soffset < sb->soffset) {
620*74a4d8c2SCharles.Forsyth if(sa->soffset+sa->size > sb->soffset)
621*74a4d8c2SCharles.Forsyth return 1;
622*74a4d8c2SCharles.Forsyth return 0;
623*74a4d8c2SCharles.Forsyth }
624*74a4d8c2SCharles.Forsyth if(sb->soffset+sb->size > sa->soffset)
625*74a4d8c2SCharles.Forsyth return 1;
626*74a4d8c2SCharles.Forsyth return 0;
627*74a4d8c2SCharles.Forsyth }
628*74a4d8c2SCharles.Forsyth
629*74a4d8c2SCharles.Forsyth /*
630*74a4d8c2SCharles.Forsyth * test 2 adjacent instructions
631*74a4d8c2SCharles.Forsyth * and find out if inserted instructions
632*74a4d8c2SCharles.Forsyth * are desired to prevent stalls.
633*74a4d8c2SCharles.Forsyth */
634*74a4d8c2SCharles.Forsyth int
conflict(Sch * sa,Sch * sb)635*74a4d8c2SCharles.Forsyth conflict(Sch *sa, Sch *sb)
636*74a4d8c2SCharles.Forsyth {
637*74a4d8c2SCharles.Forsyth
638*74a4d8c2SCharles.Forsyth if(sa->set.ireg & sb->used.ireg)
639*74a4d8c2SCharles.Forsyth return 1;
640*74a4d8c2SCharles.Forsyth if(sa->set.freg & sb->used.freg)
641*74a4d8c2SCharles.Forsyth return 1;
642*74a4d8c2SCharles.Forsyth if(sa->set.cc & sb->used.cc)
643*74a4d8c2SCharles.Forsyth return 1;
644*74a4d8c2SCharles.Forsyth
645*74a4d8c2SCharles.Forsyth return 0;
646*74a4d8c2SCharles.Forsyth }
647*74a4d8c2SCharles.Forsyth
648*74a4d8c2SCharles.Forsyth int
compound(Prog * p)649*74a4d8c2SCharles.Forsyth compound(Prog *p)
650*74a4d8c2SCharles.Forsyth {
651*74a4d8c2SCharles.Forsyth Optab *o;
652*74a4d8c2SCharles.Forsyth
653*74a4d8c2SCharles.Forsyth o = oplook(p);
654*74a4d8c2SCharles.Forsyth if(o->size != 4)
655*74a4d8c2SCharles.Forsyth return 1;
656*74a4d8c2SCharles.Forsyth if(p->to.type == D_REG && p->to.reg == REGSB)
657*74a4d8c2SCharles.Forsyth return 1;
658*74a4d8c2SCharles.Forsyth return 0;
659*74a4d8c2SCharles.Forsyth }
660*74a4d8c2SCharles.Forsyth
661*74a4d8c2SCharles.Forsyth void
dumpbits(Sch * s,Dep * d)662*74a4d8c2SCharles.Forsyth dumpbits(Sch *s, Dep *d)
663*74a4d8c2SCharles.Forsyth {
664*74a4d8c2SCharles.Forsyth int i;
665*74a4d8c2SCharles.Forsyth
666*74a4d8c2SCharles.Forsyth for(i=0; i<32; i++)
667*74a4d8c2SCharles.Forsyth if(d->ireg & (1<<i))
668*74a4d8c2SCharles.Forsyth Bprint(&bso, " R%d", i);
669*74a4d8c2SCharles.Forsyth for(i=0; i<32; i++)
670*74a4d8c2SCharles.Forsyth if(d->freg & (1<<i))
671*74a4d8c2SCharles.Forsyth Bprint(&bso, " F%d", i);
672*74a4d8c2SCharles.Forsyth for(i=0; i<32; i++)
673*74a4d8c2SCharles.Forsyth switch(d->cc & (1<<i)) {
674*74a4d8c2SCharles.Forsyth default:
675*74a4d8c2SCharles.Forsyth break;
676*74a4d8c2SCharles.Forsyth case E_HILO:
677*74a4d8c2SCharles.Forsyth Bprint(&bso, " HILO");
678*74a4d8c2SCharles.Forsyth break;
679*74a4d8c2SCharles.Forsyth case E_FCR:
680*74a4d8c2SCharles.Forsyth Bprint(&bso, " FCR");
681*74a4d8c2SCharles.Forsyth break;
682*74a4d8c2SCharles.Forsyth case E_MCR:
683*74a4d8c2SCharles.Forsyth Bprint(&bso, " MCR");
684*74a4d8c2SCharles.Forsyth break;
685*74a4d8c2SCharles.Forsyth case E_MEM:
686*74a4d8c2SCharles.Forsyth Bprint(&bso, " MEM%d", s->size);
687*74a4d8c2SCharles.Forsyth break;
688*74a4d8c2SCharles.Forsyth case E_MEMSB:
689*74a4d8c2SCharles.Forsyth Bprint(&bso, " SB%d", s->size);
690*74a4d8c2SCharles.Forsyth break;
691*74a4d8c2SCharles.Forsyth case E_MEMSP:
692*74a4d8c2SCharles.Forsyth Bprint(&bso, " SP%d", s->size);
693*74a4d8c2SCharles.Forsyth break;
694*74a4d8c2SCharles.Forsyth }
695*74a4d8c2SCharles.Forsyth }
696