1 #include "l.h"
2
/*
 * Mips24k: when non-zero, nops are inserted so that three stores
 * never execute back to back.  this works around 24k erratum #48
 * at a cost of roughly 10% in text space, so leave it off unless
 * the target needs it.  known test cases are "hoc -e '7^6'" and
 * "{ echo moon; echo plot } | scat".
 */
enum {
	Mips24k	= 0
};
12
13 static int
isdblwrdmov(Prog * p)14 isdblwrdmov(Prog *p)
15 {
16 if(p == nil)
17 return 0;
18 switch(p->as){
19 case AMOVD:
20 case AMOVDF:
21 case AMOVDW:
22 case AMOVFD:
23 case AMOVWD:
24 case AMOVV:
25 case AMOVVL:
26 case AMOVVR:
27 case AMOVFV:
28 case AMOVDV:
29 case AMOVVF:
30 case AMOVVD:
31 return 1;
32 }
33 return 0;
34 }
35
36 static int
ismove(Prog * p)37 ismove(Prog *p)
38 {
39 if(p == nil)
40 return 0;
41 switch(p->as){
42 case AMOVB:
43 case AMOVBU:
44 case AMOVF:
45 case AMOVFW:
46 case AMOVH:
47 case AMOVHU:
48 case AMOVW:
49 case AMOVWF:
50 case AMOVWL:
51 case AMOVWR:
52 case AMOVWU:
53 return 1;
54 }
55 if(isdblwrdmov(p))
56 return 1;
57 return 0;
58 }
59
60 static int
isstore(Prog * p)61 isstore(Prog *p)
62 {
63 if(p == nil)
64 return 0;
65 if(ismove(p))
66 switch(p->to.type) {
67 case D_OREG:
68 case D_EXTERN:
69 case D_STATIC:
70 case D_AUTO:
71 case D_PARAM:
72 return 1;
73 }
74 return 0;
75 }
76
77 static int
iscondbranch(Prog * p)78 iscondbranch(Prog *p)
79 {
80 if(p == nil)
81 return 0;
82 switch(p->as){
83 case ABEQ:
84 case ABFPF:
85 case ABFPT:
86 case ABGEZ:
87 case ABGEZAL:
88 case ABGTZ:
89 case ABLEZ:
90 case ABLTZ:
91 case ABLTZAL:
92 case ABNE:
93 return 1;
94 }
95 return 0;
96 }
97
98 static int
isbranch(Prog * p)99 isbranch(Prog *p)
100 {
101 if(p == nil)
102 return 0;
103 switch(p->as){
104 case AJAL:
105 case AJMP:
106 case ARET:
107 case ARFE:
108 return 1;
109 }
110 if(iscondbranch(p))
111 return 1;
112 return 0;
113 }
114
115 static void
nopafter(Prog * p)116 nopafter(Prog *p)
117 {
118 p->mark |= LABEL|SYNC;
119 addnop(p);
120 }
121
122 /*
123 * workaround for 24k erratum #48, costs about 0.5% in space.
124 * inserts a NOP before the last of 3 consecutive stores.
125 * double-word stores complicate things.
126 */
127 static int
no3stores(Prog * p)128 no3stores(Prog *p)
129 {
130 Prog *p1;
131
132 if(!isstore(p))
133 return 0;
134 p1 = p->link;
135 if(!isstore(p1))
136 return 0;
137 if(isdblwrdmov(p) || isdblwrdmov(p1)) {
138 nopafter(p);
139 nop.store.count++;
140 nop.store.outof++;
141 return 1;
142 }
143 if(isstore(p1->link)) {
144 nopafter(p1);
145 nop.store.count++;
146 nop.store.outof++;
147 return 1;
148 }
149 return 0;
150 }
151
152 /*
153 * keep stores out of branch delay slots.
154 * this is costly in space (the other 9.5%), but makes no3stores effective.
155 * there is undoubtedly a better way to do this.
156 */
157 void
storesnosched(void)158 storesnosched(void)
159 {
160 Prog *p;
161
162 for(p = firstp; p != P; p = p->link)
163 if(isstore(p))
164 p->mark |= NOSCHED;
165 }
166
167 int
triplestorenops(void)168 triplestorenops(void)
169 {
170 int r;
171 Prog *p, *p1;
172
173 r = 0;
174 for(p = firstp; p != P; p = p1) {
175 p1 = p->link;
176 // if (p->mark & NOSCHED)
177 // continue;
178 if(ismove(p) && isstore(p)) {
179 if (no3stores(p))
180 r++;
181 /*
182 * given storenosched, the next two
183 * checks shouldn't be necessary.
184 */
185 /*
186 * add nop after first MOV in `MOV; Bcond; MOV'.
187 */
188 else if(isbranch(p1) && isstore(p1->link)) {
189 nopafter(p);
190 nop.branch.count++;
191 nop.branch.outof++;
192 r++;
193 }
194 /*
195 * this may be a branch target, so insert a nop after,
196 * in case a branch leading here has a store in its
197 * delay slot and we have consecutive stores here.
198 */
199 if(p->mark & (LABEL|SYNC) && !isnop(p1)) {
200 nopafter(p);
201 nop.branch.count++;
202 nop.branch.outof++;
203 r++;
204 }
205 } else if (isbranch(p))
206 /*
207 * can't ignore delay slot of a conditional branch;
208 * the branch could fail and fall through.
209 */
210 if (!iscondbranch(p) && p1)
211 p1 = p1->link; /* skip its delay slot */
212 }
213 return r;
214 }
215
216 void
noops(void)217 noops(void)
218 {
219 Prog *p, *p1, *q, *q1;
220 int o, curframe, curbecome, maxbecome;
221
222 /*
223 * find leaf subroutines
224 * become sizes
225 * frame sizes
226 * strip NOPs
227 * expand RET
228 * expand BECOME pseudo
229 */
230
231 if(debug['v'])
232 Bprint(&bso, "%5.2f noops\n", cputime());
233 Bflush(&bso);
234
235 curframe = 0;
236 curbecome = 0;
237 maxbecome = 0;
238 curtext = 0;
239
240 q = P;
241 for(p = firstp; p != P; p = p->link) {
242
243 /* find out how much arg space is used in this TEXT */
244 if(p->to.type == D_OREG && p->to.reg == REGSP)
245 if(p->to.offset > curframe)
246 curframe = p->to.offset;
247
248 switch(p->as) {
249 case ATEXT:
250 if(curtext && curtext->from.sym) {
251 curtext->from.sym->frame = curframe;
252 curtext->from.sym->become = curbecome;
253 if(curbecome > maxbecome)
254 maxbecome = curbecome;
255 }
256 curframe = 0;
257 curbecome = 0;
258
259 p->mark |= LABEL|LEAF|SYNC;
260 if(p->link)
261 p->link->mark |= LABEL;
262 curtext = p;
263 break;
264
265 /* too hard, just leave alone */
266 case AMOVW:
267 if(p->to.type == D_FCREG ||
268 p->to.type == D_MREG) {
269 p->mark |= LABEL|SYNC;
270 break;
271 }
272 if(p->from.type == D_FCREG ||
273 p->from.type == D_MREG) {
274 p->mark |= LABEL|SYNC;
275 addnop(p);
276 addnop(p);
277 nop.mfrom.count += 2;
278 nop.mfrom.outof += 2;
279 break;
280 }
281 break;
282
283 /* too hard, just leave alone */
284 case ACASE:
285 case ASYSCALL:
286 case AWORD:
287 case ATLBWR:
288 case ATLBWI:
289 case ATLBP:
290 case ATLBR:
291 p->mark |= LABEL|SYNC;
292 break;
293
294 case ANOR:
295 if(p->to.type == D_REG && p->to.reg == REGZERO)
296 p->mark |= LABEL|SYNC;
297 break;
298
299 case ARET:
300 /* special form of RET is BECOME */
301 if(p->from.type == D_CONST)
302 if(p->from.offset > curbecome)
303 curbecome = p->from.offset;
304
305 if(p->link != P)
306 p->link->mark |= LABEL;
307 break;
308
309 case ANOP:
310 q1 = p->link;
311 q->link = q1; /* q is non-nop */
312 q1->mark |= p->mark;
313 continue;
314
315 case ABCASE:
316 p->mark |= LABEL|SYNC;
317 goto dstlab;
318
319 case ABGEZAL:
320 case ABLTZAL:
321 case AJAL:
322 if(curtext != P)
323 curtext->mark &= ~LEAF;
324
325 case AJMP:
326 case ABEQ:
327 case ABGEZ:
328 case ABGTZ:
329 case ABLEZ:
330 case ABLTZ:
331 case ABNE:
332 case ABFPT:
333 case ABFPF:
334 p->mark |= BRANCH;
335
336 dstlab:
337 q1 = p->cond;
338 if(q1 != P) {
339 while(q1->as == ANOP) {
340 q1 = q1->link;
341 p->cond = q1;
342 }
343 if(!(q1->mark & LEAF))
344 q1->mark |= LABEL;
345 } else
346 p->mark |= LABEL;
347 q1 = p->link;
348 if(q1 != P)
349 q1->mark |= LABEL;
350 break;
351 }
352 q = p;
353 }
354
355 if(curtext && curtext->from.sym) {
356 curtext->from.sym->frame = curframe;
357 curtext->from.sym->become = curbecome;
358 if(curbecome > maxbecome)
359 maxbecome = curbecome;
360 }
361
362 if(debug['b'])
363 print("max become = %d\n", maxbecome);
364 xdefine("ALEFbecome", STEXT, maxbecome);
365
366 curtext = 0;
367 for(p = firstp; p != P; p = p->link) {
368 switch(p->as) {
369 case ATEXT:
370 curtext = p;
371 break;
372 case AJAL:
373 if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
374 o = maxbecome - curtext->from.sym->frame;
375 if(o <= 0)
376 break;
377 /* calling a become or calling a variable */
378 if(p->to.sym == S || p->to.sym->become) {
379 curtext->to.offset += o;
380 if(debug['b']) {
381 curp = p;
382 print("%D calling %D increase %d\n",
383 &curtext->from, &p->to, o);
384 }
385 }
386 }
387 break;
388 }
389 }
390
391 for(p = firstp; p != P; p = p->link) {
392 o = p->as;
393 switch(o) {
394 case ATEXT:
395 curtext = p;
396 autosize = p->to.offset + 4;
397 if(autosize <= 4)
398 if(curtext->mark & LEAF) {
399 p->to.offset = -4;
400 autosize = 0;
401 }
402
403 q = p;
404 if(autosize) {
405 q = prg();
406 q->as = AADD;
407 q->line = p->line;
408 q->from.type = D_CONST;
409 q->from.offset = -autosize;
410 q->to.type = D_REG;
411 q->to.reg = REGSP;
412
413 q->link = p->link;
414 p->link = q;
415 } else
416 if(!(curtext->mark & LEAF)) {
417 if(debug['v'])
418 Bprint(&bso, "save suppressed in: %s\n",
419 curtext->from.sym->name);
420 Bflush(&bso);
421 curtext->mark |= LEAF;
422 }
423
424 if(curtext->mark & LEAF) {
425 if(curtext->from.sym)
426 curtext->from.sym->type = SLEAF;
427 break;
428 }
429
430 q1 = prg();
431 q1->as = AMOVW;
432 q1->line = p->line;
433 q1->from.type = D_REG;
434 q1->from.reg = REGLINK;
435 q1->to.type = D_OREG;
436 q1->from.offset = 0;
437 q1->to.reg = REGSP;
438
439 q1->link = q->link;
440 q->link = q1;
441 break;
442
443 case ARET:
444 nocache(p);
445 if(p->from.type == D_CONST)
446 goto become;
447 if(curtext->mark & LEAF) {
448 if(!autosize) {
449 p->as = AJMP;
450 p->from = zprg.from;
451 p->to.type = D_OREG;
452 p->to.offset = 0;
453 p->to.reg = REGLINK;
454 p->mark |= BRANCH;
455 break;
456 }
457
458 p->as = AADD;
459 p->from.type = D_CONST;
460 p->from.offset = autosize;
461 p->to.type = D_REG;
462 p->to.reg = REGSP;
463
464 q = prg();
465 q->as = AJMP;
466 q->line = p->line;
467 q->to.type = D_OREG;
468 q->to.offset = 0;
469 q->to.reg = REGLINK;
470 q->mark |= BRANCH;
471
472 q->link = p->link;
473 p->link = q;
474 break;
475 }
476 p->as = AMOVW;
477 p->from.type = D_OREG;
478 p->from.offset = 0;
479 p->from.reg = REGSP;
480 p->to.type = D_REG;
481 p->to.reg = 2;
482
483 q = p;
484 if(autosize) {
485 q = prg();
486 q->as = AADD;
487 q->line = p->line;
488 q->from.type = D_CONST;
489 q->from.offset = autosize;
490 q->to.type = D_REG;
491 q->to.reg = REGSP;
492
493 q->link = p->link;
494 p->link = q;
495 }
496
497 q1 = prg();
498 q1->as = AJMP;
499 q1->line = p->line;
500 q1->to.type = D_OREG;
501 q1->to.offset = 0;
502 q1->to.reg = 2;
503 q1->mark |= BRANCH;
504
505 q1->link = q->link;
506 q->link = q1;
507 break;
508
509 become:
510 if(curtext->mark & LEAF) {
511
512 q = prg();
513 q->line = p->line;
514 q->as = AJMP;
515 q->from = zprg.from;
516 q->to = p->to;
517 q->cond = p->cond;
518 q->link = p->link;
519 q->mark |= BRANCH;
520 p->link = q;
521
522 p->as = AADD;
523 p->from = zprg.from;
524 p->from.type = D_CONST;
525 p->from.offset = autosize;
526 p->to = zprg.to;
527 p->to.type = D_REG;
528 p->to.reg = REGSP;
529
530 break;
531 }
532 q = prg();
533 q->line = p->line;
534 q->as = AJMP;
535 q->from = zprg.from;
536 q->to = p->to;
537 q->cond = p->cond;
538 q->link = p->link;
539 q->mark |= BRANCH;
540 p->link = q;
541
542 q = prg();
543 q->line = p->line;
544 q->as = AADD;
545 q->from.type = D_CONST;
546 q->from.offset = autosize;
547 q->to.type = D_REG;
548 q->to.reg = REGSP;
549 q->link = p->link;
550 p->link = q;
551
552 p->as = AMOVW;
553 p->from = zprg.from;
554 p->from.type = D_OREG;
555 p->from.offset = 0;
556 p->from.reg = REGSP;
557 p->to = zprg.to;
558 p->to.type = D_REG;
559 p->to.reg = REGLINK;
560
561 break;
562 }
563 }
564 if (Mips24k)
565 storesnosched();
566
567 curtext = P;
568 q = P; /* p - 1 */
569 q1 = firstp; /* top of block */
570 o = 0; /* count of instructions */
571 for(p = firstp; p != P; p = p1) {
572 p1 = p->link;
573 o++;
574 if(p->mark & NOSCHED){
575 if(q1 != p){
576 sched(q1, q);
577 }
578 for(; p != P; p = p->link){
579 if(!(p->mark & NOSCHED))
580 break;
581 q = p;
582 }
583 p1 = p;
584 q1 = p;
585 o = 0;
586 continue;
587 }
588 if(p->mark & (LABEL|SYNC)) {
589 if(q1 != p)
590 sched(q1, q);
591 q1 = p;
592 o = 1;
593 }
594 if(p->mark & (BRANCH|SYNC)) {
595 sched(q1, p);
596 q1 = p1;
597 o = 0;
598 }
599 if(o >= NSCHED) {
600 sched(q1, p);
601 q1 = p1;
602 o = 0;
603 }
604 q = p;
605 }
606
607 if (Mips24k)
608 triplestorenops();
609 }
610
611 void
addnop(Prog * p)612 addnop(Prog *p)
613 {
614 Prog *q;
615
616 q = prg();
617 q->as = ANOR;
618 q->line = p->line;
619 q->from.type = D_REG;
620 q->from.reg = REGZERO;
621 q->to.type = D_REG;
622 q->to.reg = REGZERO;
623
624 q->link = p->link;
625 p->link = q;
626 }
627
628 void
nocache(Prog * p)629 nocache(Prog *p)
630 {
631 p->optab = 0;
632 p->from.class = 0;
633 p->to.class = 0;
634 }
635