xref: /plan9/sys/src/cmd/vl/noop.c (revision 0400b64795cb7922fbea5587531527b381e1e588)
1 #include	"l.h"
2 
/*
 * Mips24k: build-time switch for the 24k erratum #48 workaround,
 * which inserts nops so that three stores never run consecutively.
 * it costs roughly 10% in text space, so leave it at 0 unless the
 * target needs it.  known test cases are "hoc -e '7^6'" and
 * "{ echo moon; echo plot } | scat".
 */
enum {
	Mips24k = 0,	/* non-zero enables the workaround passes in noops() */
};
12 
13 static int
isdblwrdmov(Prog * p)14 isdblwrdmov(Prog *p)
15 {
16 	if(p == nil)
17 		return 0;
18 	switch(p->as){
19 	case AMOVD:
20 	case AMOVDF:
21 	case AMOVDW:
22 	case AMOVFD:
23 	case AMOVWD:
24 	case AMOVV:
25 	case AMOVVL:
26 	case AMOVVR:
27 	case AMOVFV:
28 	case AMOVDV:
29 	case AMOVVF:
30 	case AMOVVD:
31 		return 1;
32 	}
33 	return 0;
34 }
35 
36 static int
ismove(Prog * p)37 ismove(Prog *p)
38 {
39 	if(p == nil)
40 		return 0;
41 	switch(p->as){
42 	case AMOVB:
43 	case AMOVBU:
44 	case AMOVF:
45 	case AMOVFW:
46 	case AMOVH:
47 	case AMOVHU:
48 	case AMOVW:
49 	case AMOVWF:
50 	case AMOVWL:
51 	case AMOVWR:
52 	case AMOVWU:
53 		return 1;
54 	}
55 	if(isdblwrdmov(p))
56 		return 1;
57 	return 0;
58 }
59 
60 static int
isstore(Prog * p)61 isstore(Prog *p)
62 {
63 	if(p == nil)
64 		return 0;
65 	if(ismove(p))
66 		switch(p->to.type) {
67 		case D_OREG:
68 		case D_EXTERN:
69 		case D_STATIC:
70 		case D_AUTO:
71 		case D_PARAM:
72 			return 1;
73 		}
74 	return 0;
75 }
76 
77 static int
iscondbranch(Prog * p)78 iscondbranch(Prog *p)
79 {
80 	if(p == nil)
81 		return 0;
82 	switch(p->as){
83 	case ABEQ:
84 	case ABFPF:
85 	case ABFPT:
86 	case ABGEZ:
87 	case ABGEZAL:
88 	case ABGTZ:
89 	case ABLEZ:
90 	case ABLTZ:
91 	case ABLTZAL:
92 	case ABNE:
93 		return 1;
94 	}
95 	return 0;
96 }
97 
98 static int
isbranch(Prog * p)99 isbranch(Prog *p)
100 {
101 	if(p == nil)
102 		return 0;
103 	switch(p->as){
104 	case AJAL:
105 	case AJMP:
106 	case ARET:
107 	case ARFE:
108 		return 1;
109 	}
110 	if(iscondbranch(p))
111 		return 1;
112 	return 0;
113 }
114 
115 static void
nopafter(Prog * p)116 nopafter(Prog *p)
117 {
118 	p->mark |= LABEL|SYNC;
119 	addnop(p);
120 }
121 
122 /*
123  * workaround for 24k erratum #48, costs about 0.5% in space.
124  * inserts a NOP before the last of 3 consecutive stores.
125  * double-word stores complicate things.
126  */
127 static int
no3stores(Prog * p)128 no3stores(Prog *p)
129 {
130 	Prog *p1;
131 
132 	if(!isstore(p))
133 		return 0;
134 	p1 = p->link;
135 	if(!isstore(p1))
136 		return 0;
137 	if(isdblwrdmov(p) || isdblwrdmov(p1)) {
138 		nopafter(p);
139 		nop.store.count++;
140 		nop.store.outof++;
141 		return 1;
142 	}
143 	if(isstore(p1->link)) {
144 		nopafter(p1);
145 		nop.store.count++;
146 		nop.store.outof++;
147 		return 1;
148 	}
149 	return 0;
150 }
151 
152 /*
153  * keep stores out of branch delay slots.
154  * this is costly in space (the other 9.5%), but makes no3stores effective.
155  * there is undoubtedly a better way to do this.
156  */
157 void
storesnosched(void)158 storesnosched(void)
159 {
160 	Prog *p;
161 
162 	for(p = firstp; p != P; p = p->link)
163 		if(isstore(p))
164 			p->mark |= NOSCHED;
165 }
166 
167 int
triplestorenops(void)168 triplestorenops(void)
169 {
170 	int r;
171 	Prog *p, *p1;
172 
173 	r = 0;
174 	for(p = firstp; p != P; p = p1) {
175 		p1 = p->link;
176 //		if (p->mark & NOSCHED)
177 //			continue;
178 		if(ismove(p) && isstore(p)) {
179 			if (no3stores(p))
180 				r++;
181 			/*
182 			 * given storenosched, the next two
183 			 * checks shouldn't be necessary.
184 			 */
185 			/*
186 			 * add nop after first MOV in `MOV; Bcond; MOV'.
187 			 */
188 			else if(isbranch(p1) && isstore(p1->link)) {
189 				nopafter(p);
190 				nop.branch.count++;
191 				nop.branch.outof++;
192 				r++;
193 			}
194 			/*
195 			 * this may be a branch target, so insert a nop after,
196 			 * in case a branch leading here has a store in its
197 			 * delay slot and we have consecutive stores here.
198 			 */
199 			if(p->mark & (LABEL|SYNC) && !isnop(p1)) {
200 				nopafter(p);
201 				nop.branch.count++;
202 				nop.branch.outof++;
203 				r++;
204 			}
205 		} else if (isbranch(p))
206 			/*
207 			 * can't ignore delay slot of a conditional branch;
208 			 * the branch could fail and fall through.
209 			 */
210 			if (!iscondbranch(p) && p1)
211 				p1 = p1->link;	/* skip its delay slot */
212 	}
213 	return r;
214 }
215 
216 void
noops(void)217 noops(void)
218 {
219 	Prog *p, *p1, *q, *q1;
220 	int o, curframe, curbecome, maxbecome;
221 
222 	/*
223 	 * find leaf subroutines
224 	 * become sizes
225 	 * frame sizes
226 	 * strip NOPs
227 	 * expand RET
228 	 * expand BECOME pseudo
229 	 */
230 
231 	if(debug['v'])
232 		Bprint(&bso, "%5.2f noops\n", cputime());
233 	Bflush(&bso);
234 
235 	curframe = 0;
236 	curbecome = 0;
237 	maxbecome = 0;
238 	curtext = 0;
239 
240 	q = P;
241 	for(p = firstp; p != P; p = p->link) {
242 
243 		/* find out how much arg space is used in this TEXT */
244 		if(p->to.type == D_OREG && p->to.reg == REGSP)
245 			if(p->to.offset > curframe)
246 				curframe = p->to.offset;
247 
248 		switch(p->as) {
249 		case ATEXT:
250 			if(curtext && curtext->from.sym) {
251 				curtext->from.sym->frame = curframe;
252 				curtext->from.sym->become = curbecome;
253 				if(curbecome > maxbecome)
254 					maxbecome = curbecome;
255 			}
256 			curframe = 0;
257 			curbecome = 0;
258 
259 			p->mark |= LABEL|LEAF|SYNC;
260 			if(p->link)
261 				p->link->mark |= LABEL;
262 			curtext = p;
263 			break;
264 
265 		/* too hard, just leave alone */
266 		case AMOVW:
267 			if(p->to.type == D_FCREG ||
268 			   p->to.type == D_MREG) {
269 				p->mark |= LABEL|SYNC;
270 				break;
271 			}
272 			if(p->from.type == D_FCREG ||
273 			   p->from.type == D_MREG) {
274 				p->mark |= LABEL|SYNC;
275 				addnop(p);
276 				addnop(p);
277 				nop.mfrom.count += 2;
278 				nop.mfrom.outof += 2;
279 				break;
280 			}
281 			break;
282 
283 		/* too hard, just leave alone */
284 		case ACASE:
285 		case ASYSCALL:
286 		case AWORD:
287 		case ATLBWR:
288 		case ATLBWI:
289 		case ATLBP:
290 		case ATLBR:
291 			p->mark |= LABEL|SYNC;
292 			break;
293 
294 		case ANOR:
295 			if(p->to.type == D_REG && p->to.reg == REGZERO)
296 				p->mark |= LABEL|SYNC;
297 			break;
298 
299 		case ARET:
300 			/* special form of RET is BECOME */
301 			if(p->from.type == D_CONST)
302 				if(p->from.offset > curbecome)
303 					curbecome = p->from.offset;
304 
305 			if(p->link != P)
306 				p->link->mark |= LABEL;
307 			break;
308 
309 		case ANOP:
310 			q1 = p->link;
311 			q->link = q1;		/* q is non-nop */
312 			q1->mark |= p->mark;
313 			continue;
314 
315 		case ABCASE:
316 			p->mark |= LABEL|SYNC;
317 			goto dstlab;
318 
319 		case ABGEZAL:
320 		case ABLTZAL:
321 		case AJAL:
322 			if(curtext != P)
323 				curtext->mark &= ~LEAF;
324 
325 		case AJMP:
326 		case ABEQ:
327 		case ABGEZ:
328 		case ABGTZ:
329 		case ABLEZ:
330 		case ABLTZ:
331 		case ABNE:
332 		case ABFPT:
333 		case ABFPF:
334 			p->mark |= BRANCH;
335 
336 		dstlab:
337 			q1 = p->cond;
338 			if(q1 != P) {
339 				while(q1->as == ANOP) {
340 					q1 = q1->link;
341 					p->cond = q1;
342 				}
343 				if(!(q1->mark & LEAF))
344 					q1->mark |= LABEL;
345 			} else
346 				p->mark |= LABEL;
347 			q1 = p->link;
348 			if(q1 != P)
349 				q1->mark |= LABEL;
350 			break;
351 		}
352 		q = p;
353 	}
354 
355 	if(curtext && curtext->from.sym) {
356 		curtext->from.sym->frame = curframe;
357 		curtext->from.sym->become = curbecome;
358 		if(curbecome > maxbecome)
359 			maxbecome = curbecome;
360 	}
361 
362 	if(debug['b'])
363 		print("max become = %d\n", maxbecome);
364 	xdefine("ALEFbecome", STEXT, maxbecome);
365 
366 	curtext = 0;
367 	for(p = firstp; p != P; p = p->link) {
368 		switch(p->as) {
369 		case ATEXT:
370 			curtext = p;
371 			break;
372 		case AJAL:
373 			if(curtext != P && curtext->from.sym != S && curtext->to.offset >= 0) {
374 				o = maxbecome - curtext->from.sym->frame;
375 				if(o <= 0)
376 					break;
377 				/* calling a become or calling a variable */
378 				if(p->to.sym == S || p->to.sym->become) {
379 					curtext->to.offset += o;
380 					if(debug['b']) {
381 						curp = p;
382 						print("%D calling %D increase %d\n",
383 							&curtext->from, &p->to, o);
384 					}
385 				}
386 			}
387 			break;
388 		}
389 	}
390 
391 	for(p = firstp; p != P; p = p->link) {
392 		o = p->as;
393 		switch(o) {
394 		case ATEXT:
395 			curtext = p;
396 			autosize = p->to.offset + 4;
397 			if(autosize <= 4)
398 			if(curtext->mark & LEAF) {
399 				p->to.offset = -4;
400 				autosize = 0;
401 			}
402 
403 			q = p;
404 			if(autosize) {
405 				q = prg();
406 				q->as = AADD;
407 				q->line = p->line;
408 				q->from.type = D_CONST;
409 				q->from.offset = -autosize;
410 				q->to.type = D_REG;
411 				q->to.reg = REGSP;
412 
413 				q->link = p->link;
414 				p->link = q;
415 			} else
416 			if(!(curtext->mark & LEAF)) {
417 				if(debug['v'])
418 					Bprint(&bso, "save suppressed in: %s\n",
419 						curtext->from.sym->name);
420 				Bflush(&bso);
421 				curtext->mark |= LEAF;
422 			}
423 
424 			if(curtext->mark & LEAF) {
425 				if(curtext->from.sym)
426 					curtext->from.sym->type = SLEAF;
427 				break;
428 			}
429 
430 			q1 = prg();
431 			q1->as = AMOVW;
432 			q1->line = p->line;
433 			q1->from.type = D_REG;
434 			q1->from.reg = REGLINK;
435 			q1->to.type = D_OREG;
436 			q1->from.offset = 0;
437 			q1->to.reg = REGSP;
438 
439 			q1->link = q->link;
440 			q->link = q1;
441 			break;
442 
443 		case ARET:
444 			nocache(p);
445 			if(p->from.type == D_CONST)
446 				goto become;
447 			if(curtext->mark & LEAF) {
448 				if(!autosize) {
449 					p->as = AJMP;
450 					p->from = zprg.from;
451 					p->to.type = D_OREG;
452 					p->to.offset = 0;
453 					p->to.reg = REGLINK;
454 					p->mark |= BRANCH;
455 					break;
456 				}
457 
458 				p->as = AADD;
459 				p->from.type = D_CONST;
460 				p->from.offset = autosize;
461 				p->to.type = D_REG;
462 				p->to.reg = REGSP;
463 
464 				q = prg();
465 				q->as = AJMP;
466 				q->line = p->line;
467 				q->to.type = D_OREG;
468 				q->to.offset = 0;
469 				q->to.reg = REGLINK;
470 				q->mark |= BRANCH;
471 
472 				q->link = p->link;
473 				p->link = q;
474 				break;
475 			}
476 			p->as = AMOVW;
477 			p->from.type = D_OREG;
478 			p->from.offset = 0;
479 			p->from.reg = REGSP;
480 			p->to.type = D_REG;
481 			p->to.reg = 2;
482 
483 			q = p;
484 			if(autosize) {
485 				q = prg();
486 				q->as = AADD;
487 				q->line = p->line;
488 				q->from.type = D_CONST;
489 				q->from.offset = autosize;
490 				q->to.type = D_REG;
491 				q->to.reg = REGSP;
492 
493 				q->link = p->link;
494 				p->link = q;
495 			}
496 
497 			q1 = prg();
498 			q1->as = AJMP;
499 			q1->line = p->line;
500 			q1->to.type = D_OREG;
501 			q1->to.offset = 0;
502 			q1->to.reg = 2;
503 			q1->mark |= BRANCH;
504 
505 			q1->link = q->link;
506 			q->link = q1;
507 			break;
508 
509 		become:
510 			if(curtext->mark & LEAF) {
511 
512 				q = prg();
513 				q->line = p->line;
514 				q->as = AJMP;
515 				q->from = zprg.from;
516 				q->to = p->to;
517 				q->cond = p->cond;
518 				q->link = p->link;
519 				q->mark |= BRANCH;
520 				p->link = q;
521 
522 				p->as = AADD;
523 				p->from = zprg.from;
524 				p->from.type = D_CONST;
525 				p->from.offset = autosize;
526 				p->to = zprg.to;
527 				p->to.type = D_REG;
528 				p->to.reg = REGSP;
529 
530 				break;
531 			}
532 			q = prg();
533 			q->line = p->line;
534 			q->as = AJMP;
535 			q->from = zprg.from;
536 			q->to = p->to;
537 			q->cond = p->cond;
538 			q->link = p->link;
539 			q->mark |= BRANCH;
540 			p->link = q;
541 
542 			q = prg();
543 			q->line = p->line;
544 			q->as = AADD;
545 			q->from.type = D_CONST;
546 			q->from.offset = autosize;
547 			q->to.type = D_REG;
548 			q->to.reg = REGSP;
549 			q->link = p->link;
550 			p->link = q;
551 
552 			p->as = AMOVW;
553 			p->from = zprg.from;
554 			p->from.type = D_OREG;
555 			p->from.offset = 0;
556 			p->from.reg = REGSP;
557 			p->to = zprg.to;
558 			p->to.type = D_REG;
559 			p->to.reg = REGLINK;
560 
561 			break;
562 		}
563 	}
564 	if (Mips24k)
565 		storesnosched();
566 
567 	curtext = P;
568 	q = P;		/* p - 1 */
569 	q1 = firstp;	/* top of block */
570 	o = 0;		/* count of instructions */
571 	for(p = firstp; p != P; p = p1) {
572 		p1 = p->link;
573 		o++;
574 		if(p->mark & NOSCHED){
575 			if(q1 != p){
576 				sched(q1, q);
577 			}
578 			for(; p != P; p = p->link){
579 				if(!(p->mark & NOSCHED))
580 					break;
581 				q = p;
582 			}
583 			p1 = p;
584 			q1 = p;
585 			o = 0;
586 			continue;
587 		}
588 		if(p->mark & (LABEL|SYNC)) {
589 			if(q1 != p)
590 				sched(q1, q);
591 			q1 = p;
592 			o = 1;
593 		}
594 		if(p->mark & (BRANCH|SYNC)) {
595 			sched(q1, p);
596 			q1 = p1;
597 			o = 0;
598 		}
599 		if(o >= NSCHED) {
600 			sched(q1, p);
601 			q1 = p1;
602 			o = 0;
603 		}
604 		q = p;
605 	}
606 
607 	if (Mips24k)
608 		triplestorenops();
609 }
610 
611 void
addnop(Prog * p)612 addnop(Prog *p)
613 {
614 	Prog *q;
615 
616 	q = prg();
617 	q->as = ANOR;
618 	q->line = p->line;
619 	q->from.type = D_REG;
620 	q->from.reg = REGZERO;
621 	q->to.type = D_REG;
622 	q->to.reg = REGZERO;
623 
624 	q->link = p->link;
625 	p->link = q;
626 }
627 
628 void
nocache(Prog * p)629 nocache(Prog *p)
630 {
631 	p->optab = 0;
632 	p->from.class = 0;
633 	p->to.class = 0;
634 }
635