xref: /plan9-contrib/sys/src/cmd/6l/span.c (revision 40d015479ed36701ae6dcfd8814f849fc6285e8d)
1 #include	"l.h"
2 
/*
 * Encoder state shared by the operand-encoding functions in this file.
 * Where these are reset and finally emitted is not visible in this part
 * of the file (presumably asmins — confirm).
 */
static int	rexflag;	/* Rx*, Pw and 0x40 bits or'ed in from regrex[] while operands are encoded */
static int	vexbytes;	/* Vexl/Vexr flags in the low bits; a register number is or'ed in shifted left by 8 */
static int	asmode;	/* compared against 64 to accept R8-R15 and the extended byte registers */
6 
/*
 * Lay out the text segment: assign a pc to every Prog, repeat until
 * no instruction changes size, then define etext, store each text
 * symbol's address and set textsize.
 */
void
span(void)
{
	Prog *p, *q;
	long v;
	vlong c, idat;
	int m, n, again;

	xdefine("etext", STEXT, 0L);
	idat = INITDAT;	/* remembered so a change to INITDAT below can be detected */
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		n = 0;
		/* a branch without a target is pointed at itself */
		if(p->to.type == D_BRANCH)
			if(p->pcond == P)
				p->pcond = p;
		/* back becomes 1 when there is a target whose own back is not 2 (the value 2 is assigned elsewhere — TODO confirm its meaning) */
		if((q = p->pcond) != P)
			if(q->back != 2)
				n = 1;
		p->back = n;
		/* rewrite ADJSP as an ADD or SUB on SP, or as a NOP when the amount is zero */
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = p->mode != 64? AADDL: AADDQ;
			if(v < 0) {
				/* keep the immediate non-negative by switching to SUB */
				p->as = p->mode != 64? ASUBL: ASUBQ;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	n = 0;	/* from here on n counts passes through loop: */

start:
	/* first pass: assemble every instruction once to get an initial size (mark) and pc */
	if(debug['v'])
		Bprint(&bso, "%5.2f span\n", cputime());
	Bflush(&bso);
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		if(p->to.type == D_BRANCH)
			if(p->back)
				p->pc = c;
		asmins(p);
		p->pc = c;
		m = andptr-and;	/* number of bytes asmins left in and[] */
		p->mark = m;
		c += m;
	}

loop:
	/* later passes: only branches (and Progs with bit 0100 set in back) are re-assembled */
	n++;
	if(debug['v'])
		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
	Bflush(&bso);
	if(n > 50) {
		print("span must be looping\n");
		errorexit();
	}
	again = 0;
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		if(p->to.type == D_BRANCH || p->back & 0100) {
			if(p->back)
				p->pc = c;
			asmins(p);
			m = andptr-and;
			if(m != p->mark) {
				/* size changed, so later pcs moved: another pass is needed */
				p->mark = m;
				again++;
			}
		}
		p->pc = c;
		c += p->mark;
	}
	if(again) {
		textsize = c;
		goto loop;
	}
	if(INITRND) {
		/* data starts at the rounded end of text; if that moved INITDAT, start over */
		INITDAT = rnd(c, INITRND);
		if(INITDAT != idat) {
			idat = INITDAT;
			goto start;
		}
	}
	xdefine("etext", STEXT, c);
	if(debug['v'])
		Bprint(&bso, "etext = %llux\n", c);
	Bflush(&bso);
	/* textp is chained through pcond; give each text symbol its final pc */
	for(p = textp; p != P; p = p->pcond)
		p->from.sym->value = p->pc;
	textsize = c - INITTEXT;
}
108 
109 void
xdefine(char * p,int t,vlong v)110 xdefine(char *p, int t, vlong v)
111 {
112 	Sym *s;
113 
114 	s = lookup(p, 0);
115 	if(s->type == 0 || s->type == SXREF) {
116 		s->type = t;
117 		s->value = v;
118 	}
119 	if(s->type == STEXT && s->value == 0)
120 		s->value = v;
121 }
122 
123 void
putsymb(char * s,int t,vlong v,int ver)124 putsymb(char *s, int t, vlong v, int ver)
125 {
126 	int i, f, l;
127 
128 	if(t == 'f')
129 		s++;
130 	l = 4;
131 	switch(HEADTYPE){
132 	default:
133 		break;
134 	case 5:
135 		if(debug['8'])
136 			break;
137 	case 2:
138 	case 6:
139 		lput(v>>32);
140 		l = 8;
141 		break;
142 	}
143 	lput(v);
144 	if(ver)
145 		t += 'a' - 'A';
146 	cput(t+0x80);			/* 0x80 is variable length */
147 
148 	if(t == 'Z' || t == 'z') {
149 		cput(s[0]);
150 		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
151 			cput(s[i]);
152 			cput(s[i+1]);
153 		}
154 		cput(0);
155 		cput(0);
156 		i++;
157 	}
158 	else {
159 		for(i=0; s[i]; i++)
160 			cput(s[i]);
161 		cput(0);
162 	}
163 	symsize += l + 1 + i + 1;
164 
165 	if(debug['n']) {
166 		if(t == 'z' || t == 'Z') {
167 			Bprint(&bso, "%c %.8llux ", t, v);
168 			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
169 				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
170 				Bprint(&bso, "/%x", f);
171 			}
172 			Bprint(&bso, "\n");
173 			return;
174 		}
175 		if(ver)
176 			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
177 		else
178 			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
179 	}
180 }
181 
182 void
asmsym(void)183 asmsym(void)
184 {
185 	Prog *p;
186 	Auto *a;
187 	Sym *s;
188 	int h;
189 
190 	s = lookup("etext", 0);
191 	if(s->type == STEXT)
192 		putsymb(s->name, 'T', s->value, s->version);
193 
194 	for(h=0; h<NHASH; h++)
195 		for(s=hash[h]; s!=S; s=s->link)
196 			switch(s->type) {
197 			case SCONST:
198 				putsymb(s->name, 'D', s->value, s->version);
199 				continue;
200 
201 			case SDATA:
202 				putsymb(s->name, 'D', s->value+INITDAT, s->version);
203 				continue;
204 
205 			case SBSS:
206 				putsymb(s->name, 'B', s->value+INITDAT, s->version);
207 				continue;
208 
209 			case SFILE:
210 				putsymb(s->name, 'f', s->value, s->version);
211 				continue;
212 			}
213 
214 	for(p=textp; p!=P; p=p->pcond) {
215 		s = p->from.sym;
216 		if(s->type != STEXT)
217 			continue;
218 
219 		/* filenames first */
220 		for(a=p->to.autom; a; a=a->link)
221 			if(a->type == D_FILE)
222 				putsymb(a->asym->name, 'z', a->aoffset, 0);
223 			else
224 			if(a->type == D_FILE1)
225 				putsymb(a->asym->name, 'Z', a->aoffset, 0);
226 
227 		putsymb(s->name, 'T', s->value, s->version);
228 
229 		/* frame, auto and param after */
230 		putsymb(".frame", 'm', p->to.offset+8, 0);
231 
232 		for(a=p->to.autom; a; a=a->link)
233 			if(a->type == D_AUTO)
234 				putsymb(a->asym->name, 'a', -a->aoffset, 0);
235 			else
236 			if(a->type == D_PARAM)
237 				putsymb(a->asym->name, 'p', a->aoffset, 0);
238 	}
239 	if(debug['v'] || debug['n'])
240 		Bprint(&bso, "symsize = %lud\n", symsize);
241 	Bflush(&bso);
242 }
243 
/*
 * Emit the pc/line table.  For every Prog whose line differs from the
 * previous one (ATEXT and ANOP are skipped) it writes the pc advance
 * since the last entry followed by the line delta, using the byte
 * codes noted beside each cput below.  lcsize counts the bytes
 * written and is padded to an even number at the end.
 * With -V each entry is also listed on bso.
 */
void
asmlc(void)
{
	vlong oldpc;
	Prog *p;
	long oldlc, v, s;

	oldpc = INITTEXT;
	oldlc = 0;
	for(p = firstp; p != P; p = p->link) {
		/* same line as before, or an instruction that is never recorded: nothing to write */
		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
			if(p->as == ATEXT)
				curtext = p;
			if(debug['V'])
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			continue;
		}
		if(debug['V'])
			Bprint(&bso, "\t\t%6ld", lcsize);
		/* pc advance in units of MINLC, written in chunks of at most 127 */
		v = (p->pc - oldpc) / MINLC;
		while(v) {
			s = 127;
			if(v < 127)
				s = v;
			cput(s+128);	/* 129-255 +pc */
			if(debug['V'])
				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
			v -= s;
			lcsize++;
		}
		s = p->line - oldlc;
		oldlc = p->line;
		oldpc = p->pc + MINLC;	/* the line entry written below itself accounts for one MINLC */
		/* large delta: escape byte 0 followed by the delta as 4 bytes, most significant first */
		if(s > 64 || s < -64) {
			cput(0);	/* 0 vv +lc */
			cput(s>>24);
			cput(s>>16);
			cput(s>>8);
			cput(s);
			if(debug['V']) {
				if(s > 0)
					Bprint(&bso, " lc+%ld(%d,%ld)\n",
						s, 0, s);
				else
					Bprint(&bso, " lc%ld(%d,%ld)\n",
						s, 0, s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
			lcsize += 5;
			continue;
		}
		/* small delta: a single byte */
		if(s > 0) {
			cput(0+s);	/* 1-64 +lc */
			if(debug['V']) {
				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		} else {
			cput(64-s);	/* 65-128 -lc */
			if(debug['V']) {
				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		}
		lcsize++;
	}
	/* pad to an even size with a one-unit pc advance */
	while(lcsize & 1) {
		s = 129;
		cput(s);
		lcsize++;
	}
	if(debug['v'] || debug['V'])
		Bprint(&bso, "lcsize = %ld\n", lcsize);
	Bflush(&bso);
}
323 
324 int
prefixof(Adr * a)325 prefixof(Adr *a)
326 {
327 	switch(a->type) {
328 	case D_INDIR+D_CS:
329 		return 0x2e;
330 	case D_INDIR+D_DS:
331 		return 0x3e;
332 	case D_INDIR+D_ES:
333 		return 0x26;
334 	case D_INDIR+D_FS:
335 		return 0x64;
336 	case D_INDIR+D_GS:
337 		return 0x65;
338 	}
339 	return 0;
340 }
341 
/*
 * Classify operand a, returning one of the Y* operand classes.
 * Yxxx is returned for operands that fit no class, including the
 * 64-bit-only registers when asmode is not 64.
 */
int
oclass(Adr *a)
{
	vlong v;
	long l;

	/* indirect operands, and non-X/Y operands that carry an index, are handled first */
	if(a->type < D_CONST2 && (a->type >= D_INDIR || !isxyreg(a->type) && a->index != D_NONE)) {
		/* an index with scale 0 marks either an address-of (D_ADDR) or a Ycol operand */
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		if(asmode != 64)
			return Yxxx;
		/* fall through: in 64-bit mode these are ordinary byte registers */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			return Yxxx;
		/* fall through: in 64-bit mode these are classed with the other registers */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

/*
	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;
*/

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_Y0+0:
	case D_Y0+1:
	case D_Y0+2:
	case D_Y0+3:
	case D_Y0+4:
	case D_Y0+5:
	case D_Y0+6:
	case D_Y0+7:
	case D_Y0+8:
	case D_Y0+9:
	case D_Y0+10:
	case D_Y0+11:
	case D_Y0+12:
	case D_Y0+13:
	case D_Y0+14:
	case D_Y0+15:
		return	Yyr;

	case D_NONE:
		return Ynone;

	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a constant with no symbol is classed by the smallest immediate form that holds it */
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			l = v;	/* truncate to long to test whether the value survives the round trip */
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
557 
/*
 * Append one byte to and[] built from a's index register (shifted
 * left 3), a's scale (shifted left 6) and the given base register
 * in the low bits.  On any unsupported combination a diagnostic is
 * issued and a zero byte is appended instead, so the instruction
 * keeps its length.
 */
void
asmidx(Adr *a, int base)
{
	int i;

	switch(a->index) {
	default:
		goto bad;

	case D_NONE:
		i = 4 << 3;	/* index field 4 is used when there is no index register */
		goto bas;	/* the scale is not examined in this case */

	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_BP:
	case D_SI:
	case D_DI:
		i = reg[a->index] << 3;
		break;
	}
	/* scale 1, 2, 4, 8 is encoded as 0..3 in the top two bits */
	switch(a->scale) {
	default:
		goto bad;
	case 1:
		break;
	case 2:
		i |= (1<<6);
		break;
	case 4:
		i |= (2<<6);
		break;
	case 8:
		i |= (3<<6);
		break;
	}
bas:
	switch(base) {
	default:
		goto bad;
	case D_NONE:	/* must be mod=00 */
		i |= 5;
		break;
	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		i |= reg[base];
		break;
	}
	*andptr++ = i;
	return;
bad:
	diag("asmidx: bad address %D", a);
	*andptr++ = 0;
	return;
}
641 
642 static void
put4(long v)643 put4(long v)
644 {
645 	if(dlm && curp != P && reloca != nil){
646 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
647 		reloca = nil;
648 	}
649 	andptr[0] = v;
650 	andptr[1] = v>>8;
651 	andptr[2] = v>>16;
652 	andptr[3] = v>>24;
653 	andptr += 4;
654 }
655 
656 static void
put8(vlong v)657 put8(vlong v)
658 {
659 	if(dlm && curp != P && reloca != nil){
660 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
661 		reloca = nil;
662 	}
663 	andptr[0] = v;
664 	andptr[1] = v>>8;
665 	andptr[2] = v>>16;
666 	andptr[3] = v>>24;
667 	andptr[4] = v>>32;
668 	andptr[5] = v>>40;
669 	andptr[6] = v>>48;
670 	andptr[7] = v>>56;
671 	andptr += 8;
672 }
673 
674 vlong
vaddr(Adr * a)675 vaddr(Adr *a)
676 {
677 	int t;
678 	vlong v;
679 	Sym *s;
680 
681 	t = a->type;
682 	v = a->offset;
683 	if(t == D_ADDR)
684 		t = a->index;
685 	switch(t) {
686 	case D_STATIC:
687 	case D_EXTERN:
688 		s = a->sym;
689 		if(s != nil) {
690 			if(dlm && curp != P)
691 				reloca = a;
692 			switch(s->type) {
693 			case SUNDEF:
694 				ckoff(s, v);
695 			case STEXT:
696 			case SCONST:
697 				if((uvlong)s->value < (uvlong)INITTEXT)
698 					v += INITTEXT;	/* TO DO */
699 				v += s->value;
700 				break;
701 			default:
702 				v += INITDAT + s->value;
703 			}
704 		}
705 	}
706 	return v;
707 }
708 
709 static void
asmandsz(Adr * a,int r,int rex,int m64)710 asmandsz(Adr *a, int r, int rex, int m64)
711 {
712 	long v;
713 	int t;
714 	Adr aa;
715 
716 	if(r == -1)
717 		diag("asmandsz: immedate instead of register");
718 
719 	rex &= (0x40 | Rxr);
720 	v = a->offset;
721 	t = a->type;
722 	if(a->index != D_NONE && !isxyreg(t)) {
723 		if(t >= D_INDIR && t < D_CONST2) {
724 			t -= D_INDIR;
725 			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
726 			if(t == D_NONE) {
727 				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
728 				asmidx(a, t);
729 				put4(v);
730 				return;
731 			}
732 			if(v == 0 && t != D_BP && t != D_R13) {
733 				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
734 				asmidx(a, t);
735 				return;
736 			}
737 			if(v >= -128 && v < 128) {
738 				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
739 				asmidx(a, t);
740 				*andptr++ = v;
741 				return;
742 			}
743 			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
744 			asmidx(a, t);
745 			put4(v);
746 			return;
747 		}
748 		switch(t) {
749 		default:
750 			goto bad;
751 		case D_STATIC:
752 		case D_EXTERN:
753 			aa.type = D_NONE+D_INDIR;
754 			break;
755 		case D_AUTO:
756 		case D_PARAM:
757 			aa.type = D_SP+D_INDIR;
758 			break;
759 		}
760 		aa.offset = vaddr(a);
761 		aa.index = a->index;
762 		aa.scale = a->scale;
763 		asmandsz(&aa, r, rex, m64);
764 		return;
765 	}
766 	if(t >= D_AL && t <= D_BH) {
767 		if(v)
768 			goto bad;
769 		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
770 		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
771 		return;
772 	}
773 	if(t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15) {
774 		if(v)
775 			goto bad;
776 		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
777 		if(t >= D_Y0 && t <= D_Y15)
778 			vexbytes |= Vexl;
779 		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
780 		return;
781 	}
782 	if(t >= D_INDIR && t < D_CONST2) {
783 		t -= D_INDIR;
784 		rexflag |= (regrex[t] & Rxb) | rex;
785 		if(t == D_NONE || D_CS <= t && t <= D_GS) {
786 			if(asmode != 64){
787 				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
788 				put4(v);
789 				return;
790 			}
791 			/* temporary */
792 			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
793 			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
794 			put4(v);
795 			return;
796 		}
797 		if(t == D_SP || t == D_R12) {
798 			if(v == 0) {
799 				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
800 				asmidx(a, t);
801 				return;
802 			}
803 			if(v >= -128 && v < 128) {
804 				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
805 				asmidx(a, t);
806 				*andptr++ = v;
807 				return;
808 			}
809 			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
810 			asmidx(a, t);
811 			put4(v);
812 			return;
813 		}
814 		if(t >= D_AX && t <= D_R15) {
815 			if(v == 0 && t != D_BP && t != D_R13) {
816 				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
817 				return;
818 			}
819 			if(v >= -128 && v < 128) {
820 				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
821 				andptr[1] = v;
822 				andptr += 2;
823 				return;
824 			}
825 			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
826 			put4(v);
827 			return;
828 		}
829 		goto bad;
830 	}
831 	switch(a->type) {
832 	default:
833 		goto bad;
834 	case D_STATIC:
835 	case D_EXTERN:
836 		aa.type = D_NONE+D_INDIR;
837 		break;
838 	case D_AUTO:
839 	case D_PARAM:
840 		aa.type = D_SP+D_INDIR;
841 		break;
842 	}
843 	aa.index = D_NONE;
844 	aa.scale = 1;
845 	aa.offset = vaddr(a);
846 	asmandsz(&aa, r, rex, m64);
847 	return;
848 bad:
849 	diag("asmand: bad address %D", a);
850 	return;
851 }
852 
853 int
isxyreg(int t)854 isxyreg(int t)
855 {
856 	return t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15;
857 }
858 
859 static void
vexreg(Adr * a)860 vexreg(Adr *a)
861 {
862 	int t;
863 
864 	t = a->type;
865 	if(t >= D_Y0 && t <= D_Y15) {
866 		vexbytes |= Vexl;
867 	} else if(t >= D_X0 && t <= D_X15) {
868 		if(vexed)
869 			vexbytes |= Vexr;	/* force vex prefix */
870 	} else
871 		return;
872 	if(a->index != D_NONE)
873 		vexbytes |= a->index << 8;
874 }
875 
876 void
asmand(Adr * a,Adr * ra)877 asmand(Adr *a, Adr *ra)
878 {
879 	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
880 }
881 
882 void
asmandg(Adr * a,Adr * r,int o,int rdest,int prefix)883 asmandg(Adr *a, Adr *r, int o, int rdest, int prefix)
884 {
885 	Adr aa, rr;
886 
887 	if(isxyreg(a->type)) {
888 		if(isxyreg(a->index) && r->type == D_CONST) {
889 			/*
890 			 * convert sse instructions with immediate like
891 			 * AESKEYGENASSIST $32, X1, X2 from
892 			 * a=X1(X2*0); r=$32 to a=X1, r=X2.  the
893 			 * caller adds the immediate byte.  vex is not required
894 			 */
895 			rr.offset = 0;
896 			rr.sym = a->sym;
897 			rr.type = a->index;
898 			rr.index = D_NONE;
899 			rr.scale = 0;
900 			r = &rr;
901 
902 			aa = *a;
903 			aa.index = D_NONE;
904 			a = &aa;
905 		}
906 	}
907 	vexreg(a);
908 	if(isxyreg(a->type)) {
909 		if(a->index != D_NONE) {
910 			aa = *a;
911 			aa.index = D_NONE;
912 			a = &aa;
913 		}
914 	}
915 	if(r == nil) {
916 		asmandsz(a, o, 0, 0);
917 		return;
918 	}
919 	vexreg(r);
920 	if(rdest && (prefix&P2) == 0 && vexbytes != 0 && (vexbytes>>8) == 0) {
921 		/* copy destination register as second source register */
922 		if(isxyreg(r->type)) {
923 			vexbytes |= r->type << 8;
924 			rexflag |= regrex[r->type] & Rxx;
925 		}
926 	}
927 	asmand(a, r);
928 }
929 
930 void
asmando(Adr * a,int o)931 asmando(Adr *a, int o)
932 {
933 	asmandg(a, nil, o, 0, 0);
934 }
935 
936 static void
bytereg(Adr * a)937 bytereg(Adr *a)
938 {
939 	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
940 		a->type = D_AL + (a->type-D_AX);
941 }
942 
/*
 * Table of special-case moves, pushes, pops and double shifts that
 * involve segment, control, debug, test and descriptor-table
 * registers.  Each entry gives the instruction, the class of the
 * from and to operands, a small case number and up to four bytes.
 * The Movtab layout and the code that interprets the case number
 * are not in this part of the file (presumably the domov path of
 * doasm — confirm there how the case number and bytes are used).
 * The table ends with a zero entry.
 */
#define	E	0xff	/* filler byte in the entries below; NOTE(review): looks like an end-of-opcode marker — confirm */
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
1073 
1074 int
isax(Adr * a)1075 isax(Adr *a)
1076 {
1077 
1078 	switch(a->type) {
1079 	case D_AX:
1080 	case D_AL:
1081 	case D_AH:
1082 	case D_INDIR+D_AX:
1083 		return 1;
1084 	}
1085 	if(a->index == D_AX)
1086 		return 1;
1087 	return 0;
1088 }
1089 
1090 void
subreg(Prog * p,int from,int to)1091 subreg(Prog *p, int from, int to)
1092 {
1093 
1094 	if(debug['Q'])
1095 		print("\n%P	s/%R/%R/\n", p, from, to);
1096 
1097 	if(p->from.type == from)
1098 		p->from.type = to;
1099 	if(p->to.type == from)
1100 		p->to.type = to;
1101 
1102 	if(p->from.index == from)
1103 		p->from.index = to;
1104 	if(p->to.index == from)
1105 		p->to.index = to;
1106 
1107 	from += D_INDIR;
1108 	if(p->from.type == from)
1109 		p->from.type = to+D_INDIR;
1110 	if(p->to.type == from)
1111 		p->to.type = to+D_INDIR;
1112 
1113 	if(debug['Q'])
1114 		print("%P\n", p);
1115 }
1116 
/*
 * Append the escape/prefix bytes and then the opcode byte for a
 * media instruction.
 *
 *	o	opcode table entry; further opcode bytes are read from o->op[]
 *	op	the byte at o->op[z] as already fetched by the caller
 *	osize	taken from t[3] by the callers; a value of 1 means op is
 *		treated as the opcode itself rather than as a prefix
 *	z	current index into o->op[]
 *
 * Returns z advanced past any prefix bytes consumed from o->op[].
 */
static int
mediaop(Optab *o, int op, int osize, int z)
{
	switch(op){
	case Pm38:
	case Pm3a:
		/* three-byte form: Pm, then the 38/3a byte, then the real opcode */
		*andptr++ = Pm;	/* 0f */
		*andptr++ = op;	/* 38 | 3a */
		op = o->op[++z];
		break;

	case Pm:
	case Pe:
	case Pf2:
	case Pf3:
		if(osize != 1){
			/* op is a prefix: emit it (unless it is Pm itself), then Pm, then fetch the next byte */
			if(op != Pm)
				*andptr++ = op;
			*andptr++ = Pm;
			op = o->op[++z];
			if(op == Pm38 || op == Pm3a) {
				*andptr++ = op;
				op = o->op[++z];
			}
			break;
		}
		/* fall through: with osize 1 the byte is an ordinary opcode */
	default:
		/* make sure the opcode is preceded by Pm, without emitting it twice */
		if(andptr == and || andptr[-1] != Pm)
			*andptr++ = Pm;
		break;
	}
	*andptr++ = op;
	return z;
}
1151 
1152 void
doasm(Prog * p)1153 doasm(Prog *p)
1154 {
1155 	Optab *o;
1156 	Prog *q, pp;
1157 	uchar *t;
1158 	Movtab *mo;
1159 	int z, op, ft, tt, xo, l, pre;
1160 	vlong v;
1161 	Adr vmi;
1162 
1163 	pre = prefixof(&p->from);
1164 	if(pre)
1165 		*andptr++ = pre;
1166 	pre = prefixof(&p->to);
1167 	if(pre)
1168 		*andptr++ = pre;
1169 
1170 	o = opindex[p->as];
1171 	if(o == nil) {
1172 		diag("asmins: missing op %P", p);
1173 		return;
1174 	}
1175 	ft = oclass(&p->from) * Ymax;
1176 	tt = oclass(&p->to) * Ymax;
1177 	t = o->ytab;
1178 	if(t == 0) {
1179 		diag("asmins: noproto %P", p);
1180 		return;
1181 	}
1182 	xo = o->op[0] == Pm;
1183 	for(z=0; *t; z+=t[3]+xo,t+=4)
1184 		if(ycover[ft+t[0]])
1185 		if(ycover[tt+t[1]])
1186 			goto found;
1187 	goto domov;
1188 
1189 found:
1190 	switch(o->prefix & 0xFF) {
1191 	case Pq:	/* 16 bit escape and opcode escape */
1192 		*andptr++ = Pe;
1193 		*andptr++ = Pm;
1194 		break;
1195 
1196 	case Pf2:	/* xmm opcode escape */
1197 	case Pf3:
1198 		*andptr++ = o->prefix & 0xFF;
1199 		*andptr++ = Pm;
1200 		break;
1201 
1202 	case Pm:	/* opcode escape */
1203 		*andptr++ = Pm;
1204 		break;
1205 
1206 	case Pe:	/* 16 bit escape */
1207 		*andptr++ = Pe;
1208 		break;
1209 
1210 	case Pw:	/* 64-bit escape */
1211 		if(p->mode != 64)
1212 			diag("asmins: illegal 64: %P", p);
1213 		rexflag |= Pw;
1214 		break;
1215 
1216 	case Pb:	/* botch */
1217 		bytereg(&p->from);
1218 		bytereg(&p->to);
1219 		break;
1220 
1221 	case P32:	/* 32 bit but illegal if 64-bit mode */
1222 		if(p->mode == 64)
1223 			diag("asmins: illegal in 64-bit mode: %P", p);
1224 		break;
1225 
1226 	case Py:	/* 64-bit only, no prefix */
1227 		if(p->mode != 64)
1228 			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1229 		break;
1230 	}
1231 	v = vaddr(&p->from);
1232 	op = o->op[z];
1233 	if(op == 0x0f) {
1234 		*andptr++ = op;
1235 		op = o->op[++z];
1236 	}
1237 	switch(t[2]) {
1238 	default:
1239 		diag("asmins: unknown z %d %P", t[2], p);
1240 		return;
1241 
1242 	case Zpseudo:
1243 		break;
1244 
1245 	case Zlit:
1246 		for(; op = o->op[z]; z++)
1247 			*andptr++ = op;
1248 		break;
1249 
1250 	case Zmb_r:
1251 		bytereg(&p->from);
1252 		/* fall through */
1253 	case Zm_r:
1254 		*andptr++ = op;
1255 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1256 		break;
1257 
1258 	case Zm_r_xm:
1259 		mediaop(o, op, t[3], z);
1260 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1261 		break;
1262 
1263 	case Zm_r_xm_nr:
1264 		rexflag = 0;
1265 		mediaop(o, op, t[3], z);
1266 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1267 		break;
1268 
1269 	case Zm_r_i_xm:
1270 		mediaop(o, op, t[3], z);
1271 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1272 		*andptr++ = p->to.offset;
1273 		break;
1274 
1275 	case Zm_r_3d:
1276 		*andptr++ = 0x0f;
1277 		*andptr++ = 0x0f;
1278 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1279 		*andptr++ = op;
1280 		break;
1281 
1282 	case Zibm_r:
1283 		*andptr++ = op;
1284 		asmandg(&p->from, &p->to, 0, 1, o->prefix);
1285 		*andptr++ = p->to.offset;
1286 		break;
1287 
1288 	case Zaut_r:
1289 		*andptr++ = 0x8d;	/* leal */
1290 		if(p->from.type != D_ADDR)
1291 			diag("asmins: Zaut sb type ADDR");
1292 		p->from.type = p->from.index;
1293 		p->from.index = D_NONE;
1294 		asmand(&p->from, &p->to);
1295 		p->from.index = p->from.type;
1296 		p->from.type = D_ADDR;
1297 		break;
1298 
1299 	case Zm_o:
1300 		*andptr++ = op;
1301 		asmando(&p->from, o->op[z+1]);
1302 		break;
1303 
1304 	case Zr_m:
1305 		*andptr++ = op;
1306 		asmand(&p->to, &p->from);
1307 		break;
1308 
1309 	case Zr_m_xm:
1310 		mediaop(o, op, t[3], z);
1311 		asmandg(&p->to, &p->from, 0, 0, o->prefix);
1312 		break;
1313 
1314 	case Zr_m_xm_nr:
1315 		rexflag = 0;
1316 		mediaop(o, op, t[3], z);
1317 		asmandg(&p->to, &p->from, 0, 0, o->prefix);
1318 		break;
1319 
1320 	case Zr_m_i_xm:
1321 		mediaop(o, op, t[3], z);
1322 		asmandg(&p->to, &p->from, 0, 0, o->prefix);
1323 		*andptr++ = p->from.offset;
1324 		break;
1325 
1326 	case Zo_m:
1327 		*andptr++ = op;
1328 		asmando(&p->to, o->op[z+1]);
1329 		break;
1330 
1331 	case Zo_m64:
1332 		*andptr++ = op;
1333 		asmandsz(&p->to, o->op[z+1], 0, 1);
1334 		break;
1335 
1336 	case Zm_ibo:
1337 		v = vaddr(&p->to);
1338 		*andptr++ = op;
1339 		asmando(&p->from, o->op[z+1]);
1340 		*andptr++ = v;
1341 		break;
1342 
1343 	case Zibo_m:
1344 		*andptr++ = op;
1345 		asmando(&p->to, o->op[z+1]);
1346 		*andptr++ = v;
1347 		break;
1348 
1349 	case Zibo_m_xm:
1350 		vmi = p->to;
1351 		if(p->to.index != D_NONE) {	/* VMI has "non-destructive dest" with dest in Vex.vvvv */
1352 			vmi.type = p->to.index;
1353 			vmi.index = p->to.type;
1354 		}
1355 		z = mediaop(o, op, t[3], z);
1356 		asmando(&vmi, o->op[z+1]);
1357 		*andptr++ = v;
1358 		break;
1359 
1360 	case Z_ib:
1361 		v = vaddr(&p->to);
1362 	case Zib_:
1363 		*andptr++ = op;
1364 		*andptr++ = v;
1365 		break;
1366 
1367 	case Zib_rp:
1368 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1369 		*andptr++ = op + reg[p->to.type];
1370 		*andptr++ = v;
1371 		break;
1372 
1373 	case Zil_rp:
1374 		rexflag |= regrex[p->to.type] & Rxb;
1375 		*andptr++ = op + reg[p->to.type];
1376 		if(o->prefix == Pe) {
1377 			*andptr++ = v;
1378 			*andptr++ = v>>8;
1379 		}
1380 		else
1381 			put4(v);
1382 		break;
1383 
1384 	case Zo_iw:
1385 		*andptr++ = op;
1386 		if(p->from.type != D_NONE){
1387 			*andptr++ = v;
1388 			*andptr++ = v>>8;
1389 		}
1390 		break;
1391 
1392 	case Ziq_rp:
1393 		l = v>>32;
1394 		if(l == 0){
1395 			//p->mark |= 0100;
1396 			//print("zero: %llux %P\n", v, p);
1397 			rexflag &= ~(0x40|Rxw);
1398 			rexflag |= regrex[p->to.type] & Rxb;
1399 			*andptr++ = 0xb8 + reg[p->to.type];
1400 			put4(v);
1401 		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
1402 			//p->mark |= 0100;
1403 			//print("sign: %llux %P\n", v, p);
1404 			*andptr ++ = 0xc7;
1405 			asmando(&p->to, 0);
1406 			put4(v);
1407 		}else{	/* need all 8 */
1408 			//print("all: %llux %P\n", v, p);
1409 			rexflag |= regrex[p->to.type] & Rxb;
1410 			*andptr++ = op + reg[p->to.type];
1411 			put8(v);
1412 		}
1413 		break;
1414 
1415 	case Zib_rr:
1416 		*andptr++ = op;
1417 		asmand(&p->to, &p->to);
1418 		*andptr++ = v;
1419 		break;
1420 
1421 	case Z_il:
1422 		v = vaddr(&p->to);
1423 	case Zil_:
1424 		*andptr++ = op;
1425 		if(o->prefix == Pe) {
1426 			*andptr++ = v;
1427 			*andptr++ = v>>8;
1428 		}
1429 		else
1430 			put4(v);
1431 		break;
1432 
1433 	case Zm_ilo:
1434 		v = vaddr(&p->to);
1435 		*andptr++ = op;
1436 		asmando(&p->from, o->op[z+1]);
1437 		if(o->prefix == Pe) {
1438 			*andptr++ = v;
1439 			*andptr++ = v>>8;
1440 		}
1441 		else
1442 			put4(v);
1443 		break;
1444 
1445 	case Zilo_m:
1446 		*andptr++ = op;
1447 		asmando(&p->to, o->op[z+1]);
1448 		if(o->prefix == Pe) {
1449 			*andptr++ = v;
1450 			*andptr++ = v>>8;
1451 		}
1452 		else
1453 			put4(v);
1454 		break;
1455 
1456 	case Zil_rr:
1457 		*andptr++ = op;
1458 		asmand(&p->to, &p->to);
1459 		if(o->prefix == Pe) {
1460 			*andptr++ = v;
1461 			*andptr++ = v>>8;
1462 		}
1463 		else
1464 			put4(v);
1465 		break;
1466 
1467 	case Z_rp:
1468 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1469 		*andptr++ = op + reg[p->to.type];
1470 		break;
1471 
1472 	case Zrp_:
1473 		rexflag |= regrex[p->from.type] & (Rxb|0x40);
1474 		*andptr++ = op + reg[p->from.type];
1475 		break;
1476 
1477 	case Zclr:
1478 		*andptr++ = op;
1479 		asmand(&p->to, &p->to);
1480 		break;
1481 
1482 	case Zbr:
1483 		q = p->pcond;
1484 		if(q) {
1485 			v = q->pc - p->pc - 2;
1486 			if(v >= -128 && v <= 127) {
1487 				*andptr++ = op;
1488 				*andptr++ = v;
1489 			} else {
1490 				v -= 6-2;
1491 				*andptr++ = 0x0f;
1492 				*andptr++ = o->op[z+1];
1493 				*andptr++ = v;
1494 				*andptr++ = v>>8;
1495 				*andptr++ = v>>16;
1496 				*andptr++ = v>>24;
1497 			}
1498 		}
1499 		break;
1500 
1501 	case Zcall:
1502 		q = p->pcond;
1503 		if(q) {
1504 			v = q->pc - p->pc - 5;
1505 			if(dlm && curp != P && p->to.sym->type == SUNDEF){
1506 				/* v = 0 - p->pc - 5; */
1507 				v = 0;
1508 				ckoff(p->to.sym, v);
1509 				v += p->to.sym->value;
1510 				dynreloc(p->to.sym, p->pc+1, 0);
1511 			}
1512 			*andptr++ = op;
1513 			*andptr++ = v;
1514 			*andptr++ = v>>8;
1515 			*andptr++ = v>>16;
1516 			*andptr++ = v>>24;
1517 		}
1518 		break;
1519 
1520 	case Zjmp:
1521 		q = p->pcond;
1522 		if(q) {
1523 			v = q->pc - p->pc - 2;
1524 			if(v >= -128 && v <= 127) {
1525 				*andptr++ = op;
1526 				*andptr++ = v;
1527 			} else {
1528 				v -= 5-2;
1529 				*andptr++ = o->op[z+1];
1530 				*andptr++ = v;
1531 				*andptr++ = v>>8;
1532 				*andptr++ = v>>16;
1533 				*andptr++ = v>>24;
1534 			}
1535 		}
1536 		break;
1537 
1538 	case Zloop:
1539 		q = p->pcond;
1540 		if(q) {
1541 			v = q->pc - p->pc - 2;
1542 			if(v < -128 || v > 127)
1543 				diag("loop too far: %P", p);
1544 			*andptr++ = op;
1545 			*andptr++ = v;
1546 		}
1547 		break;
1548 
1549 	case Zbyte:
1550 		*andptr++ = v;
1551 		if(op > 1) {
1552 			*andptr++ = v>>8;
1553 			if(op > 2) {
1554 				*andptr++ = v>>16;
1555 				*andptr++ = v>>24;
1556 				if(op > 4) {
1557 					*andptr++ = v>>32;
1558 					*andptr++ = v>>40;
1559 					*andptr++ = v>>48;
1560 					*andptr++ = v>>56;
1561 				}
1562 			}
1563 		}
1564 		break;
1565 	}
1566 	return;
1567 
1568 domov:
1569 	for(mo=ymovtab; mo->as; mo++)
1570 		if(p->as == mo->as)
1571 		if(ycover[ft+mo->ft])
1572 		if(ycover[tt+mo->tt]){
1573 			t = mo->op;
1574 			goto mfound;
1575 		}
1576 bad:
1577 	if(p->mode != 64){
1578 		/*
1579 		 * here, the assembly has failed.
1580 		 * if its a byte instruction that has
1581 		 * unaddressable registers, try to
1582 		 * exchange registers and reissue the
1583 		 * instruction with the operands renamed.
1584 		 */
1585 		pp = *p;
1586 		z = p->from.type;
1587 		if(z >= D_BP && z <= D_DI) {
1588 			if(isax(&p->to)) {
1589 				*andptr++ = 0x87;			/* xchg lhs,bx */
1590 				asmando(&p->from, reg[D_BX]);
1591 				subreg(&pp, z, D_BX);
1592 				doasm(&pp);
1593 				*andptr++ = 0x87;			/* xchg lhs,bx */
1594 				asmando(&p->from, reg[D_BX]);
1595 			} else {
1596 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1597 				subreg(&pp, z, D_AX);
1598 				doasm(&pp);
1599 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1600 			}
1601 			return;
1602 		}
1603 		z = p->to.type;
1604 		if(z >= D_BP && z <= D_DI) {
1605 			if(isax(&p->from)) {
1606 				*andptr++ = 0x87;			/* xchg rhs,bx */
1607 				asmando(&p->to, reg[D_BX]);
1608 				subreg(&pp, z, D_BX);
1609 				doasm(&pp);
1610 				*andptr++ = 0x87;			/* xchg rhs,bx */
1611 				asmando(&p->to, reg[D_BX]);
1612 			} else {
1613 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1614 				subreg(&pp, z, D_AX);
1615 				doasm(&pp);
1616 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1617 			}
1618 			return;
1619 		}
1620 	}
1621 	if(0) {
1622 		int ft = oclass(&p->from), tt = oclass(&p->to); extern char* yclname[];
1623 		fprint(2, "ft=%d [%s] tt=%d [%s]\n", ft, yclname[ft], tt, yclname[tt]);
1624 	}
1625 	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1626 	return;
1627 
1628 mfound:
1629 	switch(mo->code) {
1630 	default:
1631 		diag("asmins: unknown mov %d %P", mo->code, p);
1632 		break;
1633 
1634 	case 0:	/* lit */
1635 		for(z=0; t[z]!=E; z++)
1636 			*andptr++ = t[z];
1637 		break;
1638 
1639 	case 1:	/* r,m */
1640 		*andptr++ = t[0];
1641 		asmando(&p->to, t[1]);
1642 		break;
1643 
1644 	case 2:	/* m,r */
1645 		*andptr++ = t[0];
1646 		asmando(&p->from, t[1]);
1647 		break;
1648 
1649 	case 3:	/* r,m - 2op */
1650 		*andptr++ = t[0];
1651 		*andptr++ = t[1];
1652 		asmando(&p->to, t[2]);
1653 		rexflag |= regrex[p->from.type] & (Rxr|0x40);
1654 		break;
1655 
1656 	case 4:	/* m,r - 2op */
1657 		*andptr++ = t[0];
1658 		*andptr++ = t[1];
1659 		asmando(&p->from, t[2]);
1660 		rexflag |= regrex[p->to.type] & (Rxr|0x40);
1661 		break;
1662 
1663 	case 5:	/* load full pointer, trash heap */
1664 		if(t[0])
1665 			*andptr++ = t[0];
1666 		switch(p->to.index) {
1667 		default:
1668 			goto bad;
1669 		case D_DS:
1670 			*andptr++ = 0xc5;
1671 			break;
1672 		case D_SS:
1673 			*andptr++ = 0x0f;
1674 			*andptr++ = 0xb2;
1675 			break;
1676 		case D_ES:
1677 			*andptr++ = 0xc4;
1678 			break;
1679 		case D_FS:
1680 			*andptr++ = 0x0f;
1681 			*andptr++ = 0xb4;
1682 			break;
1683 		case D_GS:
1684 			*andptr++ = 0x0f;
1685 			*andptr++ = 0xb5;
1686 			break;
1687 		}
1688 		asmand(&p->from, &p->to);
1689 		break;
1690 
1691 	case 6:	/* double shift */
1692 		if(t[0] == Pw){
1693 			if(p->mode != 64)
1694 				diag("asmins: illegal 64: %P", p);
1695 			rexflag |= Pw;
1696 			t++;
1697 		}else if(t[0] == Pe){
1698 			*andptr++ = Pe;
1699 			t++;
1700 		}
1701 		z = p->from.type;
1702 		switch(z) {
1703 		default:
1704 			goto bad;
1705 		case D_CONST:
1706 			*andptr++ = 0x0f;
1707 			*andptr++ = t[0];
1708 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1709 			*andptr++ = p->from.offset;
1710 			break;
1711 		case D_CL:
1712 		case D_CX:
1713 			*andptr++ = 0x0f;
1714 			*andptr++ = t[1];
1715 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1716 			break;
1717 		}
1718 		break;
1719 
1720 	case 7: /* imul rm,r */
1721 		*andptr++ = t[4];
1722 		*andptr++ = t[5];
1723 		asmand(&p->from, &p->to);
1724 		break;
1725 	}
1726 }
1727 
/*
 * Assemble the single instruction p into the buffer `and`.
 * doasm emits the opcode and operand bytes starting at and[0];
 * this routine then fixes up the prefix bytes:
 *  - if vexbytes is still 0 and rexflag is set, a REX byte
 *    (0x40|rexflag) is inserted after any leading prefix bytes;
 *  - if vexbytes is non-zero, the (66|F3|F2)? 0F (38|3A)? bytes
 *    are replaced by a 2- or 3-byte VEX prefix.
 * On return andptr points just past the last byte produced, so the
 * encoded length is andptr-and.
 */
1728 void
asmins(Prog * p)1729 asmins(Prog *p)
1730 {
1731 	int n, np, o, c, t, v1, v2, vexlen;
1732 
	/* reset the per-instruction encoding state before calling doasm */
1733 	vexbytes = 0;
1734 	rexflag = 0;
1735 	andptr = and;
1736 	asmode = p->mode;
1737 	doasm(p);
1738 	if(vexbytes == 0) {
1739 		if(rexflag) {
1740 			if(0) fprint(2, "rexflag=%#ux %P\n", rexflag, p);
1741 			/*
1742 			 * the rex prefix must appear before the first opcode byte
1743 			 * and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
1744 			 * before the 0f opcode escape.
1745 			 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1746 			 */
1747 			if(p->mode != 64)
1748 				diag("asmins: illegal in mode %d: %P", p->mode, p);
1749 			n = andptr - and;
			/* np ends up as the index of the first byte that is not a prefix */
1750 			for(np = 0; np < n; np++) {
1751 				c = and[np];
1752 				if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
1753 					break;
1754 			}
			/* open a one-byte gap at and[np] and drop the REX byte into it */
1755 			memmove(and+np+1, and+np, n-np);
1756 			and[np] = 0x40 | rexflag;
1757 			andptr++;
1758 		}
1759 		return;
1760 	}
1761 	if(0) if(rexflag||vexbytes)fprint(2, "rexflag=%#ux vexbytes=%#ux %P\n", rexflag, vexbytes, p);
1762 	n = andptr - and;
1763 //vex if need vvvv register or W or L. never need R X B (must be 1 in 32-bit)
1764 //note: 4th register encoding in immediate byte
1765 	/* media/sse/vex: seg* (66|F3|F2)? 0F (38|3A)? op -> seg* vex2|vex3 op */
1766 	for(np = 0; np < n; np++) {	/* seg* */
1767 		c = and[np];
1768 		if(c != 0x2e && c != 0x3e && c != 0x26 && c != 0x64 && c != 0x65)
1769 			break;
1770 	}
	/* o is the index at which the VEX prefix will be written */
1771 	o = np;
	/* with fewer than two bytes left there is nothing to rewrite; the bytes are left as doasm produced them */
1772 	if(np+1 < n) {
		/*
		 * v1: opcode-map selector; it stays 0 unless the 3-byte form is needed.
		 * v2: last byte of the VEX prefix, seeded with the Vexl bit taken
		 * from vexbytes and the Vexp0 (no prefix) selector.
		 */
1773 		v1 = 0;
1774 		v2 = (vexbytes & Vexl) | Vexp0;
		/* fold a 66/F3/F2 prefix byte into v2 and step over it */
1775 		switch(and[np]) {
1776 		case 0x66:
1777 			v2 |= Vexp66;
1778 			np++;
1779 			break;
1780 		case 0xF3:
1781 			v2 |= Vexpf3;
1782 			np++;
1783 			break;
1784 		case 0xF2:
1785 			v2 |= Vexpf2;
1786 			np++;
1787 			break;
1788 		}
1789 		c = and[np];
1790 		if(c == Vex2 || c == Vex3)
1791 			return;	/* already vexed */
		/* anything else must start with the 0F escape */
1792 		if(and[np] != 0x0F) {
1793 			diag("internal: inconsistent vex state: %P", p);
1794 			return;
1795 		}
1796 		np++;
		/* a following 38 or 3A selects the map and is consumed; a plain 0F opcode only needs v1 when W, X or B is set */
1797 		if(np < n) {
1798 			switch(and[np]) {
1799 			case 0x38:
1800 				v1 = Vex0f38;
1801 				np++;
1802 				break;
1803 			case 0x3a:
1804 				v1 = Vex0f3a;
1805 				np++;
1806 				break;
1807 			default:
1808 				if(rexflag & (Rxw|Rxx|Rxb))
1809 					v1 = Vex0f;	/* force 3-byte vex */
1810 				break;
1811 			}
1812 		}
		/*
		 * bits 8 and up of vexbytes hold a register operand; a D_Y* or D_X*
		 * value is reduced to its register number, and the low four bits are
		 * stored inverted in bits 3-6 of v2 (the vvvv field noted above).
		 */
1813 		t = vexbytes >> 8;
1814 		if(t >= D_Y0 && t <= D_Y15)
1815 			t -= D_Y0;
1816 		else if(t >= D_X0 && t <= D_X15)
1817 			t -= D_X0;
1818 		v2 |= (~t & 0xF) << 3;
1819 		vexlen = 2;
1820 		if(v1 != 0)
1821 			vexlen = 3;
		/* slide the opcode and operand bytes so they directly follow the VEX prefix, then recompute the end pointer */
1822 		if(o+vexlen != np) {
1823 			memmove(and+o+vexlen, and+np, n-np);
1824 			andptr = and+(o+vexlen)+(n-np);
1825 		}
		/* the rexflag bits are shifted up by 5 and inverted to form the VEX R (and, in the 3-byte form, X and B) bits */
1826 		if(vexlen == 2) {
1827 			and[o] = Vex2;
1828 			and[o+1] = v2 | ((~rexflag<<5) & Vexr);
1829 		} else {
1830 			and[o] = Vex3;
1831 			and[o+1] = v1 | ((~rexflag<<5) & (Vexr | Vexx | Vexb));
			/* the W request in rexflag is carried in the last VEX byte */
1832 			if(rexflag & Rxw)
1833 				v2 |= Vexw;
1834 			and[o+2] = v2;
1835 		}
1836 	}
1837 }
1838 
/*
 * Relocation kinds chosen by dynreloc:
 * ABS* when its abs argument is non-zero, REL* otherwise;
 * the *U forms when the symbol's type is SUNDEF, the *D forms otherwise.
 * The values are used as indices into modemap.
 */
1839 enum{
1840 	ABSD = 0,
1841 	ABSU = 1,
1842 	RELD = 2,
1843 	RELU = 3,
1844 };
1845 
/*
 * Indexed by ABSD, ABSU, RELD, RELU; dynreloc stores modemap[kind]
 * as the mode byte of each relocation entry.
 * NOTE(review): RELD maps to -1, presumably because that kind is not
 * expected to be emitted -- confirm against the loader's table format.
 */
1846 int modemap[4] = { 0, 1, -1, 2, };
1847 
1848 typedef struct Reloc Reloc;
1849 
/*
 * Growable table of dynamic relocations, held as two parallel arrays.
 */
1850 struct Reloc
1851 {
	/* number of entries in use; incremented by dynreloc */
1852 	int n;
	/* number of entries allocated in m and a; raised by 64 in grow */
1853 	int t;
	/* mode byte of each entry (a modemap value) */
1854 	uchar *m;
	/* address of each entry; dynreloc inserts so that addresses stay in ascending order */
1855 	ulong *a;
1856 };
1857 
/* the single relocation table filled by dynreloc and written out by asmdyn */
1858 Reloc rels;
1859 
1860 static void
grow(Reloc * r)1861 grow(Reloc *r)
1862 {
1863 	int t;
1864 	uchar *m, *nm;
1865 	ulong *a, *na;
1866 
1867 	t = r->t;
1868 	r->t += 64;
1869 	m = r->m;
1870 	a = r->a;
1871 	r->m = nm = malloc(r->t*sizeof(uchar));
1872 	r->a = na = malloc(r->t*sizeof(ulong));
1873 	memmove(nm, m, t*sizeof(uchar));
1874 	memmove(na, a, t*sizeof(ulong));
1875 	free(m);
1876 	free(a);
1877 }
1878 
1879 void
dynreloc(Sym * s,ulong v,int abs)1880 dynreloc(Sym *s, ulong v, int abs)
1881 {
1882 	int i, k, n;
1883 	uchar *m;
1884 	ulong *a;
1885 	Reloc *r;
1886 
1887 	if(s->type == SUNDEF)
1888 		k = abs ? ABSU : RELU;
1889 	else
1890 		k = abs ? ABSD : RELD;
1891 	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1892 	k = modemap[k];
1893 	r = &rels;
1894 	n = r->n;
1895 	if(n >= r->t)
1896 		grow(r);
1897 	m = r->m;
1898 	a = r->a;
1899 	for(i = n; i > 0; i--){
1900 		if(v < a[i-1]){	/* happens occasionally for data */
1901 			m[i] = m[i-1];
1902 			a[i] = a[i-1];
1903 		}
1904 		else
1905 			break;
1906 	}
1907 	m[i] = k;
1908 	a[i] = v;
1909 	r->n++;
1910 }
1911 
/*
 * Write the string s, followed by its terminating NUL, with cput.
 * Returns the number of bytes written (the string length plus one).
 */
static int
sput(char *s)
{
	int n;

	for(n = 0; s[n] != 0; n++)
		cput(s[n]);
	cput(0);
	return n+1;
}
1923 
1924 void
asmdyn()1925 asmdyn()
1926 {
1927 	int i, n, t, c;
1928 	Sym *s;
1929 	ulong la, ra, *a;
1930 	vlong off;
1931 	uchar *m;
1932 	Reloc *r;
1933 
1934 	cflush();
1935 	off = seek(cout, 0, 1);
1936 	lput(0);
1937 	t = 0;
1938 	lput(imports);
1939 	t += 4;
1940 	for(i = 0; i < NHASH; i++)
1941 		for(s = hash[i]; s != S; s = s->link)
1942 			if(s->type == SUNDEF){
1943 				lput(s->sig);
1944 				t += 4;
1945 				t += sput(s->name);
1946 			}
1947 
1948 	la = 0;
1949 	r = &rels;
1950 	n = r->n;
1951 	m = r->m;
1952 	a = r->a;
1953 	lput(n);
1954 	t += 4;
1955 	for(i = 0; i < n; i++){
1956 		ra = *a-la;
1957 		if(*a < la)
1958 			diag("bad relocation order");
1959 		if(ra < 256)
1960 			c = 0;
1961 		else if(ra < 65536)
1962 			c = 1;
1963 		else
1964 			c = 2;
1965 		cput((c<<6)|*m++);
1966 		t++;
1967 		if(c == 0){
1968 			cput(ra);
1969 			t++;
1970 		}
1971 		else if(c == 1){
1972 			wput(ra);
1973 			t += 2;
1974 		}
1975 		else{
1976 			lput(ra);
1977 			t += 4;
1978 		}
1979 		la = *a++;
1980 	}
1981 
1982 	cflush();
1983 	seek(cout, off, 0);
1984 	lput(t);
1985 
1986 	if(debug['v']){
1987 		Bprint(&bso, "import table entries = %d\n", imports);
1988 		Bprint(&bso, "export table entries = %d\n", exports);
1989 	}
1990 }
1991 
/*
 * Printable names of the operand classes, indexed by the Y* class value.
 * Referenced by the disabled (if(0)) diagnostic in doasm, which prints
 * yclname[oclass(&p->from)] and yclname[oclass(&p->to)].
 */
1992 char*	yclname[] ={
1993 	[Yxxx] "Yxxx",
1994 	[Ynone] "Ynone",
1995 	[Yi0] "Yi0",
1996 	[Yi1] "Yi1",
1997 	[Yi8] "Yi8",
1998 	[Ys32] "Ys32",
1999 	[Yi32] "Yi32",
2000 	[Yi64] "Yi64",
2001 	[Yiauto] "Yiauto",
2002 	[Yal] "Yal",
2003 	[Ycl] "Ycl",
2004 	[Yax] "Yax",
2005 	[Ycx] "Ycx",
2006 	[Yrb] "Yrb",
2007 	[Yrl] "Yrl",
2008 	[Yrf] "Yrf",
2009 	[Yf0] "Yf0",
2010 	[Yrx] "Yrx",
2011 	[Ymb] "Ymb",
2012 	[Yml] "Yml",
2013 	[Ym] "Ym",
2014 	[Ybr] "Ybr",
2015 	[Ycol] "Ycol",
2016 	[Ycs] "Ycs",
2017 	[Yss] "Yss",
2018 	[Yds] "Yds",
2019 	[Yes] "Yes",
2020 	[Yfs] "Yfs",
2021 	[Ygs] "Ygs",
2022 	[Ygdtr] "Ygdtr",
2023 	[Yidtr] "Yidtr",
2024 	[Yldtr] "Yldtr",
2025 	[Ymsw] "Ymsw",
2026 	[Ytask] "Ytask",
2027 	[Ycr0] "Ycr0",
2028 	[Ycr1] "Ycr1",
2029 	[Ycr2] "Ycr2",
2030 	[Ycr3] "Ycr3",
2031 	[Ycr4] "Ycr4",
2032 	[Ycr5] "Ycr5",
2033 	[Ycr6] "Ycr6",
2034 	[Ycr7] "Ycr7",
2035 	[Ycr8] "Ycr8",
2036 	[Ydr0] "Ydr0",
2037 	[Ydr1] "Ydr1",
2038 	[Ydr2] "Ydr2",
2039 	[Ydr3] "Ydr3",
2040 	[Ydr4] "Ydr4",
2041 	[Ydr5] "Ydr5",
2042 	[Ydr6] "Ydr6",
2043 	[Ydr7] "Ydr7",
2044 	[Ytr0] "Ytr0",
2045 	[Ytr1] "Ytr1",
2046 	[Ytr2] "Ytr2",
2047 	[Ytr3] "Ytr3",
2048 	[Ytr4] "Ytr4",
2049 	[Ytr5] "Ytr5",
2050 	[Ytr6] "Ytr6",
2051 	[Ytr7] "Ytr7",
2052 	[Yrl32] "Yrl32",
2053 	[Yrl64] "Yrl64",
2054 	[Ymr] "Ymr",
2055 	[Ymm] "Ymm",
2056 	[Yxr] "Yxr",
2057 	[Yxm] "Yxm",
2058 	[Yyr] "Yyr",
2059 	[Yxyr] "Yxyr",
2060 	[Ymax] "Ymax",
2061 };
2062