1 #include "l.h"
2
/*
 * Per-instruction encoding state shared by the operand encoders below.
 */
static int rexflag;	/* bits OR'ed in from regrex[] (masked by Rxb/Rxx/Rxr) and Pw while operands are encoded */
static int vexbytes;	/* Vexl/Vexr flags in the low bits; an extra register operand is OR'ed in shifted left by 8 */
static int asmode;	/* compared against 64 to decide whether R8-R15 and their byte forms are legal */
6
/*
 * span: assign a pc to every instruction and settle instruction sizes.
 *
 * 1. A preparation pass fixes up branch targets, records in p->back
 *    whether the branch target has back != 2, and rewrites AADJSP
 *    into an add/sub on SP (or a NOP when the adjustment is zero).
 * 2. "start" assembles every instruction once from INITTEXT, saving
 *    each length in p->mark.
 * 3. "loop" re-assembles branches (and instructions with back&0100)
 *    until no length changes; it gives up after 50 iterations.
 * 4. If INITRND is set, INITDAT is recomputed from the rounded end of
 *    text; when that moves INITDAT the whole layout is redone.
 * Finally etext, the text symbol values and textsize are set.
 */
void
span(void)
{
	Prog *p, *q;
	long v;
	vlong c, idat;
	int m, n, again;

	xdefine("etext", STEXT, 0L);
	idat = INITDAT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		n = 0;
		/* a branch with no resolved target is made to point at itself */
		if(p->to.type == D_BRANCH)
			if(p->pcond == P)
				p->pcond = p;
		if((q = p->pcond) != P)
			if(q->back != 2)
				n = 1;
		p->back = n;
		if(p->as == AADJSP) {
			/* ADJSP $n becomes ADD $-n,SP; a negative result is flipped into SUB */
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = p->mode != 64? AADDL: AADDQ;
			if(v < 0) {
				p->as = p->mode != 64? ASUBL: ASUBQ;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	n = 0;

start:
	if(debug['v'])
		Bprint(&bso, "%5.2f span\n", cputime());
	Bflush(&bso);
	/* first full assembly: every instruction gets a pc and a length (p->mark) */
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		/* p->pc is set before asmins for branches with p->back set */
		if(p->to.type == D_BRANCH)
			if(p->back)
				p->pc = c;
		asmins(p);
		p->pc = c;
		m = andptr-and;
		p->mark = m;
		c += m;
	}

loop:
	/* n counts refinement passes across restarts; it is not reset at "start" */
	n++;
	if(debug['v'])
		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
	Bflush(&bso);
	if(n > 50) {
		print("span must be looping\n");
		errorexit();
	}
	again = 0;
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		/* only branches (or instructions flagged 0100 in back) can change size */
		if(p->to.type == D_BRANCH || p->back & 0100) {
			if(p->back)
				p->pc = c;
			asmins(p);
			m = andptr-and;
			if(m != p->mark) {
				p->mark = m;
				again++;
			}
		}
		p->pc = c;
		c += p->mark;
	}
	if(again) {
		textsize = c;
		goto loop;
	}
	if(INITRND) {
		/* data starts at the rounded end of text; if that moved, lay out again */
		INITDAT = rnd(c, INITRND);
		if(INITDAT != idat) {
			idat = INITDAT;
			goto start;
		}
	}
	xdefine("etext", STEXT, c);
	if(debug['v'])
		Bprint(&bso, "etext = %llux\n", c);
	Bflush(&bso);
	/* each text symbol takes the final pc of its TEXT instruction */
	for(p = textp; p != P; p = p->pcond)
		p->from.sym->value = p->pc;
	textsize = c - INITTEXT;
}
108
109 void
xdefine(char * p,int t,vlong v)110 xdefine(char *p, int t, vlong v)
111 {
112 Sym *s;
113
114 s = lookup(p, 0);
115 if(s->type == 0 || s->type == SXREF) {
116 s->type = t;
117 s->value = v;
118 }
119 if(s->type == STEXT && s->value == 0)
120 s->value = v;
121 }
122
123 void
putsymb(char * s,int t,vlong v,int ver)124 putsymb(char *s, int t, vlong v, int ver)
125 {
126 int i, f, l;
127
128 if(t == 'f')
129 s++;
130 l = 4;
131 switch(HEADTYPE){
132 default:
133 break;
134 case 5:
135 if(debug['8'])
136 break;
137 case 2:
138 case 6:
139 lput(v>>32);
140 l = 8;
141 break;
142 }
143 lput(v);
144 if(ver)
145 t += 'a' - 'A';
146 cput(t+0x80); /* 0x80 is variable length */
147
148 if(t == 'Z' || t == 'z') {
149 cput(s[0]);
150 for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
151 cput(s[i]);
152 cput(s[i+1]);
153 }
154 cput(0);
155 cput(0);
156 i++;
157 }
158 else {
159 for(i=0; s[i]; i++)
160 cput(s[i]);
161 cput(0);
162 }
163 symsize += l + 1 + i + 1;
164
165 if(debug['n']) {
166 if(t == 'z' || t == 'Z') {
167 Bprint(&bso, "%c %.8llux ", t, v);
168 for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
169 f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
170 Bprint(&bso, "/%x", f);
171 }
172 Bprint(&bso, "\n");
173 return;
174 }
175 if(ver)
176 Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
177 else
178 Bprint(&bso, "%c %.8llux %s\n", t, v, s);
179 }
180 }
181
/*
 * asmsym: emit the symbol table through putsymb.
 *
 * Order of output: "etext" (if it is a text symbol), then every
 * SCONST/SDATA/SBSS/SFILE symbol found in the hash table, then for
 * each text symbol its file-name entries, the symbol itself, a
 * ".frame" entry and its automatics and parameters.
 */
void
asmsym(void)
{
	Prog *p;
	Auto *a;
	Sym *s;
	int h;

	s = lookup("etext", 0);
	if(s->type == STEXT)
		putsymb(s->name, 'T', s->value, s->version);

	/* data and bss values are stored relative to INITDAT, so it is added back here */
	for(h=0; h<NHASH; h++)
		for(s=hash[h]; s!=S; s=s->link)
			switch(s->type) {
			case SCONST:
				putsymb(s->name, 'D', s->value, s->version);
				continue;

			case SDATA:
				putsymb(s->name, 'D', s->value+INITDAT, s->version);
				continue;

			case SBSS:
				putsymb(s->name, 'B', s->value+INITDAT, s->version);
				continue;

			case SFILE:
				putsymb(s->name, 'f', s->value, s->version);
				continue;
			}

	for(p=textp; p!=P; p=p->pcond) {
		s = p->from.sym;
		if(s->type != STEXT)
			continue;

		/* filenames first */
		for(a=p->to.autom; a; a=a->link)
			if(a->type == D_FILE)
				putsymb(a->asym->name, 'z', a->aoffset, 0);
			else
			if(a->type == D_FILE1)
				putsymb(a->asym->name, 'Z', a->aoffset, 0);

		putsymb(s->name, 'T', s->value, s->version);

		/* frame, auto and param after */
		putsymb(".frame", 'm', p->to.offset+8, 0);

		/* automatics are written with their offset negated, parameters as is */
		for(a=p->to.autom; a; a=a->link)
			if(a->type == D_AUTO)
				putsymb(a->asym->name, 'a', -a->aoffset, 0);
			else
			if(a->type == D_PARAM)
				putsymb(a->asym->name, 'p', a->aoffset, 0);
	}
	if(debug['v'] || debug['n'])
		Bprint(&bso, "symsize = %lud\n", symsize);
	Bflush(&bso);
}
243
/*
 * asmlc: emit the pc/line table and count its bytes in lcsize.
 *
 * Instructions whose line equals the previous one, and ATEXT/ANOP,
 * produce no output.  For the others the pc advance since the last
 * entry is written first, then the line delta:
 *	129-255		pc += (byte-128)*MINLC
 *	0 b3 b2 b1 b0	line += 32-bit delta, most significant byte first
 *	1-64		line += byte
 *	65-128		line -= (byte-64)
 * Each line entry implicitly advances pc by MINLC (oldpc is set to
 * p->pc + MINLC).  The table is padded with 129 to an even length.
 */
void
asmlc(void)
{
	vlong oldpc;
	Prog *p;
	long oldlc, v, s;

	oldpc = INITTEXT;
	oldlc = 0;
	for(p = firstp; p != P; p = p->link) {
		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
			if(p->as == ATEXT)
				curtext = p;
			if(debug['V'])
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			continue;
		}
		if(debug['V'])
			Bprint(&bso, "\t\t%6ld", lcsize);
		/* pc advance in units of MINLC, at most 127 units per byte */
		v = (p->pc - oldpc) / MINLC;
		while(v) {
			s = 127;
			if(v < 127)
				s = v;
			cput(s+128);	/* 129-255 +pc */
			if(debug['V'])
				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
			v -= s;
			lcsize++;
		}
		s = p->line - oldlc;
		oldlc = p->line;
		oldpc = p->pc + MINLC;
		if(s > 64 || s < -64) {
			/* delta does not fit the one-byte forms: 0 followed by 4 bytes */
			cput(0);	/* 0 vv +lc */
			cput(s>>24);
			cput(s>>16);
			cput(s>>8);
			cput(s);
			if(debug['V']) {
				if(s > 0)
					Bprint(&bso, " lc+%ld(%d,%ld)\n",
						s, 0, s);
				else
					Bprint(&bso, " lc%ld(%d,%ld)\n",
						s, 0, s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
			lcsize += 5;
			continue;
		}
		if(s > 0) {
			cput(0+s);	/* 1-64 +lc */
			if(debug['V']) {
				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		} else {
			cput(64-s);	/* 65-128 -lc */
			if(debug['V']) {
				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		}
		lcsize++;
	}
	/* pad to an even number of bytes with a one-unit pc advance */
	while(lcsize & 1) {
		s = 129;
		cput(s);
		lcsize++;
	}
	if(debug['v'] || debug['V'])
		Bprint(&bso, "lcsize = %ld\n", lcsize);
	Bflush(&bso);
}
323
324 int
prefixof(Adr * a)325 prefixof(Adr *a)
326 {
327 switch(a->type) {
328 case D_INDIR+D_CS:
329 return 0x2e;
330 case D_INDIR+D_DS:
331 return 0x3e;
332 case D_INDIR+D_ES:
333 return 0x26;
334 case D_INDIR+D_FS:
335 return 0x64;
336 case D_INDIR+D_GS:
337 return 0x65;
338 }
339 return 0;
340 }
341
/*
 * oclass: classify operand a into one of the Y* operand classes that
 * the instruction tables are matched against.  Returns Yxxx when the
 * operand fits no class (including the extended registers when
 * asmode is not 64).
 */
int
oclass(Adr *a)
{
	vlong v;
	long l;

	/*
	 * memory-like operands: anything indirect, or a non-X/Y-register
	 * type that carries an index.
	 */
	if(a->type < D_CONST2 && (a->type >= D_INDIR || !isxyreg(a->type) && a->index != D_NONE)) {
		if(a->index != D_NONE && a->scale == 0) {
			/* index with scale 0: for D_ADDR the index field holds the name class */
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		/* these byte registers are only accepted in 64-bit mode */
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:				/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		/* R8-R15 are only accepted in 64-bit mode */
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

/*
	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;
*/

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return Yxr;

	case D_Y0+0:
	case D_Y0+1:
	case D_Y0+2:
	case D_Y0+3:
	case D_Y0+4:
	case D_Y0+5:
	case D_Y0+6:
	case D_Y0+7:
	case D_Y0+8:
	case D_Y0+9:
	case D_Y0+10:
	case D_Y0+11:
	case D_Y0+12:
	case D_Y0+13:
	case D_Y0+14:
	case D_Y0+15:
		return Yyr;

	case D_NONE:
		return Ynone;

	case D_CS:	return Ycs;
	case D_SS:	return Yss;
	case D_DS:	return Yds;
	case D_ES:	return Yes;
	case D_FS:	return Yfs;
	case D_GS:	return Ygs;

	case D_GDTR:	return Ygdtr;
	case D_IDTR:	return Yidtr;
	case D_LDTR:	return Yldtr;
	case D_MSW:	return Ymsw;
	case D_TASK:	return Ytask;

	case D_CR+0:	return Ycr0;
	case D_CR+1:	return Ycr1;
	case D_CR+2:	return Ycr2;
	case D_CR+3:	return Ycr3;
	case D_CR+4:	return Ycr4;
	case D_CR+5:	return Ycr5;
	case D_CR+6:	return Ycr6;
	case D_CR+7:	return Ycr7;
	case D_CR+8:	return Ycr8;

	case D_DR+0:	return Ydr0;
	case D_DR+1:	return Ydr1;
	case D_DR+2:	return Ydr2;
	case D_DR+3:	return Ydr3;
	case D_DR+4:	return Ydr4;
	case D_DR+5:	return Ydr5;
	case D_DR+6:	return Ydr6;
	case D_DR+7:	return Ydr7;

	case D_TR+0:	return Ytr0;
	case D_TR+1:	return Ytr1;
	case D_TR+2:	return Ytr2;
	case D_TR+3:	return Ytr3;
	case D_TR+4:	return Ytr4;
	case D_TR+5:	return Ytr5;
	case D_TR+6:	return Ytr6;
	case D_TR+7:	return Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a plain constant is classified by the smallest immediate form that holds it */
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
557
/*
 * asmidx: append the SIB byte for address a with base register base.
 *
 * The byte is built as scale<<6 | index<<3 | base.  With no index the
 * index field is 4 and the scale switch is skipped; with no base the
 * base field is 5.  SP is not accepted as an index.  R8-R15 are
 * rejected unless asmode is 64.  On a bad combination a diagnostic is
 * issued and a zero byte is emitted instead.
 */
void
asmidx(Adr *a, int base)
{
	int i;

	switch(a->index) {
	default:
		goto bad;

	case D_NONE:
		/* no index: index field 4, scale bits left at 0 */
		i = 4 << 3;
		goto bas;

	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_BP:
	case D_SI:
	case D_DI:
		i = reg[a->index] << 3;
		break;
	}
	/* scale 1, 2, 4, 8 is encoded as 0, 1, 2, 3 in the top two bits */
	switch(a->scale) {
	default:
		goto bad;
	case 1:
		break;
	case 2:
		i |= (1<<6);
		break;
	case 4:
		i |= (2<<6);
		break;
	case 8:
		i |= (3<<6);
		break;
	}
bas:
	switch(base) {
	default:
		goto bad;
	case D_NONE:	/* must be mod=00 */
		i |= 5;
		break;
	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		i |= reg[base];
		break;
	}
	*andptr++ = i;
	return;
bad:
	diag("asmidx: bad address %D", a);
	*andptr++ = 0;
	return;
}
641
642 static void
put4(long v)643 put4(long v)
644 {
645 if(dlm && curp != P && reloca != nil){
646 dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
647 reloca = nil;
648 }
649 andptr[0] = v;
650 andptr[1] = v>>8;
651 andptr[2] = v>>16;
652 andptr[3] = v>>24;
653 andptr += 4;
654 }
655
656 static void
put8(vlong v)657 put8(vlong v)
658 {
659 if(dlm && curp != P && reloca != nil){
660 dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1); /* TO DO */
661 reloca = nil;
662 }
663 andptr[0] = v;
664 andptr[1] = v>>8;
665 andptr[2] = v>>16;
666 andptr[3] = v>>24;
667 andptr[4] = v>>32;
668 andptr[5] = v>>40;
669 andptr[6] = v>>48;
670 andptr[7] = v>>56;
671 andptr += 8;
672 }
673
674 vlong
vaddr(Adr * a)675 vaddr(Adr *a)
676 {
677 int t;
678 vlong v;
679 Sym *s;
680
681 t = a->type;
682 v = a->offset;
683 if(t == D_ADDR)
684 t = a->index;
685 switch(t) {
686 case D_STATIC:
687 case D_EXTERN:
688 s = a->sym;
689 if(s != nil) {
690 if(dlm && curp != P)
691 reloca = a;
692 switch(s->type) {
693 case SUNDEF:
694 ckoff(s, v);
695 case STEXT:
696 case SCONST:
697 if((uvlong)s->value < (uvlong)INITTEXT)
698 v += INITTEXT; /* TO DO */
699 v += s->value;
700 break;
701 default:
702 v += INITDAT + s->value;
703 }
704 }
705 }
706 return v;
707 }
708
/*
 * asmandsz: append the ModR/M byte (plus SIB byte and displacement
 * where needed) for operand a, with r in the reg field.
 *
 * rex holds REX bits for the register operand; only 0x40 and Rxr are
 * kept and merged into rexflag together with the bits the address
 * itself needs.  m64 is only handed on to the recursive calls.
 * Named operands (static/extern/auto/param) are converted to an
 * indirect form with the offset from vaddr and encoded by recursion.
 * A diagnostic is issued for addresses that cannot be encoded.
 */
static void
asmandsz(Adr *a, int r, int rex, int m64)
{
	long v;
	int t;
	Adr aa;

	if(r == -1)
		diag("asmandsz: immedate instead of register");

	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	/* operand with an index register: always needs a SIB byte (rm field 4) */
	if(a->index != D_NONE && !isxyreg(t)) {
		if(t >= D_INDIR && t < D_CONST2) {
			t -= D_INDIR;
			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
			/* no base register: mod 0 with a 32-bit displacement */
			if(t == D_NONE) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				put4(v);
				return;
			}
			/* zero offset needs no displacement, except with BP/R13 as base */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			/* 8-bit displacement */
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			/* 32-bit displacement */
			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* indexed name: rewrite as an indirect address and encode that */
		switch(t) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			aa.type = D_NONE+D_INDIR;
			break;
		case D_AUTO:
		case D_PARAM:
			aa.type = D_SP+D_INDIR;
			break;
		}
		aa.offset = vaddr(a);
		aa.index = a->index;
		aa.scale = a->scale;
		asmandsz(&aa, r, rex, m64);
		return;
	}
	/* register operand in the D_AL..D_BH range: mod 3 */
	if(t >= D_AL && t <= D_BH) {
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	/* X/Y register operand: mod 3; a Y register sets Vexl in vexbytes */
	if(t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15) {
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		if(t >= D_Y0 && t <= D_Y15)
			vexbytes |= Vexl;
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	/* indirect operand without index */
	if(t >= D_INDIR && t < D_CONST2) {
		t -= D_INDIR;
		rexflag |= (regrex[t] & Rxb) | rex;
		/* no base, or a segment register as base: 32-bit absolute address */
		if(t == D_NONE || D_CS <= t && t <= D_GS) {
			if(asmode != 64){
				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
				put4(v);
				return;
			}
			/* temporary */
			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
			put4(v);
			return;
		}
		/* SP and R12 as base are always followed by a SIB byte */
		if(t == D_SP || t == D_R12) {
			if(v == 0) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* remaining base registers: no displacement, 8-bit or 32-bit displacement */
		if(t >= D_AX && t <= D_R15) {
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				return;
			}
			if(v >= -128 && v < 128) {
				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
				andptr[1] = v;
				andptr += 2;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			put4(v);
			return;
		}
		goto bad;
	}
	/* plain name: rewrite as an indirect address without index and encode that */
	switch(a->type) {
	default:
		goto bad;
	case D_STATIC:
	case D_EXTERN:
		aa.type = D_NONE+D_INDIR;
		break;
	case D_AUTO:
	case D_PARAM:
		aa.type = D_SP+D_INDIR;
		break;
	}
	aa.index = D_NONE;
	aa.scale = 1;
	aa.offset = vaddr(a);
	asmandsz(&aa, r, rex, m64);
	return;
bad:
	diag("asmand: bad address %D", a);
	return;
}
852
853 int
isxyreg(int t)854 isxyreg(int t)
855 {
856 return t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15;
857 }
858
859 static void
vexreg(Adr * a)860 vexreg(Adr *a)
861 {
862 int t;
863
864 t = a->type;
865 if(t >= D_Y0 && t <= D_Y15) {
866 vexbytes |= Vexl;
867 } else if(t >= D_X0 && t <= D_X15) {
868 if(vexed)
869 vexbytes |= Vexr; /* force vex prefix */
870 } else
871 return;
872 if(a->index != D_NONE)
873 vexbytes |= a->index << 8;
874 }
875
876 void
asmand(Adr * a,Adr * ra)877 asmand(Adr *a, Adr *ra)
878 {
879 asmandsz(a, reg[ra->type], regrex[ra->type], 0);
880 }
881
882 void
asmandg(Adr * a,Adr * r,int o,int rdest,int prefix)883 asmandg(Adr *a, Adr *r, int o, int rdest, int prefix)
884 {
885 Adr aa, rr;
886
887 if(isxyreg(a->type)) {
888 if(isxyreg(a->index) && r->type == D_CONST) {
889 /*
890 * convert sse instructions with immediate like
891 * AESKEYGENASSIST $32, X1, X2 from
892 * a=X1(X2*0); r=$32 to a=X1, r=X2. the
893 * caller adds the immediate byte. vex is not required
894 */
895 rr.offset = 0;
896 rr.sym = a->sym;
897 rr.type = a->index;
898 rr.index = D_NONE;
899 rr.scale = 0;
900 r = &rr;
901
902 aa = *a;
903 aa.index = D_NONE;
904 a = &aa;
905 }
906 }
907 vexreg(a);
908 if(isxyreg(a->type)) {
909 if(a->index != D_NONE) {
910 aa = *a;
911 aa.index = D_NONE;
912 a = &aa;
913 }
914 }
915 if(r == nil) {
916 asmandsz(a, o, 0, 0);
917 return;
918 }
919 vexreg(r);
920 if(rdest && (prefix&P2) == 0 && vexbytes != 0 && (vexbytes>>8) == 0) {
921 /* copy destination register as second source register */
922 if(isxyreg(r->type)) {
923 vexbytes |= r->type << 8;
924 rexflag |= regrex[r->type] & Rxx;
925 }
926 }
927 asmand(a, r);
928 }
929
930 void
asmando(Adr * a,int o)931 asmando(Adr *a, int o)
932 {
933 asmandg(a, nil, o, 0, 0);
934 }
935
936 static void
bytereg(Adr * a)937 bytereg(Adr *a)
938 {
939 if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
940 a->type = D_AL + (a->type-D_AX);
941 }
942
/*
 * ymovtab: special-case instruction table.
 *
 * doasm searches it when none of the opcode's regular operand
 * patterns matches; an entry is selected when the instruction (first
 * column) and the classes of the from and to operands (second and
 * third columns) match.  The table ends with a zero entry.
 *
 * NOTE(review): the code that consumes the remaining columns is
 * outside this view.  They appear to be a handler code followed by up
 * to four opcode bytes, with E (0xff) presumably marking the end of
 * the bytes -- confirm against the Movtab declaration and doasm.
 */
#define	E	0xff
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
1073
1074 int
isax(Adr * a)1075 isax(Adr *a)
1076 {
1077
1078 switch(a->type) {
1079 case D_AX:
1080 case D_AL:
1081 case D_AH:
1082 case D_INDIR+D_AX:
1083 return 1;
1084 }
1085 if(a->index == D_AX)
1086 return 1;
1087 return 0;
1088 }
1089
1090 void
subreg(Prog * p,int from,int to)1091 subreg(Prog *p, int from, int to)
1092 {
1093
1094 if(debug['Q'])
1095 print("\n%P s/%R/%R/\n", p, from, to);
1096
1097 if(p->from.type == from)
1098 p->from.type = to;
1099 if(p->to.type == from)
1100 p->to.type = to;
1101
1102 if(p->from.index == from)
1103 p->from.index = to;
1104 if(p->to.index == from)
1105 p->to.index = to;
1106
1107 from += D_INDIR;
1108 if(p->from.type == from)
1109 p->from.type = to+D_INDIR;
1110 if(p->to.type == from)
1111 p->to.type = to+D_INDIR;
1112
1113 if(debug['Q'])
1114 print("%P\n", p);
1115 }
1116
/*
 * mediaop: emit the escape/prefix bytes and the opcode byte of a
 * media instruction.
 *
 * o      opcode table entry whose op[] bytes are being consumed
 * op     the byte at o->op[z]
 * osize  number of opcode bytes the matched pattern provides
 * z      current index into o->op[]
 *
 * Returns z advanced past any bytes consumed here, so the caller can
 * find what follows the opcode in o->op[].
 */
static int
mediaop(Optab *o, int op, int osize, int z)
{
	switch(op){
	case Pm38:
	case Pm3a:
		/* three-byte opcode: 0f, then 38 or 3a, then the opcode proper */
		*andptr++ = Pm;	/* 0f */
		*andptr++ = op;	/* 38 | 3a */
		op = o->op[++z];
		break;

	case Pm:
	case Pe:
	case Pf2:
	case Pf3:
		if(osize != 1){
			/* op is a prefix: emit it (unless it is Pm itself), then Pm */
			if(op != Pm)
				*andptr++ = op;
			*andptr++ = Pm;
			op = o->op[++z];
			/* an optional 38/3a escape may follow */
			if(op == Pm38 || op == Pm3a) {
				*andptr++ = op;
				op = o->op[++z];
			}
			break;
		}
		/* with a single opcode byte, op is the opcode itself: fall through */
	default:
		/* make sure the opcode is preceded by Pm, without emitting it twice */
		if(andptr == and || andptr[-1] != Pm)
			*andptr++ = Pm;
		break;
	}
	*andptr++ = op;
	return z;
}
1151
1152 void
doasm(Prog * p)1153 doasm(Prog *p)
1154 {
1155 Optab *o;
1156 Prog *q, pp;
1157 uchar *t;
1158 Movtab *mo;
1159 int z, op, ft, tt, xo, l, pre;
1160 vlong v;
1161 Adr vmi;
1162
1163 pre = prefixof(&p->from);
1164 if(pre)
1165 *andptr++ = pre;
1166 pre = prefixof(&p->to);
1167 if(pre)
1168 *andptr++ = pre;
1169
1170 o = opindex[p->as];
1171 if(o == nil) {
1172 diag("asmins: missing op %P", p);
1173 return;
1174 }
1175 ft = oclass(&p->from) * Ymax;
1176 tt = oclass(&p->to) * Ymax;
1177 t = o->ytab;
1178 if(t == 0) {
1179 diag("asmins: noproto %P", p);
1180 return;
1181 }
1182 xo = o->op[0] == Pm;
1183 for(z=0; *t; z+=t[3]+xo,t+=4)
1184 if(ycover[ft+t[0]])
1185 if(ycover[tt+t[1]])
1186 goto found;
1187 goto domov;
1188
1189 found:
1190 switch(o->prefix & 0xFF) {
1191 case Pq: /* 16 bit escape and opcode escape */
1192 *andptr++ = Pe;
1193 *andptr++ = Pm;
1194 break;
1195
1196 case Pf2: /* xmm opcode escape */
1197 case Pf3:
1198 *andptr++ = o->prefix & 0xFF;
1199 *andptr++ = Pm;
1200 break;
1201
1202 case Pm: /* opcode escape */
1203 *andptr++ = Pm;
1204 break;
1205
1206 case Pe: /* 16 bit escape */
1207 *andptr++ = Pe;
1208 break;
1209
1210 case Pw: /* 64-bit escape */
1211 if(p->mode != 64)
1212 diag("asmins: illegal 64: %P", p);
1213 rexflag |= Pw;
1214 break;
1215
1216 case Pb: /* botch */
1217 bytereg(&p->from);
1218 bytereg(&p->to);
1219 break;
1220
1221 case P32: /* 32 bit but illegal if 64-bit mode */
1222 if(p->mode == 64)
1223 diag("asmins: illegal in 64-bit mode: %P", p);
1224 break;
1225
1226 case Py: /* 64-bit only, no prefix */
1227 if(p->mode != 64)
1228 diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1229 break;
1230 }
1231 v = vaddr(&p->from);
1232 op = o->op[z];
1233 if(op == 0x0f) {
1234 *andptr++ = op;
1235 op = o->op[++z];
1236 }
1237 switch(t[2]) {
1238 default:
1239 diag("asmins: unknown z %d %P", t[2], p);
1240 return;
1241
1242 case Zpseudo:
1243 break;
1244
1245 case Zlit:
1246 for(; op = o->op[z]; z++)
1247 *andptr++ = op;
1248 break;
1249
1250 case Zmb_r:
1251 bytereg(&p->from);
1252 /* fall through */
1253 case Zm_r:
1254 *andptr++ = op;
1255 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1256 break;
1257
1258 case Zm_r_xm:
1259 mediaop(o, op, t[3], z);
1260 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1261 break;
1262
1263 case Zm_r_xm_nr:
1264 rexflag = 0;
1265 mediaop(o, op, t[3], z);
1266 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1267 break;
1268
1269 case Zm_r_i_xm:
1270 mediaop(o, op, t[3], z);
1271 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1272 *andptr++ = p->to.offset;
1273 break;
1274
1275 case Zm_r_3d:
1276 *andptr++ = 0x0f;
1277 *andptr++ = 0x0f;
1278 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1279 *andptr++ = op;
1280 break;
1281
1282 case Zibm_r:
1283 *andptr++ = op;
1284 asmandg(&p->from, &p->to, 0, 1, o->prefix);
1285 *andptr++ = p->to.offset;
1286 break;
1287
1288 case Zaut_r:
1289 *andptr++ = 0x8d; /* leal */
1290 if(p->from.type != D_ADDR)
1291 diag("asmins: Zaut sb type ADDR");
1292 p->from.type = p->from.index;
1293 p->from.index = D_NONE;
1294 asmand(&p->from, &p->to);
1295 p->from.index = p->from.type;
1296 p->from.type = D_ADDR;
1297 break;
1298
1299 case Zm_o:
1300 *andptr++ = op;
1301 asmando(&p->from, o->op[z+1]);
1302 break;
1303
1304 case Zr_m:
1305 *andptr++ = op;
1306 asmand(&p->to, &p->from);
1307 break;
1308
1309 case Zr_m_xm:
1310 mediaop(o, op, t[3], z);
1311 asmandg(&p->to, &p->from, 0, 0, o->prefix);
1312 break;
1313
1314 case Zr_m_xm_nr:
1315 rexflag = 0;
1316 mediaop(o, op, t[3], z);
1317 asmandg(&p->to, &p->from, 0, 0, o->prefix);
1318 break;
1319
1320 case Zr_m_i_xm:
1321 mediaop(o, op, t[3], z);
1322 asmandg(&p->to, &p->from, 0, 0, o->prefix);
1323 *andptr++ = p->from.offset;
1324 break;
1325
1326 case Zo_m:
1327 *andptr++ = op;
1328 asmando(&p->to, o->op[z+1]);
1329 break;
1330
1331 case Zo_m64:
1332 *andptr++ = op;
1333 asmandsz(&p->to, o->op[z+1], 0, 1);
1334 break;
1335
1336 case Zm_ibo:
1337 v = vaddr(&p->to);
1338 *andptr++ = op;
1339 asmando(&p->from, o->op[z+1]);
1340 *andptr++ = v;
1341 break;
1342
1343 case Zibo_m:
1344 *andptr++ = op;
1345 asmando(&p->to, o->op[z+1]);
1346 *andptr++ = v;
1347 break;
1348
1349 case Zibo_m_xm:
1350 vmi = p->to;
1351 if(p->to.index != D_NONE) { /* VMI has "non-destructive dest" with dest in Vex.vvvv */
1352 vmi.type = p->to.index;
1353 vmi.index = p->to.type;
1354 }
1355 z = mediaop(o, op, t[3], z);
1356 asmando(&vmi, o->op[z+1]);
1357 *andptr++ = v;
1358 break;
1359
1360 case Z_ib:
1361 v = vaddr(&p->to);
1362 case Zib_:
1363 *andptr++ = op;
1364 *andptr++ = v;
1365 break;
1366
1367 case Zib_rp:
1368 rexflag |= regrex[p->to.type] & (Rxb|0x40);
1369 *andptr++ = op + reg[p->to.type];
1370 *andptr++ = v;
1371 break;
1372
1373 case Zil_rp:
1374 rexflag |= regrex[p->to.type] & Rxb;
1375 *andptr++ = op + reg[p->to.type];
1376 if(o->prefix == Pe) {
1377 *andptr++ = v;
1378 *andptr++ = v>>8;
1379 }
1380 else
1381 put4(v);
1382 break;
1383
1384 case Zo_iw:
1385 *andptr++ = op;
1386 if(p->from.type != D_NONE){
1387 *andptr++ = v;
1388 *andptr++ = v>>8;
1389 }
1390 break;
1391
1392 case Ziq_rp:
1393 l = v>>32;
1394 if(l == 0){
1395 //p->mark |= 0100;
1396 //print("zero: %llux %P\n", v, p);
1397 rexflag &= ~(0x40|Rxw);
1398 rexflag |= regrex[p->to.type] & Rxb;
1399 *andptr++ = 0xb8 + reg[p->to.type];
1400 put4(v);
1401 }else if(l == -1 && (v&((uvlong)1<<31))!=0){ /* sign extend */
1402 //p->mark |= 0100;
1403 //print("sign: %llux %P\n", v, p);
1404 *andptr ++ = 0xc7;
1405 asmando(&p->to, 0);
1406 put4(v);
1407 }else{ /* need all 8 */
1408 //print("all: %llux %P\n", v, p);
1409 rexflag |= regrex[p->to.type] & Rxb;
1410 *andptr++ = op + reg[p->to.type];
1411 put8(v);
1412 }
1413 break;
1414
1415 case Zib_rr:
1416 *andptr++ = op;
1417 asmand(&p->to, &p->to);
1418 *andptr++ = v;
1419 break;
1420
1421 case Z_il:
1422 v = vaddr(&p->to);
1423 case Zil_:
1424 *andptr++ = op;
1425 if(o->prefix == Pe) {
1426 *andptr++ = v;
1427 *andptr++ = v>>8;
1428 }
1429 else
1430 put4(v);
1431 break;
1432
1433 case Zm_ilo:
1434 v = vaddr(&p->to);
1435 *andptr++ = op;
1436 asmando(&p->from, o->op[z+1]);
1437 if(o->prefix == Pe) {
1438 *andptr++ = v;
1439 *andptr++ = v>>8;
1440 }
1441 else
1442 put4(v);
1443 break;
1444
1445 case Zilo_m:
1446 *andptr++ = op;
1447 asmando(&p->to, o->op[z+1]);
1448 if(o->prefix == Pe) {
1449 *andptr++ = v;
1450 *andptr++ = v>>8;
1451 }
1452 else
1453 put4(v);
1454 break;
1455
1456 case Zil_rr:
1457 *andptr++ = op;
1458 asmand(&p->to, &p->to);
1459 if(o->prefix == Pe) {
1460 *andptr++ = v;
1461 *andptr++ = v>>8;
1462 }
1463 else
1464 put4(v);
1465 break;
1466
1467 case Z_rp:
1468 rexflag |= regrex[p->to.type] & (Rxb|0x40);
1469 *andptr++ = op + reg[p->to.type];
1470 break;
1471
1472 case Zrp_:
1473 rexflag |= regrex[p->from.type] & (Rxb|0x40);
1474 *andptr++ = op + reg[p->from.type];
1475 break;
1476
1477 case Zclr:
1478 *andptr++ = op;
1479 asmand(&p->to, &p->to);
1480 break;
1481
1482 case Zbr:
1483 q = p->pcond;
1484 if(q) {
1485 v = q->pc - p->pc - 2;
1486 if(v >= -128 && v <= 127) {
1487 *andptr++ = op;
1488 *andptr++ = v;
1489 } else {
1490 v -= 6-2;
1491 *andptr++ = 0x0f;
1492 *andptr++ = o->op[z+1];
1493 *andptr++ = v;
1494 *andptr++ = v>>8;
1495 *andptr++ = v>>16;
1496 *andptr++ = v>>24;
1497 }
1498 }
1499 break;
1500
1501 case Zcall:
1502 q = p->pcond;
1503 if(q) {
1504 v = q->pc - p->pc - 5;
1505 if(dlm && curp != P && p->to.sym->type == SUNDEF){
1506 /* v = 0 - p->pc - 5; */
1507 v = 0;
1508 ckoff(p->to.sym, v);
1509 v += p->to.sym->value;
1510 dynreloc(p->to.sym, p->pc+1, 0);
1511 }
1512 *andptr++ = op;
1513 *andptr++ = v;
1514 *andptr++ = v>>8;
1515 *andptr++ = v>>16;
1516 *andptr++ = v>>24;
1517 }
1518 break;
1519
1520 case Zjmp:
1521 q = p->pcond;
1522 if(q) {
1523 v = q->pc - p->pc - 2;
1524 if(v >= -128 && v <= 127) {
1525 *andptr++ = op;
1526 *andptr++ = v;
1527 } else {
1528 v -= 5-2;
1529 *andptr++ = o->op[z+1];
1530 *andptr++ = v;
1531 *andptr++ = v>>8;
1532 *andptr++ = v>>16;
1533 *andptr++ = v>>24;
1534 }
1535 }
1536 break;
1537
1538 case Zloop:
1539 q = p->pcond;
1540 if(q) {
1541 v = q->pc - p->pc - 2;
1542 if(v < -128 || v > 127)
1543 diag("loop too far: %P", p);
1544 *andptr++ = op;
1545 *andptr++ = v;
1546 }
1547 break;
1548
1549 case Zbyte:
1550 *andptr++ = v;
1551 if(op > 1) {
1552 *andptr++ = v>>8;
1553 if(op > 2) {
1554 *andptr++ = v>>16;
1555 *andptr++ = v>>24;
1556 if(op > 4) {
1557 *andptr++ = v>>32;
1558 *andptr++ = v>>40;
1559 *andptr++ = v>>48;
1560 *andptr++ = v>>56;
1561 }
1562 }
1563 }
1564 break;
1565 }
1566 return;
1567
1568 domov:
1569 for(mo=ymovtab; mo->as; mo++)
1570 if(p->as == mo->as)
1571 if(ycover[ft+mo->ft])
1572 if(ycover[tt+mo->tt]){
1573 t = mo->op;
1574 goto mfound;
1575 }
1576 bad:
1577 if(p->mode != 64){
1578 /*
1579 * here, the assembly has failed.
1580 * if its a byte instruction that has
1581 * unaddressable registers, try to
1582 * exchange registers and reissue the
1583 * instruction with the operands renamed.
1584 */
1585 pp = *p;
1586 z = p->from.type;
1587 if(z >= D_BP && z <= D_DI) {
1588 if(isax(&p->to)) {
1589 *andptr++ = 0x87; /* xchg lhs,bx */
1590 asmando(&p->from, reg[D_BX]);
1591 subreg(&pp, z, D_BX);
1592 doasm(&pp);
1593 *andptr++ = 0x87; /* xchg lhs,bx */
1594 asmando(&p->from, reg[D_BX]);
1595 } else {
1596 *andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */
1597 subreg(&pp, z, D_AX);
1598 doasm(&pp);
1599 *andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */
1600 }
1601 return;
1602 }
1603 z = p->to.type;
1604 if(z >= D_BP && z <= D_DI) {
1605 if(isax(&p->from)) {
1606 *andptr++ = 0x87; /* xchg rhs,bx */
1607 asmando(&p->to, reg[D_BX]);
1608 subreg(&pp, z, D_BX);
1609 doasm(&pp);
1610 *andptr++ = 0x87; /* xchg rhs,bx */
1611 asmando(&p->to, reg[D_BX]);
1612 } else {
1613 *andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */
1614 subreg(&pp, z, D_AX);
1615 doasm(&pp);
1616 *andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */
1617 }
1618 return;
1619 }
1620 }
1621 if(0) {
1622 int ft = oclass(&p->from), tt = oclass(&p->to); extern char* yclname[];
1623 fprint(2, "ft=%d [%s] tt=%d [%s]\n", ft, yclname[ft], tt, yclname[tt]);
1624 }
1625 diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1626 return;
1627
1628 mfound:
1629 switch(mo->code) {
1630 default:
1631 diag("asmins: unknown mov %d %P", mo->code, p);
1632 break;
1633
1634 case 0: /* lit */
1635 for(z=0; t[z]!=E; z++)
1636 *andptr++ = t[z];
1637 break;
1638
1639 case 1: /* r,m */
1640 *andptr++ = t[0];
1641 asmando(&p->to, t[1]);
1642 break;
1643
1644 case 2: /* m,r */
1645 *andptr++ = t[0];
1646 asmando(&p->from, t[1]);
1647 break;
1648
1649 case 3: /* r,m - 2op */
1650 *andptr++ = t[0];
1651 *andptr++ = t[1];
1652 asmando(&p->to, t[2]);
1653 rexflag |= regrex[p->from.type] & (Rxr|0x40);
1654 break;
1655
1656 case 4: /* m,r - 2op */
1657 *andptr++ = t[0];
1658 *andptr++ = t[1];
1659 asmando(&p->from, t[2]);
1660 rexflag |= regrex[p->to.type] & (Rxr|0x40);
1661 break;
1662
1663 case 5: /* load full pointer, trash heap */
1664 if(t[0])
1665 *andptr++ = t[0];
1666 switch(p->to.index) {
1667 default:
1668 goto bad;
1669 case D_DS:
1670 *andptr++ = 0xc5;
1671 break;
1672 case D_SS:
1673 *andptr++ = 0x0f;
1674 *andptr++ = 0xb2;
1675 break;
1676 case D_ES:
1677 *andptr++ = 0xc4;
1678 break;
1679 case D_FS:
1680 *andptr++ = 0x0f;
1681 *andptr++ = 0xb4;
1682 break;
1683 case D_GS:
1684 *andptr++ = 0x0f;
1685 *andptr++ = 0xb5;
1686 break;
1687 }
1688 asmand(&p->from, &p->to);
1689 break;
1690
1691 case 6: /* double shift */
1692 if(t[0] == Pw){
1693 if(p->mode != 64)
1694 diag("asmins: illegal 64: %P", p);
1695 rexflag |= Pw;
1696 t++;
1697 }else if(t[0] == Pe){
1698 *andptr++ = Pe;
1699 t++;
1700 }
1701 z = p->from.type;
1702 switch(z) {
1703 default:
1704 goto bad;
1705 case D_CONST:
1706 *andptr++ = 0x0f;
1707 *andptr++ = t[0];
1708 asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1709 *andptr++ = p->from.offset;
1710 break;
1711 case D_CL:
1712 case D_CX:
1713 *andptr++ = 0x0f;
1714 *andptr++ = t[1];
1715 asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1716 break;
1717 }
1718 break;
1719
1720 case 7: /* imul rm,r */
1721 *andptr++ = t[4];
1722 *andptr++ = t[5];
1723 asmand(&p->from, &p->to);
1724 break;
1725 }
1726 }
1727
1728 void
asmins(Prog * p)1729 asmins(Prog *p)
1730 {
1731 int n, np, o, c, t, v1, v2, vexlen;
1732
1733 vexbytes = 0;
1734 rexflag = 0;
1735 andptr = and;
1736 asmode = p->mode;
1737 doasm(p);
1738 if(vexbytes == 0) {
1739 if(rexflag) {
1740 if(0) fprint(2, "rexflag=%#ux %P\n", rexflag, p);
1741 /*
1742 * the rex prefix must appear before the first opcode byte
1743 * and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
1744 * before the 0f opcode escape.
1745 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1746 */
1747 if(p->mode != 64)
1748 diag("asmins: illegal in mode %d: %P", p->mode, p);
1749 n = andptr - and;
1750 for(np = 0; np < n; np++) {
1751 c = and[np];
1752 if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
1753 break;
1754 }
1755 memmove(and+np+1, and+np, n-np);
1756 and[np] = 0x40 | rexflag;
1757 andptr++;
1758 }
1759 return;
1760 }
1761 if(0) if(rexflag||vexbytes)fprint(2, "rexflag=%#ux vexbytes=%#ux %P\n", rexflag, vexbytes, p);
1762 n = andptr - and;
1763 //vex if need vvvv register or W or L. never need R X B (must be 1 in 32-bit)
1764 //note: 4th register encoding in immediate byte
1765 /* media/sse/vex: seg* (66|F3|F2)? 0F (38|3A)? op -> seg* vex2|vex3 op */
1766 for(np = 0; np < n; np++) { /* seg* */
1767 c = and[np];
1768 if(c != 0x2e && c != 0x3e && c != 0x26 && c != 0x64 && c != 0x65)
1769 break;
1770 }
1771 o = np;
1772 if(np+1 < n) {
1773 v1 = 0;
1774 v2 = (vexbytes & Vexl) | Vexp0;
1775 switch(and[np]) {
1776 case 0x66:
1777 v2 |= Vexp66;
1778 np++;
1779 break;
1780 case 0xF3:
1781 v2 |= Vexpf3;
1782 np++;
1783 break;
1784 case 0xF2:
1785 v2 |= Vexpf2;
1786 np++;
1787 break;
1788 }
1789 c = and[np];
1790 if(c == Vex2 || c == Vex3)
1791 return; /* already vexed */
1792 if(and[np] != 0x0F) {
1793 diag("internal: inconsistent vex state: %P", p);
1794 return;
1795 }
1796 np++;
1797 if(np < n) {
1798 switch(and[np]) {
1799 case 0x38:
1800 v1 = Vex0f38;
1801 np++;
1802 break;
1803 case 0x3a:
1804 v1 = Vex0f3a;
1805 np++;
1806 break;
1807 default:
1808 if(rexflag & (Rxw|Rxx|Rxb))
1809 v1 = Vex0f; /* force 3-byte vex */
1810 break;
1811 }
1812 }
1813 t = vexbytes >> 8;
1814 if(t >= D_Y0 && t <= D_Y15)
1815 t -= D_Y0;
1816 else if(t >= D_X0 && t <= D_X15)
1817 t -= D_X0;
1818 v2 |= (~t & 0xF) << 3;
1819 vexlen = 2;
1820 if(v1 != 0)
1821 vexlen = 3;
1822 if(o+vexlen != np) {
1823 memmove(and+o+vexlen, and+np, n-np);
1824 andptr = and+(o+vexlen)+(n-np);
1825 }
1826 if(vexlen == 2) {
1827 and[o] = Vex2;
1828 and[o+1] = v2 | ((~rexflag<<5) & Vexr);
1829 } else {
1830 and[o] = Vex3;
1831 and[o+1] = v1 | ((~rexflag<<5) & (Vexr | Vexx | Vexb));
1832 if(rexflag & Rxw)
1833 v2 |= Vexw;
1834 and[o+2] = v2;
1835 }
1836 }
1837 }
1838
/*
 * Relocation kinds chosen by dynreloc() before translation through
 * modemap[]: ABS* when its abs argument is non-zero, REL* otherwise;
 * *U when the symbol's type is SUNDEF, *D otherwise.
 */
enum
{
	ABSD	= 0,	/* abs, symbol not SUNDEF */
	ABSU	= 1,	/* abs, symbol is SUNDEF */
	RELD	= 2,	/* not abs, symbol not SUNDEF */
	RELU	= 3,	/* not abs, symbol is SUNDEF */
};
1845
/*
 * Value stored in the relocation table for each relocation kind;
 * dynreloc() indexes this with ABSD, ABSU, RELD or RELU.
 */
int modemap[4] = {
	0,	/* [ABSD] */
	1,	/* [ABSU] */
	-1,	/* [RELD] */
	2,	/* [RELU] */
};
1847
1848 typedef struct Reloc Reloc;
1849
1850 struct Reloc
1851 {
1852 int n;
1853 int t;
1854 uchar *m;
1855 ulong *a;
1856 };
1857
1858 Reloc rels;
1859
1860 static void
grow(Reloc * r)1861 grow(Reloc *r)
1862 {
1863 int t;
1864 uchar *m, *nm;
1865 ulong *a, *na;
1866
1867 t = r->t;
1868 r->t += 64;
1869 m = r->m;
1870 a = r->a;
1871 r->m = nm = malloc(r->t*sizeof(uchar));
1872 r->a = na = malloc(r->t*sizeof(ulong));
1873 memmove(nm, m, t*sizeof(uchar));
1874 memmove(na, a, t*sizeof(ulong));
1875 free(m);
1876 free(a);
1877 }
1878
1879 void
dynreloc(Sym * s,ulong v,int abs)1880 dynreloc(Sym *s, ulong v, int abs)
1881 {
1882 int i, k, n;
1883 uchar *m;
1884 ulong *a;
1885 Reloc *r;
1886
1887 if(s->type == SUNDEF)
1888 k = abs ? ABSU : RELU;
1889 else
1890 k = abs ? ABSD : RELD;
1891 /* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1892 k = modemap[k];
1893 r = &rels;
1894 n = r->n;
1895 if(n >= r->t)
1896 grow(r);
1897 m = r->m;
1898 a = r->a;
1899 for(i = n; i > 0; i--){
1900 if(v < a[i-1]){ /* happens occasionally for data */
1901 m[i] = m[i-1];
1902 a[i] = a[i-1];
1903 }
1904 else
1905 break;
1906 }
1907 m[i] = k;
1908 a[i] = v;
1909 r->n++;
1910 }
1911
/*
 * Write the string s and its terminating zero byte with cput.
 * Returns the number of bytes written, terminator included.
 */
static int
sput(char *s)
{
	int n;

	for(n = 0; s[n] != 0; n++)
		cput(s[n]);
	cput(0);
	return n+1;
}
1923
1924 void
asmdyn()1925 asmdyn()
1926 {
1927 int i, n, t, c;
1928 Sym *s;
1929 ulong la, ra, *a;
1930 vlong off;
1931 uchar *m;
1932 Reloc *r;
1933
1934 cflush();
1935 off = seek(cout, 0, 1);
1936 lput(0);
1937 t = 0;
1938 lput(imports);
1939 t += 4;
1940 for(i = 0; i < NHASH; i++)
1941 for(s = hash[i]; s != S; s = s->link)
1942 if(s->type == SUNDEF){
1943 lput(s->sig);
1944 t += 4;
1945 t += sput(s->name);
1946 }
1947
1948 la = 0;
1949 r = &rels;
1950 n = r->n;
1951 m = r->m;
1952 a = r->a;
1953 lput(n);
1954 t += 4;
1955 for(i = 0; i < n; i++){
1956 ra = *a-la;
1957 if(*a < la)
1958 diag("bad relocation order");
1959 if(ra < 256)
1960 c = 0;
1961 else if(ra < 65536)
1962 c = 1;
1963 else
1964 c = 2;
1965 cput((c<<6)|*m++);
1966 t++;
1967 if(c == 0){
1968 cput(ra);
1969 t++;
1970 }
1971 else if(c == 1){
1972 wput(ra);
1973 t += 2;
1974 }
1975 else{
1976 lput(ra);
1977 t += 4;
1978 }
1979 la = *a++;
1980 }
1981
1982 cflush();
1983 seek(cout, off, 0);
1984 lput(t);
1985
1986 if(debug['v']){
1987 Bprint(&bso, "import table entries = %d\n", imports);
1988 Bprint(&bso, "export table entries = %d\n", exports);
1989 }
1990 }
1991
/*
 * Printable names for the Y* operand-class constants: each entry is
 * indexed by a class constant and holds that constant's spelling.
 * Referenced by the (currently disabled) debugging fprint that sits
 * just before the "doasm: notfound" diagnostic.
 */
char* yclname[] ={
	[Yxxx] "Yxxx",
	[Ynone] "Ynone",
	[Yi0] "Yi0",
	[Yi1] "Yi1",
	[Yi8] "Yi8",
	[Ys32] "Ys32",
	[Yi32] "Yi32",
	[Yi64] "Yi64",
	[Yiauto] "Yiauto",
	[Yal] "Yal",
	[Ycl] "Ycl",
	[Yax] "Yax",
	[Ycx] "Ycx",
	[Yrb] "Yrb",
	[Yrl] "Yrl",
	[Yrf] "Yrf",
	[Yf0] "Yf0",
	[Yrx] "Yrx",
	[Ymb] "Ymb",
	[Yml] "Yml",
	[Ym] "Ym",
	[Ybr] "Ybr",
	[Ycol] "Ycol",
	[Ycs] "Ycs",
	[Yss] "Yss",
	[Yds] "Yds",
	[Yes] "Yes",
	[Yfs] "Yfs",
	[Ygs] "Ygs",
	[Ygdtr] "Ygdtr",
	[Yidtr] "Yidtr",
	[Yldtr] "Yldtr",
	[Ymsw] "Ymsw",
	[Ytask] "Ytask",
	[Ycr0] "Ycr0",
	[Ycr1] "Ycr1",
	[Ycr2] "Ycr2",
	[Ycr3] "Ycr3",
	[Ycr4] "Ycr4",
	[Ycr5] "Ycr5",
	[Ycr6] "Ycr6",
	[Ycr7] "Ycr7",
	[Ycr8] "Ycr8",
	[Ydr0] "Ydr0",
	[Ydr1] "Ydr1",
	[Ydr2] "Ydr2",
	[Ydr3] "Ydr3",
	[Ydr4] "Ydr4",
	[Ydr5] "Ydr5",
	[Ydr6] "Ydr6",
	[Ydr7] "Ydr7",
	[Ytr0] "Ytr0",
	[Ytr1] "Ytr1",
	[Ytr2] "Ytr2",
	[Ytr3] "Ytr3",
	[Ytr4] "Ytr4",
	[Ytr5] "Ytr5",
	[Ytr6] "Ytr6",
	[Ytr7] "Ytr7",
	[Yrl32] "Yrl32",
	[Yrl64] "Yrl64",
	[Ymr] "Ymr",
	[Ymm] "Ymm",
	[Yxr] "Yxr",
	[Yxm] "Yxm",
	[Yyr] "Yyr",
	[Yxyr] "Yxyr",
	[Ymax] "Ymax",
};
2062