xref: /plan9/sys/src/cmd/6l/span.c (revision 86abb9fb23a9f11dbfd9e6dc2fe0c20d62417d94)
1 #include	"l.h"
2 
3 static int	rexflag;
4 static int	asmode;
5 
/*
 * Assign a pc to every instruction on the firstp list and compute
 * the size of the text.  Instructions are assembled with asmins and
 * their encoded length (andptr-and) is kept in p->mark.  Branches
 * are re-assembled until no length changes; if INITRND is set and
 * the rounded end of text moves INITDAT, everything is redone.
 * On exit "etext" is defined, each text symbol has its value set
 * to its pc, and textsize holds the text length.
 */
void
span(void)
{
	Prog *p, *q;
	long v;
	vlong c, idat;
	int m, n, again;

	xdefine("etext", STEXT, 0L);
	idat = INITDAT;
	/* preliminary pass: set p->back and rewrite AADJSP */
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		n = 0;
		/* a branch with no target is made to point at itself */
		if(p->to.type == D_BRANCH)
			if(p->pcond == P)
				p->pcond = p;
		/*
		 * NOTE(review): back==2 presumably means "not yet visited",
		 * which would make n a backward-branch flag -- confirm.
		 */
		if((q = p->pcond) != P)
			if(q->back != 2)
				n = 1;
		p->back = n;
		/*
		 * AADJSP becomes an add of -offset to SP (or a subtract of
		 * the positive amount); an adjustment of zero becomes ANOP.
		 */
		if(p->as == AADJSP) {
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = p->mode != 64? AADDL: AADDQ;
			if(v < 0) {
				p->as = p->mode != 64? ASUBL: ASUBQ;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	n = 0;	/* n now counts iterations of the loop below */

start:
	/* full pass: assemble every instruction, record pc and length */
	if(debug['v'])
		Bprint(&bso, "%5.2f span\n", cputime());
	Bflush(&bso);
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		/* set pc before asmins for branches with back set */
		if(p->to.type == D_BRANCH)
			if(p->back)
				p->pc = c;
		asmins(p);
		p->pc = c;
		m = andptr-and;
		p->mark = m;
		c += m;
	}

loop:
	/*
	 * re-assemble only branches (and instructions with the 0100
	 * bit set in back) until no encoded length changes;
	 * give up after 50 iterations.
	 */
	n++;
	if(debug['v'])
		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
	Bflush(&bso);
	if(n > 50) {
		print("span must be looping\n");
		errorexit();
	}
	again = 0;
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		if(p->to.type == D_BRANCH || p->back & 0100) {
			if(p->back)
				p->pc = c;
			asmins(p);
			m = andptr-and;
			if(m != p->mark) {
				p->mark = m;
				again++;
			}
		}
		p->pc = c;
		c += p->mark;
	}
	if(again) {
		textsize = c;
		goto loop;
	}
	/* the data origin follows the rounded end of text; redo if it moved */
	if(INITRND) {
		INITDAT = rnd(c, INITRND);
		if(INITDAT != idat) {
			idat = INITDAT;
			goto start;
		}
	}
	xdefine("etext", STEXT, c);
	if(debug['v'])
		Bprint(&bso, "etext = %llux\n", c);
	Bflush(&bso);
	/* give each text symbol on the textp list its final address */
	for(p = textp; p != P; p = p->pcond)
		p->from.sym->value = p->pc;
	textsize = c - INITTEXT;
}
107 
108 void
109 xdefine(char *p, int t, vlong v)
110 {
111 	Sym *s;
112 
113 	s = lookup(p, 0);
114 	if(s->type == 0 || s->type == SXREF) {
115 		s->type = t;
116 		s->value = v;
117 	}
118 	if(s->type == STEXT && s->value == 0)
119 		s->value = v;
120 }
121 
122 void
123 putsymb(char *s, int t, vlong v, int ver)
124 {
125 	int i, f, l;
126 
127 	if(t == 'f')
128 		s++;
129 	l = 4;
130 	if(!debug['8']){
131 		lput(v>>32);
132 		l = 8;
133 	}
134 	lput(v);
135 	if(ver)
136 		t += 'a' - 'A';
137 	cput(t+0x80);			/* 0x80 is variable length */
138 
139 	if(t == 'Z' || t == 'z') {
140 		cput(s[0]);
141 		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
142 			cput(s[i]);
143 			cput(s[i+1]);
144 		}
145 		cput(0);
146 		cput(0);
147 		i++;
148 	}
149 	else {
150 		for(i=0; s[i]; i++)
151 			cput(s[i]);
152 		cput(0);
153 	}
154 	symsize += l + 1 + i + 1;
155 
156 	if(debug['n']) {
157 		if(t == 'z' || t == 'Z') {
158 			Bprint(&bso, "%c %.8llux ", t, v);
159 			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
160 				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
161 				Bprint(&bso, "/%x", f);
162 			}
163 			Bprint(&bso, "\n");
164 			return;
165 		}
166 		if(ver)
167 			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
168 		else
169 			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
170 	}
171 }
172 
173 void
174 asmsym(void)
175 {
176 	Prog *p;
177 	Auto *a;
178 	Sym *s;
179 	int h;
180 
181 	s = lookup("etext", 0);
182 	if(s->type == STEXT)
183 		putsymb(s->name, 'T', s->value, s->version);
184 
185 	for(h=0; h<NHASH; h++)
186 		for(s=hash[h]; s!=S; s=s->link)
187 			switch(s->type) {
188 			case SCONST:
189 				putsymb(s->name, 'D', s->value, s->version);
190 				continue;
191 
192 			case SDATA:
193 				putsymb(s->name, 'D', s->value+INITDAT, s->version);
194 				continue;
195 
196 			case SBSS:
197 				putsymb(s->name, 'B', s->value+INITDAT, s->version);
198 				continue;
199 
200 			case SFILE:
201 				putsymb(s->name, 'f', s->value, s->version);
202 				continue;
203 			}
204 
205 	for(p=textp; p!=P; p=p->pcond) {
206 		s = p->from.sym;
207 		if(s->type != STEXT)
208 			continue;
209 
210 		/* filenames first */
211 		for(a=p->to.autom; a; a=a->link)
212 			if(a->type == D_FILE)
213 				putsymb(a->asym->name, 'z', a->aoffset, 0);
214 			else
215 			if(a->type == D_FILE1)
216 				putsymb(a->asym->name, 'Z', a->aoffset, 0);
217 
218 		putsymb(s->name, 'T', s->value, s->version);
219 
220 		/* frame, auto and param after */
221 		putsymb(".frame", 'm', p->to.offset+8, 0);
222 
223 		for(a=p->to.autom; a; a=a->link)
224 			if(a->type == D_AUTO)
225 				putsymb(a->asym->name, 'a', -a->aoffset, 0);
226 			else
227 			if(a->type == D_PARAM)
228 				putsymb(a->asym->name, 'p', a->aoffset, 0);
229 	}
230 	if(debug['v'] || debug['n'])
231 		Bprint(&bso, "symsize = %lud\n", symsize);
232 	Bflush(&bso);
233 }
234 
/*
 * Emit the pc/line table.  For each instruction whose line differs
 * from the previous one (ATEXT and ANOP are skipped), write the pc
 * advance since the last entry followed by the line delta:
 *	129-255	advance pc by (byte-128)*MINLC
 *	0 + 4 bytes	line delta outside [-64, 64], high byte first
 *	1-64	line delta +1..+64
 *	65-128	line delta 0..-64 (byte-64, negated)
 * lcsize counts the bytes written and is padded to an even count.
 * With debug['L'] a listing is printed on bso as well.
 */
void
asmlc(void)
{
	vlong oldpc;
	Prog *p;
	long oldlc, v, s;

	oldpc = INITTEXT;
	oldlc = 0;
	for(p = firstp; p != P; p = p->link) {
		/* nothing to record: same line, or a text/nop pseudo-op */
		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
			if(p->as == ATEXT)
				curtext = p;
			if(debug['L'])
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			continue;
		}
		if(debug['L'])
			Bprint(&bso, "\t\t%6ld", lcsize);
		/* pc advance in MINLC units, at most 127 units per byte */
		v = (p->pc - oldpc) / MINLC;
		while(v) {
			s = 127;
			if(v < 127)
				s = v;
			cput(s+128);	/* 129-255 +pc */
			if(debug['L'])
				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
			v -= s;
			lcsize++;
		}
		s = p->line - oldlc;
		oldlc = p->line;
		/* the next pc delta is measured from one MINLC past this pc */
		oldpc = p->pc + MINLC;
		if(s > 64 || s < -64) {
			cput(0);	/* 0 vv +lc */
			cput(s>>24);
			cput(s>>16);
			cput(s>>8);
			cput(s);
			if(debug['L']) {
				if(s > 0)
					Bprint(&bso, " lc+%ld(%d,%ld)\n",
						s, 0, s);
				else
					Bprint(&bso, " lc%ld(%d,%ld)\n",
						s, 0, s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
			lcsize += 5;
			continue;
		}
		if(s > 0) {
			cput(0+s);	/* 1-64 +lc */
			if(debug['L']) {
				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		} else {
			cput(64-s);	/* 65-128 -lc */
			if(debug['L']) {
				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		}
		lcsize++;
	}
	/* pad to an even number of bytes with a pc advance of one unit */
	while(lcsize & 1) {
		s = 129;
		cput(s);
		lcsize++;
	}
	if(debug['v'] || debug['L'])
		Bprint(&bso, "lcsize = %ld\n", lcsize);
	Bflush(&bso);
}
314 
/*
 * Classify operand a, returning one of the Y* operand classes
 * (doasm uses the result to index ycover).  Yxxx is returned for
 * an operand that cannot be classified, including registers that
 * are only accepted when asmode is 64.
 */
int
oclass(Adr *a)
{
	vlong v;
	long l;

	/* indirect or indexed operands */
	if(a->type >= D_INDIR || a->index != D_NONE) {
		/* an index with scale 0: an address constant or a Ycol pair */
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		/* these byte registers are rejected unless asmode is 64 */
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		/* R8-R15 are rejected unless asmode is 64 */
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a constant with no symbol is classified by its magnitude */
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			/* survives a round trip through long */
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
510 
/*
 * Append one byte to the instruction buffer (andptr) built from
 * the index register and scale of a and the base register base:
 * the scale code goes in bits 6-7, the index register number in
 * bits 3-5 and the base register number in bits 0-2.
 * With no index the index field is 4 and the scale is not examined;
 * with no base the base field is 5.  R8-R15 are only accepted when
 * asmode is 64.  On an unsupported combination a diagnostic is
 * issued and a zero byte is emitted instead.
 */
void
asmidx(Adr *a, int base)
{
	int i;

	switch(a->index) {
	default:
		goto bad;

	case D_NONE:
		i = 4 << 3;
		goto bas;	/* no index: skip the scale check */

	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_BP:
	case D_SI:
	case D_DI:
		i = reg[a->index] << 3;
		break;
	}
	/* scale 1, 2, 4, 8 is encoded as 0, 1, 2, 3 in the top two bits */
	switch(a->scale) {
	default:
		goto bad;
	case 1:
		break;
	case 2:
		i |= (1<<6);
		break;
	case 4:
		i |= (2<<6);
		break;
	case 8:
		i |= (3<<6);
		break;
	}
bas:
	switch(base) {
	default:
		goto bad;
	case D_NONE:	/* must be mod=00 */
		i |= 5;
		break;
	case D_R8:
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			goto bad;
		/* fall through */
	case D_AX:
	case D_CX:
	case D_DX:
	case D_BX:
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		i |= reg[base];
		break;
	}
	*andptr++ = i;
	return;
bad:
	diag("asmidx: bad address %D", a);
	*andptr++ = 0;
	return;
}
594 
595 static void
596 put4(long v)
597 {
598 	if(dlm && curp != P && reloca != nil){
599 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
600 		reloca = nil;
601 	}
602 	andptr[0] = v;
603 	andptr[1] = v>>8;
604 	andptr[2] = v>>16;
605 	andptr[3] = v>>24;
606 	andptr += 4;
607 }
608 
609 static void
610 put8(vlong v)
611 {
612 	if(dlm && curp != P && reloca != nil){
613 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
614 		reloca = nil;
615 	}
616 	andptr[0] = v;
617 	andptr[1] = v>>8;
618 	andptr[2] = v>>16;
619 	andptr[3] = v>>24;
620 	andptr[4] = v>>32;
621 	andptr[5] = v>>40;
622 	andptr[6] = v>>48;
623 	andptr[7] = v>>56;
624 	andptr += 8;
625 }
626 
627 vlong
628 vaddr(Adr *a)
629 {
630 	int t;
631 	vlong v;
632 	Sym *s;
633 
634 	t = a->type;
635 	v = a->offset;
636 	if(t == D_ADDR)
637 		t = a->index;
638 	switch(t) {
639 	case D_STATIC:
640 	case D_EXTERN:
641 		s = a->sym;
642 		if(s != nil) {
643 			if(dlm && curp != P)
644 				reloca = a;
645 			switch(s->type) {
646 			case SUNDEF:
647 				ckoff(s, v);
648 			case STEXT:
649 			case SCONST:
650 				if((uvlong)s->value < (uvlong)INITTEXT)
651 					v += INITTEXT;	/* TO DO */
652 				v += s->value;
653 				break;
654 			default:
655 				v += INITDAT + s->value;
656 			}
657 		}
658 	}
659 	return v;
660 }
661 
/*
 * Emit the addressing bytes for operand a into the instruction
 * buffer: a first byte of the form (mod<<6) | (r<<3) | rm, then
 * where needed a byte from asmidx and an 8- or 32-bit displacement.
 * r is placed in bits 3-5 of the first byte; rex supplies extra
 * prefix bits (only 0x40 and Rxr are kept) which are ORed into
 * rexflag together with bits from regrex[] for the registers used.
 * Symbolic operands (static, extern, auto, param) are rewritten as
 * an indirect operand at vaddr(a) and handled by a recursive call.
 * m64 is passed through on recursion but not otherwise examined here.
 */
static void
asmandsz(Adr *a, int r, int rex, int m64)
{
	long v;
	int t;
	Adr aa;

	rex &= (0x40 | Rxr);
	v = a->offset;
	/* warn when the offset does not survive truncation to long */
	if ((vlong)v != a->offset)
		print("asmandsz: Trying to emit %#ullx and 32 bits is not sufficient\n",
			a->offset);
	t = a->type;
	/* indexed operands: rm is always 4 and asmidx supplies the next byte */
	if(a->index != D_NONE) {
		if(t >= D_INDIR) {
			t -= D_INDIR;
			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
			/* no base register: 32-bit displacement */
			if(t == D_NONE) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				put4(v);
				return;
			}
			/* no displacement; BP and R13 are excluded from this form */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			/* 8-bit displacement */
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			/* 32-bit displacement */
			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* indexed symbolic operand: rewrite as an indirect and recurse */
		switch(t) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			aa.type = D_NONE+D_INDIR;
			break;
		case D_AUTO:
		case D_PARAM:
			aa.type = D_SP+D_INDIR;
			break;
		}
		aa.offset = vaddr(a);
		aa.index = a->index;
		aa.scale = a->scale;
		asmandsz(&aa, r, rex, m64);
		return;
	}
	/* register operand: mod is 3 and a non-zero offset is an error */
	if(t >= D_AL && t <= D_X0+15) {
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	/* indirect through a register, or absolute */
	if(t >= D_INDIR) {
		t -= D_INDIR;
		rexflag |= (regrex[t] & Rxb) | rex;
		if(t == D_NONE) {
			/* absolute 32-bit address */
			if(asmode != 64){
				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
				put4(v);
				return;
			}
			/* temporary */
			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
			put4(v);
			return;
		}
		/* SP and R12 as base always get the extra byte from asmidx */
		if(t == D_SP || t == D_R12) {
			if(v == 0) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* other base registers: no, 8-bit or 32-bit displacement */
		if(t >= D_AX && t <= D_R15) {
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				return;
			}
			if(v >= -128 && v < 128) {
				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
				andptr[1] = v;
				andptr += 2;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			put4(v);
			return;
		}
		goto bad;
	}
	/* plain symbolic operand: rewrite as an indirect and recurse */
	switch(a->type) {
	default:
		goto bad;
	case D_STATIC:
	case D_EXTERN:
		aa.type = D_NONE+D_INDIR;
		break;
	case D_AUTO:
	case D_PARAM:
		aa.type = D_SP+D_INDIR;
		break;
	}
	aa.index = D_NONE;
	aa.scale = 1;
	aa.offset = vaddr(a);
	asmandsz(&aa, r, rex, m64);
	return;
bad:
	diag("asmand: bad address %D", a);
	return;
}
796 
797 void
798 asmand(Adr *a, Adr *ra)
799 {
800 	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
801 }
802 
803 void
804 asmando(Adr *a, int o)
805 {
806 	asmandsz(a, o, 0, 0);
807 }
808 
809 static void
810 bytereg(Adr *a)
811 {
812 	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
813 		a->type = D_AL + (a->type-D_AX);
814 }
815 
/*
 * Table of special-case moves, pushes, pops and double shifts,
 * searched by doasm when the regular operand table has no match.
 * Each entry is: opcode, class of the from operand, class of the
 * to operand, a code selecting how doasm emits the entry, and up
 * to four opcode bytes.  For code 0 the bytes are emitted
 * literally up to the E marker.  The table ends with a zero entry.
 */
#define	E	0xff
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
946 
947 int
948 isax(Adr *a)
949 {
950 
951 	switch(a->type) {
952 	case D_AX:
953 	case D_AL:
954 	case D_AH:
955 	case D_INDIR+D_AX:
956 		return 1;
957 	}
958 	if(a->index == D_AX)
959 		return 1;
960 	return 0;
961 }
962 
963 void
964 subreg(Prog *p, int from, int to)
965 {
966 
967 	if(debug['Q'])
968 		print("\n%P	s/%R/%R/\n", p, from, to);
969 
970 	if(p->from.type == from)
971 		p->from.type = to;
972 	if(p->to.type == from)
973 		p->to.type = to;
974 
975 	if(p->from.index == from)
976 		p->from.index = to;
977 	if(p->to.index == from)
978 		p->to.index = to;
979 
980 	from += D_INDIR;
981 	if(p->from.type == from)
982 		p->from.type = to+D_INDIR;
983 	if(p->to.type == from)
984 		p->to.type = to+D_INDIR;
985 
986 	if(debug['Q'])
987 		print("%P\n", p);
988 }
989 
990 static int
991 mediaop(Optab *o, int op, int osize, int z)
992 {
993 	switch(op){
994 	case Pm:
995 	case Pe:
996 	case Pf2:
997 	case Pf3:
998 		if(osize != 1){
999 			if(op != Pm)
1000 				*andptr++ = op;
1001 			*andptr++ = Pm;
1002 			op = o->op[++z];
1003 			break;
1004 		}
1005 	default:
1006 		if(andptr == and || andptr[-1] != Pm)
1007 			*andptr++ = Pm;
1008 		break;
1009 	}
1010 	*andptr++ = op;
1011 	return z;
1012 }
1013 
1014 void
1015 doasm(Prog *p)
1016 {
1017 	Optab *o;
1018 	Prog *q, pp;
1019 	uchar *t;
1020 	Movtab *mo;
1021 	int z, op, ft, tt, xo, l;
1022 	vlong v;
1023 
1024 	o = opindex[p->as];
1025 	if(o == nil) {
1026 		diag("asmins: missing op %P", p);
1027 		return;
1028 	}
1029 	ft = oclass(&p->from) * Ymax;
1030 	tt = oclass(&p->to) * Ymax;
1031 	t = o->ytab;
1032 	if(t == 0) {
1033 		diag("asmins: noproto %P", p);
1034 		return;
1035 	}
1036 	xo = o->op[0] == 0x0f;
1037 	for(z=0; *t; z+=t[3]+xo,t+=4)
1038 		if(ycover[ft+t[0]])
1039 		if(ycover[tt+t[1]])
1040 			goto found;
1041 	goto domov;
1042 
1043 found:
1044 	switch(o->prefix) {
1045 	case Pq:	/* 16 bit escape and opcode escape */
1046 		*andptr++ = Pe;
1047 		*andptr++ = Pm;
1048 		break;
1049 
1050 	case Pf2:	/* xmm opcode escape */
1051 	case Pf3:
1052 		*andptr++ = o->prefix;
1053 		*andptr++ = Pm;
1054 		break;
1055 
1056 	case Pm:	/* opcode escape */
1057 		*andptr++ = Pm;
1058 		break;
1059 
1060 	case Pe:	/* 16 bit escape */
1061 		*andptr++ = Pe;
1062 		break;
1063 
1064 	case Pw:	/* 64-bit escape */
1065 		if(p->mode != 64)
1066 			diag("asmins: illegal 64: %P", p);
1067 		rexflag |= Pw;
1068 		break;
1069 
1070 	case Pb:	/* botch */
1071 		bytereg(&p->from);
1072 		bytereg(&p->to);
1073 		break;
1074 
1075 	case P32:	/* 32 bit but illegal if 64-bit mode */
1076 		if(p->mode == 64)
1077 			diag("asmins: illegal in 64-bit mode: %P", p);
1078 		break;
1079 
1080 	case Py:	/* 64-bit only, no prefix */
1081 		if(p->mode != 64)
1082 			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1083 		break;
1084 	}
1085 	v = vaddr(&p->from);
1086 	op = o->op[z];
1087 	if(op == 0x0f) {
1088 		*andptr++ = op;
1089 		op = o->op[++z];
1090 	}
1091 	switch(t[2]) {
1092 	default:
1093 		diag("asmins: unknown z %d %P", t[2], p);
1094 		return;
1095 
1096 	case Zpseudo:
1097 		break;
1098 
1099 	case Zlit:
1100 		for(; op = o->op[z]; z++)
1101 			*andptr++ = op;
1102 		break;
1103 
1104 	case Zmb_r:
1105 		bytereg(&p->from);
1106 		/* fall through */
1107 	case Zm_r:
1108 		*andptr++ = op;
1109 		asmand(&p->from, &p->to);
1110 		break;
1111 
1112 	case Zm_r_xm:
1113 		mediaop(o, op, t[3], z);
1114 		asmand(&p->from, &p->to);
1115 		break;
1116 
1117 	case Zm_r_xm_nr:
1118 		rexflag = 0;
1119 		mediaop(o, op, t[3], z);
1120 		asmand(&p->from, &p->to);
1121 		break;
1122 
1123 	case Zm_r_i_xm:
1124 		mediaop(o, op, t[3], z);
1125 		asmand(&p->from, &p->to);
1126 		*andptr++ = p->to.offset;
1127 		break;
1128 
1129 	case Zm_r_3d:
1130 		*andptr++ = 0x0f;
1131 		*andptr++ = 0x0f;
1132 		asmand(&p->from, &p->to);
1133 		*andptr++ = op;
1134 		break;
1135 
1136 	case Zibm_r:
1137 		*andptr++ = op;
1138 		asmand(&p->from, &p->to);
1139 		*andptr++ = p->to.offset;
1140 		break;
1141 
1142 	case Zaut_r:
1143 		*andptr++ = 0x8d;	/* leal */
1144 		if(p->from.type != D_ADDR)
1145 			diag("asmins: Zaut sb type ADDR");
1146 		p->from.type = p->from.index;
1147 		p->from.index = D_NONE;
1148 		asmand(&p->from, &p->to);
1149 		p->from.index = p->from.type;
1150 		p->from.type = D_ADDR;
1151 		break;
1152 
1153 	case Zm_o:
1154 		*andptr++ = op;
1155 		asmando(&p->from, o->op[z+1]);
1156 		break;
1157 
1158 	case Zr_m:
1159 		*andptr++ = op;
1160 		asmand(&p->to, &p->from);
1161 		break;
1162 
1163 	case Zr_m_xm:
1164 		mediaop(o, op, t[3], z);
1165 		asmand(&p->to, &p->from);
1166 		break;
1167 
1168 	case Zr_m_xm_nr:
1169 		rexflag = 0;
1170 		mediaop(o, op, t[3], z);
1171 		asmand(&p->to, &p->from);
1172 		break;
1173 
1174 	case Zr_m_i_xm:
1175 		mediaop(o, op, t[3], z);
1176 		asmand(&p->to, &p->from);
1177 		*andptr++ = p->from.offset;
1178 		break;
1179 
1180 	case Zo_m:
1181 		*andptr++ = op;
1182 		asmando(&p->to, o->op[z+1]);
1183 		break;
1184 
1185 	case Zo_m64:
1186 		*andptr++ = op;
1187 		asmandsz(&p->to, o->op[z+1], 0, 1);
1188 		break;
1189 
1190 	case Zm_ibo:
1191 		v = vaddr(&p->to);
1192 		*andptr++ = op;
1193 		asmando(&p->from, o->op[z+1]);
1194 		*andptr++ = v;
1195 		break;
1196 
1197 	case Zibo_m:
1198 		*andptr++ = op;
1199 		asmando(&p->to, o->op[z+1]);
1200 		*andptr++ = v;
1201 		break;
1202 
1203 	case Zibo_m_xm:
1204 		z = mediaop(o, op, t[3], z);
1205 		asmando(&p->to, o->op[z+1]);
1206 		*andptr++ = v;
1207 		break;
1208 
1209 	case Z_ib:
1210 		v = vaddr(&p->to);
1211 	case Zib_:
1212 		*andptr++ = op;
1213 		*andptr++ = v;
1214 		break;
1215 
1216 	case Zib_rp:
1217 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1218 		*andptr++ = op + reg[p->to.type];
1219 		*andptr++ = v;
1220 		break;
1221 
1222 	case Zil_rp:
1223 		rexflag |= regrex[p->to.type] & Rxb;
1224 		*andptr++ = op + reg[p->to.type];
1225 		if(o->prefix == Pe) {
1226 			*andptr++ = v;
1227 			*andptr++ = v>>8;
1228 		}
1229 		else
1230 			put4(v);
1231 		break;
1232 
1233 	case Zo_iw:
1234 		*andptr++ = op;
1235 		if(p->from.type != D_NONE){
1236 			*andptr++ = v;
1237 			*andptr++ = v>>8;
1238 		}
1239 		break;
1240 
1241 	case Ziq_rp:
1242 		l = v>>32;
1243 		if(l == 0){
1244 			//p->mark |= 0100;
1245 			//print("zero: %llux %P\n", v, p);
1246 			rexflag &= ~(0x40|Rxw);
1247 			rexflag |= regrex[p->to.type] & Rxb;
1248 			*andptr++ = 0xb8 + reg[p->to.type];
1249 			put4(v);
1250 		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
1251 			//p->mark |= 0100;
1252 			//print("sign: %llux %P\n", v, p);
1253 			*andptr ++ = 0xc7;
1254 			asmando(&p->to, 0);
1255 			put4(v);
1256 		}else{	/* need all 8 */
1257 			//print("all: %llux %P\n", v, p);
1258 			rexflag |= regrex[p->to.type] & Rxb;
1259 			*andptr++ = op + reg[p->to.type];
1260 			put8(v);
1261 		}
1262 		break;
1263 
1264 	case Zib_rr:
1265 		*andptr++ = op;
1266 		asmand(&p->to, &p->to);
1267 		*andptr++ = v;
1268 		break;
1269 
1270 	case Z_il:
1271 		v = vaddr(&p->to);
1272 	case Zil_:
1273 		*andptr++ = op;
1274 		if(o->prefix == Pe) {
1275 			*andptr++ = v;
1276 			*andptr++ = v>>8;
1277 		}
1278 		else
1279 			put4(v);
1280 		break;
1281 
1282 	case Zm_ilo:
1283 		v = vaddr(&p->to);
1284 		*andptr++ = op;
1285 		asmando(&p->from, o->op[z+1]);
1286 		if(o->prefix == Pe) {
1287 			*andptr++ = v;
1288 			*andptr++ = v>>8;
1289 		}
1290 		else
1291 			put4(v);
1292 		break;
1293 
1294 	case Zilo_m:
1295 		*andptr++ = op;
1296 		asmando(&p->to, o->op[z+1]);
1297 		if(o->prefix == Pe) {
1298 			*andptr++ = v;
1299 			*andptr++ = v>>8;
1300 		}
1301 		else
1302 			put4(v);
1303 		break;
1304 
1305 	case Zil_rr:
1306 		*andptr++ = op;
1307 		asmand(&p->to, &p->to);
1308 		if(o->prefix == Pe) {
1309 			*andptr++ = v;
1310 			*andptr++ = v>>8;
1311 		}
1312 		else
1313 			put4(v);
1314 		break;
1315 
1316 	case Z_rp:
1317 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1318 		*andptr++ = op + reg[p->to.type];
1319 		break;
1320 
1321 	case Zrp_:
1322 		rexflag |= regrex[p->from.type] & (Rxb|0x40);
1323 		*andptr++ = op + reg[p->from.type];
1324 		break;
1325 
1326 	case Zclr:
1327 		*andptr++ = op;
1328 		asmand(&p->to, &p->to);
1329 		break;
1330 
1331 	case Zbr:
1332 		q = p->pcond;
1333 		if(q) {
1334 			v = q->pc - p->pc - 2;
1335 			if(v >= -128 && v <= 127) {
1336 				*andptr++ = op;
1337 				*andptr++ = v;
1338 			} else {
1339 				v -= 6-2;
1340 				*andptr++ = 0x0f;
1341 				*andptr++ = o->op[z+1];
1342 				*andptr++ = v;
1343 				*andptr++ = v>>8;
1344 				*andptr++ = v>>16;
1345 				*andptr++ = v>>24;
1346 			}
1347 		}
1348 		break;
1349 
1350 	case Zcall:
1351 		q = p->pcond;
1352 		if(q) {
1353 			v = q->pc - p->pc - 5;
1354 			if(dlm && curp != P && p->to.sym->type == SUNDEF){
1355 				/* v = 0 - p->pc - 5; */
1356 				v = 0;
1357 				ckoff(p->to.sym, v);
1358 				v += p->to.sym->value;
1359 				dynreloc(p->to.sym, p->pc+1, 0);
1360 			}
1361 			*andptr++ = op;
1362 			*andptr++ = v;
1363 			*andptr++ = v>>8;
1364 			*andptr++ = v>>16;
1365 			*andptr++ = v>>24;
1366 		}
1367 		break;
1368 
1369 	case Zjmp:
1370 		q = p->pcond;
1371 		if(q) {
1372 			v = q->pc - p->pc - 2;
1373 			if(v >= -128 && v <= 127) {
1374 				*andptr++ = op;
1375 				*andptr++ = v;
1376 			} else {
1377 				v -= 5-2;
1378 				*andptr++ = o->op[z+1];
1379 				*andptr++ = v;
1380 				*andptr++ = v>>8;
1381 				*andptr++ = v>>16;
1382 				*andptr++ = v>>24;
1383 			}
1384 		}
1385 		break;
1386 
1387 	case Zloop:
1388 		q = p->pcond;
1389 		if(q) {
1390 			v = q->pc - p->pc - 2;
1391 			if(v < -128 || v > 127)
1392 				diag("loop too far: %P", p);
1393 			*andptr++ = op;
1394 			*andptr++ = v;
1395 		}
1396 		break;
1397 
1398 	case Zbyte:
1399 		*andptr++ = v;
1400 		if(op > 1) {
1401 			*andptr++ = v>>8;
1402 			if(op > 2) {
1403 				*andptr++ = v>>16;
1404 				*andptr++ = v>>24;
1405 				if(op > 4) {
1406 					*andptr++ = v>>32;
1407 					*andptr++ = v>>40;
1408 					*andptr++ = v>>48;
1409 					*andptr++ = v>>56;
1410 				}
1411 			}
1412 		}
1413 		break;
1414 	}
1415 	return;
1416 
1417 domov:
1418 	for(mo=ymovtab; mo->as; mo++)
1419 		if(p->as == mo->as)
1420 		if(ycover[ft+mo->ft])
1421 		if(ycover[tt+mo->tt]){
1422 			t = mo->op;
1423 			goto mfound;
1424 		}
1425 bad:
1426 	if(p->mode != 64){
1427 		/*
1428 		 * here, the assembly has failed.
1429 		 * if its a byte instruction that has
1430 		 * unaddressable registers, try to
1431 		 * exchange registers and reissue the
1432 		 * instruction with the operands renamed.
1433 		 */
1434 		pp = *p;
1435 		z = p->from.type;
1436 		if(z >= D_BP && z <= D_DI) {
1437 			if(isax(&p->to)) {
1438 				*andptr++ = 0x87;			/* xchg lhs,bx */
1439 				asmando(&p->from, reg[D_BX]);
1440 				subreg(&pp, z, D_BX);
1441 				doasm(&pp);
1442 				*andptr++ = 0x87;			/* xchg lhs,bx */
1443 				asmando(&p->from, reg[D_BX]);
1444 			} else {
1445 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1446 				subreg(&pp, z, D_AX);
1447 				doasm(&pp);
1448 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1449 			}
1450 			return;
1451 		}
1452 		z = p->to.type;
1453 		if(z >= D_BP && z <= D_DI) {
1454 			if(isax(&p->from)) {
1455 				*andptr++ = 0x87;			/* xchg rhs,bx */
1456 				asmando(&p->to, reg[D_BX]);
1457 				subreg(&pp, z, D_BX);
1458 				doasm(&pp);
1459 				*andptr++ = 0x87;			/* xchg rhs,bx */
1460 				asmando(&p->to, reg[D_BX]);
1461 			} else {
1462 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1463 				subreg(&pp, z, D_AX);
1464 				doasm(&pp);
1465 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1466 			}
1467 			return;
1468 		}
1469 	}
1470 	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1471 	return;
1472 
1473 mfound:
1474 	switch(mo->code) {
1475 	default:
1476 		diag("asmins: unknown mov %d %P", mo->code, p);
1477 		break;
1478 
1479 	case 0:	/* lit */
1480 		for(z=0; t[z]!=E; z++)
1481 			*andptr++ = t[z];
1482 		break;
1483 
1484 	case 1:	/* r,m */
1485 		*andptr++ = t[0];
1486 		asmando(&p->to, t[1]);
1487 		break;
1488 
1489 	case 2:	/* m,r */
1490 		*andptr++ = t[0];
1491 		asmando(&p->from, t[1]);
1492 		break;
1493 
1494 	case 3:	/* r,m - 2op */
1495 		*andptr++ = t[0];
1496 		*andptr++ = t[1];
1497 		asmando(&p->to, t[2]);
1498 		rexflag |= regrex[p->from.type] & (Rxr|0x40);
1499 		break;
1500 
1501 	case 4:	/* m,r - 2op */
1502 		*andptr++ = t[0];
1503 		*andptr++ = t[1];
1504 		asmando(&p->from, t[2]);
1505 		rexflag |= regrex[p->to.type] & (Rxr|0x40);
1506 		break;
1507 
1508 	case 5:	/* load full pointer, trash heap */
1509 		if(t[0])
1510 			*andptr++ = t[0];
1511 		switch(p->to.index) {
1512 		default:
1513 			goto bad;
1514 		case D_DS:
1515 			*andptr++ = 0xc5;
1516 			break;
1517 		case D_SS:
1518 			*andptr++ = 0x0f;
1519 			*andptr++ = 0xb2;
1520 			break;
1521 		case D_ES:
1522 			*andptr++ = 0xc4;
1523 			break;
1524 		case D_FS:
1525 			*andptr++ = 0x0f;
1526 			*andptr++ = 0xb4;
1527 			break;
1528 		case D_GS:
1529 			*andptr++ = 0x0f;
1530 			*andptr++ = 0xb5;
1531 			break;
1532 		}
1533 		asmand(&p->from, &p->to);
1534 		break;
1535 
1536 	case 6:	/* double shift */
1537 		if(t[0] == Pw){
1538 			if(p->mode != 64)
1539 				diag("asmins: illegal 64: %P", p);
1540 			rexflag |= Pw;
1541 			t++;
1542 		}else if(t[0] == Pe){
1543 			*andptr++ = Pe;
1544 			t++;
1545 		}
1546 		z = p->from.type;
1547 		switch(z) {
1548 		default:
1549 			goto bad;
1550 		case D_CONST:
1551 			*andptr++ = 0x0f;
1552 			*andptr++ = t[0];
1553 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1554 			*andptr++ = p->from.offset;
1555 			break;
1556 		case D_CL:
1557 		case D_CX:
1558 			*andptr++ = 0x0f;
1559 			*andptr++ = t[1];
1560 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1561 			break;
1562 		}
1563 		break;
1564 	}
1565 }
1566 
1567 void
1568 asmins(Prog *p)
1569 {
1570 	int n, np, c;
1571 
1572 	rexflag = 0;
1573 	andptr = and;
1574 	asmode = p->mode;
1575 	doasm(p);
1576 	if(rexflag){
1577 		/*
1578 		 * as befits the whole approach of the architecture,
1579 		 * the rex prefix must appear before the first opcode byte
1580 		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
1581 		 * before the 0f opcode escape!), or it might be ignored.
1582 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1583 		 */
1584 		if(p->mode != 64)
1585 			diag("asmins: illegal in mode %d: %P", p->mode, p);
1586 		n = andptr - and;
1587 		for(np = 0; np < n; np++) {
1588 			c = and[np];
1589 			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
1590 				break;
1591 		}
1592 		memmove(and+np+1, and+np, n-np);
1593 		and[np] = 0x40 | rexflag;
1594 		andptr++;
1595 	}
1596 }
1597 
/*
 * kinds of dynamic relocation, as chosen by dynreloc:
 * absolute or relative reference, to a defined or an
 * undefined (SUNDEF) symbol.
 */
enum
{
	ABSD,		/* absolute, defined symbol */
	ABSU,		/* absolute, undefined symbol */
	RELD,		/* relative, defined symbol */
	RELU,		/* relative, undefined symbol */
};

/*
 * relocation kind -> mode value that dynreloc records
 * in the relocation table.
 */
int modemap[4] = {
	0,		/* ABSD */
	1,		/* ABSU */
	-1,		/* RELD */
	2,		/* RELU */
};
1606 
1607 typedef struct Reloc Reloc;
1608 
1609 struct Reloc
1610 {
1611 	int n;
1612 	int t;
1613 	uchar *m;
1614 	ulong *a;
1615 };
1616 
1617 Reloc rels;
1618 
1619 static void
1620 grow(Reloc *r)
1621 {
1622 	int t;
1623 	uchar *m, *nm;
1624 	ulong *a, *na;
1625 
1626 	t = r->t;
1627 	r->t += 64;
1628 	m = r->m;
1629 	a = r->a;
1630 	r->m = nm = malloc(r->t*sizeof(uchar));
1631 	r->a = na = malloc(r->t*sizeof(ulong));
1632 	memmove(nm, m, t*sizeof(uchar));
1633 	memmove(na, a, t*sizeof(ulong));
1634 	free(m);
1635 	free(a);
1636 }
1637 
1638 void
1639 dynreloc(Sym *s, ulong v, int abs)
1640 {
1641 	int i, k, n;
1642 	uchar *m;
1643 	ulong *a;
1644 	Reloc *r;
1645 
1646 	if(s->type == SUNDEF)
1647 		k = abs ? ABSU : RELU;
1648 	else
1649 		k = abs ? ABSD : RELD;
1650 	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1651 	k = modemap[k];
1652 	r = &rels;
1653 	n = r->n;
1654 	if(n >= r->t)
1655 		grow(r);
1656 	m = r->m;
1657 	a = r->a;
1658 	for(i = n; i > 0; i--){
1659 		if(v < a[i-1]){	/* happens occasionally for data */
1660 			m[i] = m[i-1];
1661 			a[i] = a[i-1];
1662 		}
1663 		else
1664 			break;
1665 	}
1666 	m[i] = k;
1667 	a[i] = v;
1668 	r->n++;
1669 }
1670 
/*
 * write the string s to the output with cput, including
 * its terminating zero byte.
 * returns the number of bytes written (length of s plus one).
 */
static int
sput(char *s)
{
	int n;

	for(n = 0; s[n] != 0; n++)
		cput(s[n]);
	cput(0);
	return n+1;
}
1682 
1683 void
1684 asmdyn()
1685 {
1686 	int i, n, t, c;
1687 	Sym *s;
1688 	ulong la, ra, *a;
1689 	vlong off;
1690 	uchar *m;
1691 	Reloc *r;
1692 
1693 	cflush();
1694 	off = seek(cout, 0, 1);
1695 	lput(0);
1696 	t = 0;
1697 	lput(imports);
1698 	t += 4;
1699 	for(i = 0; i < NHASH; i++)
1700 		for(s = hash[i]; s != S; s = s->link)
1701 			if(s->type == SUNDEF){
1702 				lput(s->sig);
1703 				t += 4;
1704 				t += sput(s->name);
1705 			}
1706 
1707 	la = 0;
1708 	r = &rels;
1709 	n = r->n;
1710 	m = r->m;
1711 	a = r->a;
1712 	lput(n);
1713 	t += 4;
1714 	for(i = 0; i < n; i++){
1715 		ra = *a-la;
1716 		if(*a < la)
1717 			diag("bad relocation order");
1718 		if(ra < 256)
1719 			c = 0;
1720 		else if(ra < 65536)
1721 			c = 1;
1722 		else
1723 			c = 2;
1724 		cput((c<<6)|*m++);
1725 		t++;
1726 		if(c == 0){
1727 			cput(ra);
1728 			t++;
1729 		}
1730 		else if(c == 1){
1731 			wput(ra);
1732 			t += 2;
1733 		}
1734 		else{
1735 			lput(ra);
1736 			t += 4;
1737 		}
1738 		la = *a++;
1739 	}
1740 
1741 	cflush();
1742 	seek(cout, off, 0);
1743 	lput(t);
1744 
1745 	if(debug['v']){
1746 		Bprint(&bso, "import table entries = %d\n", imports);
1747 		Bprint(&bso, "export table entries = %d\n", exports);
1748 	}
1749 }
1750