xref: /inferno-os/utils/6l/span.c (revision 5d0c4cf3fc288434c41cba52dd998ab1d7375a7b)
1 #include	"l.h"
2 
/*
 * bits or'ed in from regrex[] entries and from Pw while an
 * instruction is being encoded (see asmandsz and doasm);
 * presumably the x86-64 REX prefix bits -- the code that
 * finally consumes them is outside this part of the file.
 */
3 static int	rexflag;
/*
 * operating mode; code in this file compares it against 64
 * before accepting R8-R15, the extra byte registers, and when
 * choosing the absolute-address encoding.  It is set elsewhere.
 */
4 static int	asmode;
5 
6 void
7 span(void)
8 {
9 	Prog *p, *q;
10 	long v;
11 	vlong c, idat;
12 	int m, n, again;
13 
14 	xdefine("etext", STEXT, 0L);
15 	idat = INITDAT;
16 	for(p = firstp; p != P; p = p->link) {
17 		if(p->as == ATEXT)
18 			curtext = p;
19 		n = 0;
20 		if(p->to.type == D_BRANCH)
21 			if(p->pcond == P)
22 				p->pcond = p;
23 		if((q = p->pcond) != P)
24 			if(q->back != 2)
25 				n = 1;
26 		p->back = n;
27 		if(p->as == AADJSP) {
28 			p->to.type = D_SP;
29 			v = -p->from.offset;
30 			p->from.offset = v;
31 			p->as = p->mode != 64? AADDL: AADDQ;
32 			if(v < 0) {
33 				p->as = p->mode != 64? ASUBL: ASUBQ;
34 				v = -v;
35 				p->from.offset = v;
36 			}
37 			if(v == 0)
38 				p->as = ANOP;
39 		}
40 	}
41 	n = 0;
42 
43 start:
44 	if(debug['v'])
45 		Bprint(&bso, "%5.2f span\n", cputime());
46 	Bflush(&bso);
47 	c = INITTEXT;
48 	for(p = firstp; p != P; p = p->link) {
49 		if(p->as == ATEXT)
50 			curtext = p;
51 		if(p->to.type == D_BRANCH)
52 			if(p->back)
53 				p->pc = c;
54 		asmins(p);
55 		p->pc = c;
56 		m = andptr-and;
57 		p->mark = m;
58 		c += m;
59 	}
60 
61 loop:
62 	n++;
63 	if(debug['v'])
64 		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
65 	Bflush(&bso);
66 	if(n > 50) {
67 		print("span must be looping\n");
68 		errorexit();
69 	}
70 	again = 0;
71 	c = INITTEXT;
72 	for(p = firstp; p != P; p = p->link) {
73 		if(p->as == ATEXT)
74 			curtext = p;
75 		if(p->to.type == D_BRANCH || p->back & 0100) {
76 			if(p->back)
77 				p->pc = c;
78 			asmins(p);
79 			m = andptr-and;
80 			if(m != p->mark) {
81 				p->mark = m;
82 				again++;
83 			}
84 		}
85 		p->pc = c;
86 		c += p->mark;
87 	}
88 	if(again) {
89 		textsize = c;
90 		goto loop;
91 	}
92 	if(INITRND) {
93 		INITDAT = rnd(c, INITRND);
94 		if(INITDAT != idat) {
95 			idat = INITDAT;
96 			goto start;
97 		}
98 	}
99 	xdefine("etext", STEXT, c);
100 	if(debug['v'])
101 		Bprint(&bso, "etext = %llux\n", c);
102 	Bflush(&bso);
103 	for(p = textp; p != P; p = p->pcond)
104 		p->from.sym->value = p->pc;
105 	textsize = c - INITTEXT;
106 }
107 
108 void
109 xdefine(char *p, int t, vlong v)
110 {
111 	Sym *s;
112 
113 	s = lookup(p, 0);
114 	if(s->type == 0 || s->type == SXREF) {
115 		s->type = t;
116 		s->value = v;
117 	}
118 	if(s->type == STEXT && s->value == 0)
119 		s->value = v;
120 }
121 
122 void
123 putsymb(char *s, int t, vlong v, int ver)
124 {
125 	int i, f, l;
126 
127 	if(t == 'f')
128 		s++;
129 	l = 4;
130 	switch(HEADTYPE){
131 	default:
132 		break;
133 	case 5:
134 		if(debug['8'])
135 			break;
136 	case 2:
137 	case 6:
138 		lput(v>>32);
139 		l = 8;
140 		break;
141 	}
142 	lput(v);
143 	if(ver)
144 		t += 'a' - 'A';
145 	cput(t+0x80);			/* 0x80 is variable length */
146 
147 	if(t == 'Z' || t == 'z') {
148 		cput(s[0]);
149 		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
150 			cput(s[i]);
151 			cput(s[i+1]);
152 		}
153 		cput(0);
154 		cput(0);
155 		i++;
156 	}
157 	else {
158 		for(i=0; s[i]; i++)
159 			cput(s[i]);
160 		cput(0);
161 	}
162 	symsize += l + 1 + i + 1;
163 
164 	if(debug['n']) {
165 		if(t == 'z' || t == 'Z') {
166 			Bprint(&bso, "%c %.8llux ", t, v);
167 			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
168 				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
169 				Bprint(&bso, "/%x", f);
170 			}
171 			Bprint(&bso, "\n");
172 			return;
173 		}
174 		if(ver)
175 			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
176 		else
177 			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
178 	}
179 }
180 
181 void
182 asmsym(void)
183 {
184 	Prog *p;
185 	Auto *a;
186 	Sym *s;
187 	int h;
188 
189 	s = lookup("etext", 0);
190 	if(s->type == STEXT)
191 		putsymb(s->name, 'T', s->value, s->version);
192 
193 	for(h=0; h<NHASH; h++)
194 		for(s=hash[h]; s!=S; s=s->link)
195 			switch(s->type) {
196 			case SCONST:
197 				putsymb(s->name, 'D', s->value, s->version);
198 				continue;
199 
200 			case SDATA:
201 				putsymb(s->name, 'D', s->value+INITDAT, s->version);
202 				continue;
203 
204 			case SBSS:
205 				putsymb(s->name, 'B', s->value+INITDAT, s->version);
206 				continue;
207 
208 			case SFILE:
209 				putsymb(s->name, 'f', s->value, s->version);
210 				continue;
211 			}
212 
213 	for(p=textp; p!=P; p=p->pcond) {
214 		s = p->from.sym;
215 		if(s->type != STEXT)
216 			continue;
217 
218 		/* filenames first */
219 		for(a=p->to.autom; a; a=a->link)
220 			if(a->type == D_FILE)
221 				putsymb(a->asym->name, 'z', a->aoffset, 0);
222 			else
223 			if(a->type == D_FILE1)
224 				putsymb(a->asym->name, 'Z', a->aoffset, 0);
225 
226 		putsymb(s->name, 'T', s->value, s->version);
227 
228 		/* frame, auto and param after */
229 		putsymb(".frame", 'm', p->to.offset+8, 0);
230 
231 		for(a=p->to.autom; a; a=a->link)
232 			if(a->type == D_AUTO)
233 				putsymb(a->asym->name, 'a', -a->aoffset, 0);
234 			else
235 			if(a->type == D_PARAM)
236 				putsymb(a->asym->name, 'p', a->aoffset, 0);
237 	}
238 	if(debug['v'] || debug['n'])
239 		Bprint(&bso, "symsize = %lud\n", symsize);
240 	Bflush(&bso);
241 }
242 
243 void
244 asmlc(void)
245 {
246 	vlong oldpc;
247 	Prog *p;
248 	long oldlc, v, s;
249 
250 	oldpc = INITTEXT;
251 	oldlc = 0;
252 	for(p = firstp; p != P; p = p->link) {
253 		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
254 			if(p->as == ATEXT)
255 				curtext = p;
256 			if(debug['V'])
257 				Bprint(&bso, "%6llux %P\n",
258 					p->pc, p);
259 			continue;
260 		}
261 		if(debug['V'])
262 			Bprint(&bso, "\t\t%6ld", lcsize);
263 		v = (p->pc - oldpc) / MINLC;
264 		while(v) {
265 			s = 127;
266 			if(v < 127)
267 				s = v;
268 			cput(s+128);	/* 129-255 +pc */
269 			if(debug['V'])
270 				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
271 			v -= s;
272 			lcsize++;
273 		}
274 		s = p->line - oldlc;
275 		oldlc = p->line;
276 		oldpc = p->pc + MINLC;
277 		if(s > 64 || s < -64) {
278 			cput(0);	/* 0 vv +lc */
279 			cput(s>>24);
280 			cput(s>>16);
281 			cput(s>>8);
282 			cput(s);
283 			if(debug['V']) {
284 				if(s > 0)
285 					Bprint(&bso, " lc+%ld(%d,%ld)\n",
286 						s, 0, s);
287 				else
288 					Bprint(&bso, " lc%ld(%d,%ld)\n",
289 						s, 0, s);
290 				Bprint(&bso, "%6llux %P\n",
291 					p->pc, p);
292 			}
293 			lcsize += 5;
294 			continue;
295 		}
296 		if(s > 0) {
297 			cput(0+s);	/* 1-64 +lc */
298 			if(debug['V']) {
299 				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
300 				Bprint(&bso, "%6llux %P\n",
301 					p->pc, p);
302 			}
303 		} else {
304 			cput(64-s);	/* 65-128 -lc */
305 			if(debug['V']) {
306 				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
307 				Bprint(&bso, "%6llux %P\n",
308 					p->pc, p);
309 			}
310 		}
311 		lcsize++;
312 	}
313 	while(lcsize & 1) {
314 		s = 129;
315 		cput(s);
316 		lcsize++;
317 	}
318 	if(debug['v'] || debug['V'])
319 		Bprint(&bso, "lcsize = %ld\n", lcsize);
320 	Bflush(&bso);
321 }
322 
323 int
324 oclass(Adr *a)
325 {
326 	vlong v;
327 	long l;
328 
329 	if(a->type >= D_INDIR || a->index != D_NONE) {
330 		if(a->index != D_NONE && a->scale == 0) {
331 			if(a->type == D_ADDR) {
332 				switch(a->index) {
333 				case D_EXTERN:
334 				case D_STATIC:
335 					return Yi32;	/* TO DO: Yi64 */
336 				case D_AUTO:
337 				case D_PARAM:
338 					return Yiauto;
339 				}
340 				return Yxxx;
341 			}
342 			return Ycol;
343 		}
344 		return Ym;
345 	}
346 	switch(a->type)
347 	{
348 	case D_AL:
349 		return Yal;
350 
351 	case D_AX:
352 		return Yax;
353 
354 /*
355 	case D_SPB:
356 */
357 	case D_BPB:
358 	case D_SIB:
359 	case D_DIB:
360 	case D_R8B:
361 	case D_R9B:
362 	case D_R10B:
363 	case D_R11B:
364 	case D_R12B:
365 	case D_R13B:
366 	case D_R14B:
367 	case D_R15B:
368 		if(asmode != 64)
369 			return Yxxx;
370 	case D_DL:
371 	case D_BL:
372 	case D_AH:
373 	case D_CH:
374 	case D_DH:
375 	case D_BH:
376 		return Yrb;
377 
378 	case D_CL:
379 		return Ycl;
380 
381 	case D_CX:
382 		return Ycx;
383 
384 	case D_DX:
385 	case D_BX:
386 		return Yrx;
387 
388 	case D_R8:	/* not really Yrl */
389 	case D_R9:
390 	case D_R10:
391 	case D_R11:
392 	case D_R12:
393 	case D_R13:
394 	case D_R14:
395 	case D_R15:
396 		if(asmode != 64)
397 			return Yxxx;
398 	case D_SP:
399 	case D_BP:
400 	case D_SI:
401 	case D_DI:
402 		return Yrl;
403 
404 	case D_F0+0:
405 		return	Yf0;
406 
407 	case D_F0+1:
408 	case D_F0+2:
409 	case D_F0+3:
410 	case D_F0+4:
411 	case D_F0+5:
412 	case D_F0+6:
413 	case D_F0+7:
414 		return	Yrf;
415 
416 	case D_M0+0:
417 	case D_M0+1:
418 	case D_M0+2:
419 	case D_M0+3:
420 	case D_M0+4:
421 	case D_M0+5:
422 	case D_M0+6:
423 	case D_M0+7:
424 		return	Ymr;
425 
426 	case D_X0+0:
427 	case D_X0+1:
428 	case D_X0+2:
429 	case D_X0+3:
430 	case D_X0+4:
431 	case D_X0+5:
432 	case D_X0+6:
433 	case D_X0+7:
434 	case D_X0+8:
435 	case D_X0+9:
436 	case D_X0+10:
437 	case D_X0+11:
438 	case D_X0+12:
439 	case D_X0+13:
440 	case D_X0+14:
441 	case D_X0+15:
442 		return	Yxr;
443 
444 	case D_NONE:
445 		return Ynone;
446 
447 	case D_CS:	return	Ycs;
448 	case D_SS:	return	Yss;
449 	case D_DS:	return	Yds;
450 	case D_ES:	return	Yes;
451 	case D_FS:	return	Yfs;
452 	case D_GS:	return	Ygs;
453 
454 	case D_GDTR:	return	Ygdtr;
455 	case D_IDTR:	return	Yidtr;
456 	case D_LDTR:	return	Yldtr;
457 	case D_MSW:	return	Ymsw;
458 	case D_TASK:	return	Ytask;
459 
460 	case D_CR+0:	return	Ycr0;
461 	case D_CR+1:	return	Ycr1;
462 	case D_CR+2:	return	Ycr2;
463 	case D_CR+3:	return	Ycr3;
464 	case D_CR+4:	return	Ycr4;
465 	case D_CR+5:	return	Ycr5;
466 	case D_CR+6:	return	Ycr6;
467 	case D_CR+7:	return	Ycr7;
468 	case D_CR+8:	return	Ycr8;
469 
470 	case D_DR+0:	return	Ydr0;
471 	case D_DR+1:	return	Ydr1;
472 	case D_DR+2:	return	Ydr2;
473 	case D_DR+3:	return	Ydr3;
474 	case D_DR+4:	return	Ydr4;
475 	case D_DR+5:	return	Ydr5;
476 	case D_DR+6:	return	Ydr6;
477 	case D_DR+7:	return	Ydr7;
478 
479 	case D_TR+0:	return	Ytr0;
480 	case D_TR+1:	return	Ytr1;
481 	case D_TR+2:	return	Ytr2;
482 	case D_TR+3:	return	Ytr3;
483 	case D_TR+4:	return	Ytr4;
484 	case D_TR+5:	return	Ytr5;
485 	case D_TR+6:	return	Ytr6;
486 	case D_TR+7:	return	Ytr7;
487 
488 	case D_EXTERN:
489 	case D_STATIC:
490 	case D_AUTO:
491 	case D_PARAM:
492 		return Ym;
493 
494 	case D_CONST:
495 	case D_ADDR:
496 		if(a->sym == S) {
497 			v = a->offset;
498 			if(v == 0)
499 				return Yi0;
500 			if(v == 1)
501 				return Yi1;
502 			if(v >= -128 && v <= 127)
503 				return Yi8;
504 			l = v;
505 			if((vlong)l == v)
506 				return Ys32;	/* can sign extend */
507 			if((v>>32) == 0)
508 				return Yi32;	/* unsigned */
509 			return Yi64;
510 		}
511 		return Yi32;	/* TO DO: D_ADDR as Yi64 */
512 
513 	case D_BRANCH:
514 		return Ybr;
515 	}
516 	return Yxxx;
517 }
518 
519 void
520 asmidx(Adr *a, int base)
521 {
522 	int i;
523 
524 	switch(a->index) {
525 	default:
526 		goto bad;
527 
528 	case D_NONE:
529 		i = 4 << 3;
530 		goto bas;
531 
532 	case D_R8:
533 	case D_R9:
534 	case D_R10:
535 	case D_R11:
536 	case D_R12:
537 	case D_R13:
538 	case D_R14:
539 	case D_R15:
540 		if(asmode != 64)
541 			goto bad;
542 	case D_AX:
543 	case D_CX:
544 	case D_DX:
545 	case D_BX:
546 	case D_BP:
547 	case D_SI:
548 	case D_DI:
549 		i = reg[a->index] << 3;
550 		break;
551 	}
552 	switch(a->scale) {
553 	default:
554 		goto bad;
555 	case 1:
556 		break;
557 	case 2:
558 		i |= (1<<6);
559 		break;
560 	case 4:
561 		i |= (2<<6);
562 		break;
563 	case 8:
564 		i |= (3<<6);
565 		break;
566 	}
567 bas:
568 	switch(base) {
569 	default:
570 		goto bad;
571 	case D_NONE:	/* must be mod=00 */
572 		i |= 5;
573 		break;
574 	case D_R8:
575 	case D_R9:
576 	case D_R10:
577 	case D_R11:
578 	case D_R12:
579 	case D_R13:
580 	case D_R14:
581 	case D_R15:
582 		if(asmode != 64)
583 			goto bad;
584 	case D_AX:
585 	case D_CX:
586 	case D_DX:
587 	case D_BX:
588 	case D_SP:
589 	case D_BP:
590 	case D_SI:
591 	case D_DI:
592 		i |= reg[base];
593 		break;
594 	}
595 	*andptr++ = i;
596 	return;
597 bad:
598 	diag("asmidx: bad address %D", a);
599 	*andptr++ = 0;
600 	return;
601 }
602 
603 static void
604 put4(long v)
605 {
606 	if(dlm && curp != P && reloca != nil){
607 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
608 		reloca = nil;
609 	}
610 	andptr[0] = v;
611 	andptr[1] = v>>8;
612 	andptr[2] = v>>16;
613 	andptr[3] = v>>24;
614 	andptr += 4;
615 }
616 
617 static void
618 put8(vlong v)
619 {
620 	if(dlm && curp != P && reloca != nil){
621 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
622 		reloca = nil;
623 	}
624 	andptr[0] = v;
625 	andptr[1] = v>>8;
626 	andptr[2] = v>>16;
627 	andptr[3] = v>>24;
628 	andptr[4] = v>>32;
629 	andptr[5] = v>>40;
630 	andptr[6] = v>>48;
631 	andptr[7] = v>>56;
632 	andptr += 8;
633 }
634 
635 vlong
636 vaddr(Adr *a)
637 {
638 	int t;
639 	vlong v;
640 	Sym *s;
641 
642 	t = a->type;
643 	v = a->offset;
644 	if(t == D_ADDR)
645 		t = a->index;
646 	switch(t) {
647 	case D_STATIC:
648 	case D_EXTERN:
649 		s = a->sym;
650 		if(s != nil) {
651 			if(dlm && curp != P)
652 				reloca = a;
653 			switch(s->type) {
654 			case SUNDEF:
655 				ckoff(s, v);
656 			case STEXT:
657 			case SCONST:
658 				if((uvlong)s->value < (uvlong)INITTEXT)
659 					v += INITTEXT;	/* TO DO */
660 				v += s->value;
661 				break;
662 			default:
663 				v += INITDAT + s->value;
664 			}
665 		}
666 	}
667 	return v;
668 }
669 
670 static void
671 asmandsz(Adr *a, int r, int rex, int m64)
672 {
673 	long v;
674 	int t;
675 	Adr aa;
676 
677 	rex &= (0x40 | Rxr);
678 	v = a->offset;
679 	t = a->type;
680 	if(a->index != D_NONE) {
681 		if(t >= D_INDIR) {
682 			t -= D_INDIR;
683 			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
684 			if(t == D_NONE) {
685 				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
686 				asmidx(a, t);
687 				put4(v);
688 				return;
689 			}
690 			if(v == 0 && t != D_BP && t != D_R13) {
691 				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
692 				asmidx(a, t);
693 				return;
694 			}
695 			if(v >= -128 && v < 128) {
696 				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
697 				asmidx(a, t);
698 				*andptr++ = v;
699 				return;
700 			}
701 			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
702 			asmidx(a, t);
703 			put4(v);
704 			return;
705 		}
706 		switch(t) {
707 		default:
708 			goto bad;
709 		case D_STATIC:
710 		case D_EXTERN:
711 			aa.type = D_NONE+D_INDIR;
712 			break;
713 		case D_AUTO:
714 		case D_PARAM:
715 			aa.type = D_SP+D_INDIR;
716 			break;
717 		}
718 		aa.offset = vaddr(a);
719 		aa.index = a->index;
720 		aa.scale = a->scale;
721 		asmandsz(&aa, r, rex, m64);
722 		return;
723 	}
724 	if(t >= D_AL && t <= D_X0+15) {
725 		if(v)
726 			goto bad;
727 		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
728 		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
729 		return;
730 	}
731 	if(t >= D_INDIR) {
732 		t -= D_INDIR;
733 		rexflag |= (regrex[t] & Rxb) | rex;
734 		if(t == D_NONE) {
735 			if(asmode != 64){
736 				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
737 				put4(v);
738 				return;
739 			}
740 			/* temporary */
741 			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
742 			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
743 			put4(v);
744 			return;
745 		}
746 		if(t == D_SP || t == D_R12) {
747 			if(v == 0) {
748 				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
749 				asmidx(a, t);
750 				return;
751 			}
752 			if(v >= -128 && v < 128) {
753 				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
754 				asmidx(a, t);
755 				*andptr++ = v;
756 				return;
757 			}
758 			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
759 			asmidx(a, t);
760 			put4(v);
761 			return;
762 		}
763 		if(t >= D_AX && t <= D_R15) {
764 			if(v == 0 && t != D_BP && t != D_R13) {
765 				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
766 				return;
767 			}
768 			if(v >= -128 && v < 128) {
769 				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
770 				andptr[1] = v;
771 				andptr += 2;
772 				return;
773 			}
774 			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
775 			put4(v);
776 			return;
777 		}
778 		goto bad;
779 	}
780 	switch(a->type) {
781 	default:
782 		goto bad;
783 	case D_STATIC:
784 	case D_EXTERN:
785 		aa.type = D_NONE+D_INDIR;
786 		break;
787 	case D_AUTO:
788 	case D_PARAM:
789 		aa.type = D_SP+D_INDIR;
790 		break;
791 	}
792 	aa.index = D_NONE;
793 	aa.scale = 1;
794 	aa.offset = vaddr(a);
795 	asmandsz(&aa, r, rex, m64);
796 	return;
797 bad:
798 	diag("asmand: bad address %D", a);
799 	return;
800 }
801 
802 void
803 asmand(Adr *a, Adr *ra)
804 {
805 	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
806 }
807 
808 void
809 asmando(Adr *a, int o)
810 {
811 	asmandsz(a, o, 0, 0);
812 }
813 
814 static void
815 bytereg(Adr *a)
816 {
817 	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
818 		a->type = D_AL + (a->type-D_AX);
819 }
820 
/*
 * ymovtab lists instruction forms that doasm tries when an opcode's
 * own operand table has no entry matching the operand classes.
 * Each row is
 *	{ opcode, from-class, to-class, code, byte, byte, byte, byte }
 * and is selected when the opcode is equal and ycover accepts both
 * operand classes.  The table ends with a row whose opcode is 0.
 *
 * code selects how doasm uses the four bytes:
 *	0	literal: the bytes are emitted up to the E marker
 *	1	byte 0, then the `to' operand with byte 1 in the register field
 *	2	byte 0, then the `from' operand with byte 1 in the register field
 *	3	bytes 0 and 1, then the `to' operand with byte 2 in the register field
 *	4	bytes 0 and 1, then the `from' operand with byte 2 in the register field
 *	5	load full pointer; byte 0, when non-zero, is emitted first
 *	6	used by the double shift rows; its handling is outside
 *		this part of the file
 *
 * E (0xff) terminates the byte list of code 0 rows.
 */
821 #define	E	0xff
822 Movtab	ymovtab[] =
823 {
824 /* push */
825 	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
826 	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
827 	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
828 	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
829 	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
830 	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
831 	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
832 	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
833 
834 	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
835 	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
836 	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
837 	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
838 	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
839 	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},
840 
841 /* pop */
842 	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
843 	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
844 	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
845 	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
846 	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
847 	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
848 	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
849 
850 	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
851 	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
852 	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
853 	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
854 	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},
855 
856 /* mov seg */
857 	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
858 	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
859 	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
860 	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
861 	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
862 	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},
863 
864 	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
865 	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
866 	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
867 	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
868 	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
869 	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},
870 
871 /* mov cr */
872 	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
873 	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
874 	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
875 	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
876 	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
877 	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
878 	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
879 	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
880 	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
881 	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
882 
883 	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
884 	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
885 	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
886 	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
887 	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
888 	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
889 	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
890 	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
891 	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
892 	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
893 
894 /* mov dr */
895 	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
896 	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
897 	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
898 	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
899 	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
900 	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
901 
902 	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
903 	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
904 	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
905 	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
906 	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
907 	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
908 
909 /* mov tr */
910 	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
911 	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},
912 
913 	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
914 	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},
915 
916 /* lgdt, sgdt, lidt, sidt */
917 	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
918 	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
919 	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
920 	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
921 	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
922 	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
923 	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
924 	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
925 
926 /* lldt, sldt */
927 	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
928 	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},
929 
930 /* lmsw, smsw */
931 	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
932 	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},
933 
934 /* ltr, str */
935 	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
936 	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},
937 
938 /* load full pointer */
939 	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
940 	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},
941 
942 /* double shift */
943 	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
944 	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
945 	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
946 	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
947 	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
948 	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
949 	0
950 };
951 
952 int
953 isax(Adr *a)
954 {
955 
956 	switch(a->type) {
957 	case D_AX:
958 	case D_AL:
959 	case D_AH:
960 	case D_INDIR+D_AX:
961 		return 1;
962 	}
963 	if(a->index == D_AX)
964 		return 1;
965 	return 0;
966 }
967 
968 void
969 subreg(Prog *p, int from, int to)
970 {
971 
972 	if(debug['Q'])
973 		print("\n%P	s/%R/%R/\n", p, from, to);
974 
975 	if(p->from.type == from)
976 		p->from.type = to;
977 	if(p->to.type == from)
978 		p->to.type = to;
979 
980 	if(p->from.index == from)
981 		p->from.index = to;
982 	if(p->to.index == from)
983 		p->to.index = to;
984 
985 	from += D_INDIR;
986 	if(p->from.type == from)
987 		p->from.type = to+D_INDIR;
988 	if(p->to.type == from)
989 		p->to.type = to+D_INDIR;
990 
991 	if(debug['Q'])
992 		print("%P\n", p);
993 }
994 
995 static int
996 mediaop(Optab *o, int op, int osize, int z)
997 {
998 	switch(op){
999 	case Pm:
1000 	case Pe:
1001 	case Pf2:
1002 	case Pf3:
1003 		if(osize != 1){
1004 			if(op != Pm)
1005 				*andptr++ = op;
1006 			*andptr++ = Pm;
1007 			op = o->op[++z];
1008 			break;
1009 		}
1010 	default:
1011 		if(andptr == and || andptr[-1] != Pm)
1012 			*andptr++ = Pm;
1013 		break;
1014 	}
1015 	*andptr++ = op;
1016 	return z;
1017 }
1018 
1019 void
1020 doasm(Prog *p)
1021 {
1022 	Optab *o;
1023 	Prog *q, pp;
1024 	uchar *t;
1025 	Movtab *mo;
1026 	int z, op, ft, tt, xo, l;
1027 	vlong v;
1028 
1029 	o = opindex[p->as];
1030 	if(o == nil) {
1031 		diag("asmins: missing op %P", p);
1032 		return;
1033 	}
1034 	ft = oclass(&p->from) * Ymax;
1035 	tt = oclass(&p->to) * Ymax;
1036 	t = o->ytab;
1037 	if(t == 0) {
1038 		diag("asmins: noproto %P", p);
1039 		return;
1040 	}
1041 	xo = o->op[0] == 0x0f;
1042 	for(z=0; *t; z+=t[3]+xo,t+=4)
1043 		if(ycover[ft+t[0]])
1044 		if(ycover[tt+t[1]])
1045 			goto found;
1046 	goto domov;
1047 
1048 found:
1049 	switch(o->prefix) {
1050 	case Pq:	/* 16 bit escape and opcode escape */
1051 		*andptr++ = Pe;
1052 		*andptr++ = Pm;
1053 		break;
1054 
1055 	case Pf2:	/* xmm opcode escape */
1056 	case Pf3:
1057 		*andptr++ = o->prefix;
1058 		*andptr++ = Pm;
1059 		break;
1060 
1061 	case Pm:	/* opcode escape */
1062 		*andptr++ = Pm;
1063 		break;
1064 
1065 	case Pe:	/* 16 bit escape */
1066 		*andptr++ = Pe;
1067 		break;
1068 
1069 	case Pw:	/* 64-bit escape */
1070 		if(p->mode != 64)
1071 			diag("asmins: illegal 64: %P", p);
1072 		rexflag |= Pw;
1073 		break;
1074 
1075 	case Pb:	/* botch */
1076 		bytereg(&p->from);
1077 		bytereg(&p->to);
1078 		break;
1079 
1080 	case P32:	/* 32 bit but illegal if 64-bit mode */
1081 		if(p->mode == 64)
1082 			diag("asmins: illegal in 64-bit mode: %P", p);
1083 		break;
1084 
1085 	case Py:	/* 64-bit only, no prefix */
1086 		if(p->mode != 64)
1087 			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1088 		break;
1089 	}
1090 	v = vaddr(&p->from);
1091 	op = o->op[z];
1092 	if(op == 0x0f) {
1093 		*andptr++ = op;
1094 		op = o->op[++z];
1095 	}
1096 	switch(t[2]) {
1097 	default:
1098 		diag("asmins: unknown z %d %P", t[2], p);
1099 		return;
1100 
1101 	case Zpseudo:
1102 		break;
1103 
1104 	case Zlit:
1105 		for(; op = o->op[z]; z++)
1106 			*andptr++ = op;
1107 		break;
1108 
1109 	case Zmb_r:
1110 		bytereg(&p->from);
1111 		/* fall through */
1112 	case Zm_r:
1113 		*andptr++ = op;
1114 		asmand(&p->from, &p->to);
1115 		break;
1116 
1117 	case Zm_r_xm:
1118 		mediaop(o, op, t[3], z);
1119 		asmand(&p->from, &p->to);
1120 		break;
1121 
1122 	case Zm_r_xm_nr:
1123 		rexflag = 0;
1124 		mediaop(o, op, t[3], z);
1125 		asmand(&p->from, &p->to);
1126 		break;
1127 
1128 	case Zm_r_i_xm:
1129 		mediaop(o, op, t[3], z);
1130 		asmand(&p->from, &p->to);
1131 		*andptr++ = p->to.offset;
1132 		break;
1133 
1134 	case Zm_r_3d:
1135 		*andptr++ = 0x0f;
1136 		*andptr++ = 0x0f;
1137 		asmand(&p->from, &p->to);
1138 		*andptr++ = op;
1139 		break;
1140 
1141 	case Zibm_r:
1142 		*andptr++ = op;
1143 		asmand(&p->from, &p->to);
1144 		*andptr++ = p->to.offset;
1145 		break;
1146 
1147 	case Zaut_r:
1148 		*andptr++ = 0x8d;	/* leal */
1149 		if(p->from.type != D_ADDR)
1150 			diag("asmins: Zaut sb type ADDR");
1151 		p->from.type = p->from.index;
1152 		p->from.index = D_NONE;
1153 		asmand(&p->from, &p->to);
1154 		p->from.index = p->from.type;
1155 		p->from.type = D_ADDR;
1156 		break;
1157 
1158 	case Zm_o:
1159 		*andptr++ = op;
1160 		asmando(&p->from, o->op[z+1]);
1161 		break;
1162 
1163 	case Zr_m:
1164 		*andptr++ = op;
1165 		asmand(&p->to, &p->from);
1166 		break;
1167 
1168 	case Zr_m_xm:
1169 		mediaop(o, op, t[3], z);
1170 		asmand(&p->to, &p->from);
1171 		break;
1172 
1173 	case Zr_m_xm_nr:
1174 		rexflag = 0;
1175 		mediaop(o, op, t[3], z);
1176 		asmand(&p->to, &p->from);
1177 		break;
1178 
1179 	case Zr_m_i_xm:
1180 		mediaop(o, op, t[3], z);
1181 		asmand(&p->to, &p->from);
1182 		*andptr++ = p->from.offset;
1183 		break;
1184 
1185 	case Zo_m:
1186 		*andptr++ = op;
1187 		asmando(&p->to, o->op[z+1]);
1188 		break;
1189 
1190 	case Zo_m64:
1191 		*andptr++ = op;
1192 		asmandsz(&p->to, o->op[z+1], 0, 1);
1193 		break;
1194 
1195 	case Zm_ibo:
1196 		v = vaddr(&p->to);
1197 		*andptr++ = op;
1198 		asmando(&p->from, o->op[z+1]);
1199 		*andptr++ = v;
1200 		break;
1201 
1202 	case Zibo_m:
1203 		*andptr++ = op;
1204 		asmando(&p->to, o->op[z+1]);
1205 		*andptr++ = v;
1206 		break;
1207 
1208 	case Zibo_m_xm:
1209 		z = mediaop(o, op, t[3], z);
1210 		asmando(&p->to, o->op[z+1]);
1211 		*andptr++ = v;
1212 		break;
1213 
1214 	case Z_ib:
1215 		v = vaddr(&p->to);
1216 	case Zib_:
1217 		*andptr++ = op;
1218 		*andptr++ = v;
1219 		break;
1220 
1221 	case Zib_rp:
1222 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1223 		*andptr++ = op + reg[p->to.type];
1224 		*andptr++ = v;
1225 		break;
1226 
1227 	case Zil_rp:
1228 		rexflag |= regrex[p->to.type] & Rxb;
1229 		*andptr++ = op + reg[p->to.type];
1230 		if(o->prefix == Pe) {
1231 			*andptr++ = v;
1232 			*andptr++ = v>>8;
1233 		}
1234 		else
1235 			put4(v);
1236 		break;
1237 
1238 	case Zo_iw:
1239 		*andptr++ = op;
1240 		if(p->from.type != D_NONE){
1241 			*andptr++ = v;
1242 			*andptr++ = v>>8;
1243 		}
1244 		break;
1245 
1246 	case Ziq_rp:
1247 		l = v>>32;
1248 		if(l == 0){
1249 			//p->mark |= 0100;
1250 			//print("zero: %llux %P\n", v, p);
1251 			rexflag &= ~(0x40|Rxw);
1252 			rexflag |= regrex[p->to.type] & Rxb;
1253 			*andptr++ = 0xb8 + reg[p->to.type];
1254 			put4(v);
1255 		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
1256 			//p->mark |= 0100;
1257 			//print("sign: %llux %P\n", v, p);
1258 			*andptr ++ = 0xc7;
1259 			asmando(&p->to, 0);
1260 			put4(v);
1261 		}else{	/* need all 8 */
1262 			//print("all: %llux %P\n", v, p);
1263 			rexflag |= regrex[p->to.type] & Rxb;
1264 			*andptr++ = op + reg[p->to.type];
1265 			put8(v);
1266 		}
1267 		break;
1268 
1269 	case Zib_rr:
1270 		*andptr++ = op;
1271 		asmand(&p->to, &p->to);
1272 		*andptr++ = v;
1273 		break;
1274 
1275 	case Z_il:
1276 		v = vaddr(&p->to);
1277 	case Zil_:
1278 		*andptr++ = op;
1279 		if(o->prefix == Pe) {
1280 			*andptr++ = v;
1281 			*andptr++ = v>>8;
1282 		}
1283 		else
1284 			put4(v);
1285 		break;
1286 
1287 	case Zm_ilo:
1288 		v = vaddr(&p->to);
1289 		*andptr++ = op;
1290 		asmando(&p->from, o->op[z+1]);
1291 		if(o->prefix == Pe) {
1292 			*andptr++ = v;
1293 			*andptr++ = v>>8;
1294 		}
1295 		else
1296 			put4(v);
1297 		break;
1298 
1299 	case Zilo_m:
1300 		*andptr++ = op;
1301 		asmando(&p->to, o->op[z+1]);
1302 		if(o->prefix == Pe) {
1303 			*andptr++ = v;
1304 			*andptr++ = v>>8;
1305 		}
1306 		else
1307 			put4(v);
1308 		break;
1309 
1310 	case Zil_rr:
1311 		*andptr++ = op;
1312 		asmand(&p->to, &p->to);
1313 		if(o->prefix == Pe) {
1314 			*andptr++ = v;
1315 			*andptr++ = v>>8;
1316 		}
1317 		else
1318 			put4(v);
1319 		break;
1320 
1321 	case Z_rp:
1322 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1323 		*andptr++ = op + reg[p->to.type];
1324 		break;
1325 
1326 	case Zrp_:
1327 		rexflag |= regrex[p->from.type] & (Rxb|0x40);
1328 		*andptr++ = op + reg[p->from.type];
1329 		break;
1330 
1331 	case Zclr:
1332 		*andptr++ = op;
1333 		asmand(&p->to, &p->to);
1334 		break;
1335 
1336 	case Zbr:
1337 		q = p->pcond;
1338 		if(q) {
1339 			v = q->pc - p->pc - 2;
1340 			if(v >= -128 && v <= 127) {
1341 				*andptr++ = op;
1342 				*andptr++ = v;
1343 			} else {
1344 				v -= 6-2;
1345 				*andptr++ = 0x0f;
1346 				*andptr++ = o->op[z+1];
1347 				*andptr++ = v;
1348 				*andptr++ = v>>8;
1349 				*andptr++ = v>>16;
1350 				*andptr++ = v>>24;
1351 			}
1352 		}
1353 		break;
1354 
1355 	case Zcall:
1356 		q = p->pcond;
1357 		if(q) {
1358 			v = q->pc - p->pc - 5;
1359 			if(dlm && curp != P && p->to.sym->type == SUNDEF){
1360 				/* v = 0 - p->pc - 5; */
1361 				v = 0;
1362 				ckoff(p->to.sym, v);
1363 				v += p->to.sym->value;
1364 				dynreloc(p->to.sym, p->pc+1, 0);
1365 			}
1366 			*andptr++ = op;
1367 			*andptr++ = v;
1368 			*andptr++ = v>>8;
1369 			*andptr++ = v>>16;
1370 			*andptr++ = v>>24;
1371 		}
1372 		break;
1373 
1374 	case Zjmp:
1375 		q = p->pcond;
1376 		if(q) {
1377 			v = q->pc - p->pc - 2;
1378 			if(v >= -128 && v <= 127) {
1379 				*andptr++ = op;
1380 				*andptr++ = v;
1381 			} else {
1382 				v -= 5-2;
1383 				*andptr++ = o->op[z+1];
1384 				*andptr++ = v;
1385 				*andptr++ = v>>8;
1386 				*andptr++ = v>>16;
1387 				*andptr++ = v>>24;
1388 			}
1389 		}
1390 		break;
1391 
1392 	case Zloop:
1393 		q = p->pcond;
1394 		if(q) {
1395 			v = q->pc - p->pc - 2;
1396 			if(v < -128 || v > 127)
1397 				diag("loop too far: %P", p);
1398 			*andptr++ = op;
1399 			*andptr++ = v;
1400 		}
1401 		break;
1402 
1403 	case Zbyte:
1404 		*andptr++ = v;
1405 		if(op > 1) {
1406 			*andptr++ = v>>8;
1407 			if(op > 2) {
1408 				*andptr++ = v>>16;
1409 				*andptr++ = v>>24;
1410 				if(op > 4) {
1411 					*andptr++ = v>>32;
1412 					*andptr++ = v>>40;
1413 					*andptr++ = v>>48;
1414 					*andptr++ = v>>56;
1415 				}
1416 			}
1417 		}
1418 		break;
1419 	}
1420 	return;
1421 
1422 domov:
1423 	for(mo=ymovtab; mo->as; mo++)
1424 		if(p->as == mo->as)
1425 		if(ycover[ft+mo->ft])
1426 		if(ycover[tt+mo->tt]){
1427 			t = mo->op;
1428 			goto mfound;
1429 		}
1430 bad:
1431 	if(p->mode != 64){
1432 		/*
1433 		 * here, the assembly has failed.
1434 		 * if its a byte instruction that has
1435 		 * unaddressable registers, try to
1436 		 * exchange registers and reissue the
1437 		 * instruction with the operands renamed.
1438 		 */
1439 		pp = *p;
1440 		z = p->from.type;
1441 		if(z >= D_BP && z <= D_DI) {
1442 			if(isax(&p->to)) {
1443 				*andptr++ = 0x87;			/* xchg lhs,bx */
1444 				asmando(&p->from, reg[D_BX]);
1445 				subreg(&pp, z, D_BX);
1446 				doasm(&pp);
1447 				*andptr++ = 0x87;			/* xchg lhs,bx */
1448 				asmando(&p->from, reg[D_BX]);
1449 			} else {
1450 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1451 				subreg(&pp, z, D_AX);
1452 				doasm(&pp);
1453 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1454 			}
1455 			return;
1456 		}
1457 		z = p->to.type;
1458 		if(z >= D_BP && z <= D_DI) {
1459 			if(isax(&p->from)) {
1460 				*andptr++ = 0x87;			/* xchg rhs,bx */
1461 				asmando(&p->to, reg[D_BX]);
1462 				subreg(&pp, z, D_BX);
1463 				doasm(&pp);
1464 				*andptr++ = 0x87;			/* xchg rhs,bx */
1465 				asmando(&p->to, reg[D_BX]);
1466 			} else {
1467 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1468 				subreg(&pp, z, D_AX);
1469 				doasm(&pp);
1470 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1471 			}
1472 			return;
1473 		}
1474 	}
1475 	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1476 	return;
1477 
1478 mfound:
1479 	switch(mo->code) {
1480 	default:
1481 		diag("asmins: unknown mov %d %P", mo->code, p);
1482 		break;
1483 
1484 	case 0:	/* lit */
1485 		for(z=0; t[z]!=E; z++)
1486 			*andptr++ = t[z];
1487 		break;
1488 
1489 	case 1:	/* r,m */
1490 		*andptr++ = t[0];
1491 		asmando(&p->to, t[1]);
1492 		break;
1493 
1494 	case 2:	/* m,r */
1495 		*andptr++ = t[0];
1496 		asmando(&p->from, t[1]);
1497 		break;
1498 
1499 	case 3:	/* r,m - 2op */
1500 		*andptr++ = t[0];
1501 		*andptr++ = t[1];
1502 		asmando(&p->to, t[2]);
1503 		rexflag |= regrex[p->from.type] & (Rxr|0x40);
1504 		break;
1505 
1506 	case 4:	/* m,r - 2op */
1507 		*andptr++ = t[0];
1508 		*andptr++ = t[1];
1509 		asmando(&p->from, t[2]);
1510 		rexflag |= regrex[p->to.type] & (Rxr|0x40);
1511 		break;
1512 
1513 	case 5:	/* load full pointer, trash heap */
1514 		if(t[0])
1515 			*andptr++ = t[0];
1516 		switch(p->to.index) {
1517 		default:
1518 			goto bad;
1519 		case D_DS:
1520 			*andptr++ = 0xc5;
1521 			break;
1522 		case D_SS:
1523 			*andptr++ = 0x0f;
1524 			*andptr++ = 0xb2;
1525 			break;
1526 		case D_ES:
1527 			*andptr++ = 0xc4;
1528 			break;
1529 		case D_FS:
1530 			*andptr++ = 0x0f;
1531 			*andptr++ = 0xb4;
1532 			break;
1533 		case D_GS:
1534 			*andptr++ = 0x0f;
1535 			*andptr++ = 0xb5;
1536 			break;
1537 		}
1538 		asmand(&p->from, &p->to);
1539 		break;
1540 
1541 	case 6:	/* double shift */
1542 		if(t[0] == Pw){
1543 			if(p->mode != 64)
1544 				diag("asmins: illegal 64: %P", p);
1545 			rexflag |= Pw;
1546 			t++;
1547 		}else if(t[0] == Pe){
1548 			*andptr++ = Pe;
1549 			t++;
1550 		}
1551 		z = p->from.type;
1552 		switch(z) {
1553 		default:
1554 			goto bad;
1555 		case D_CONST:
1556 			*andptr++ = 0x0f;
1557 			*andptr++ = t[0];
1558 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1559 			*andptr++ = p->from.offset;
1560 			break;
1561 		case D_CL:
1562 		case D_CX:
1563 			*andptr++ = 0x0f;
1564 			*andptr++ = t[1];
1565 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1566 			break;
1567 		}
1568 		break;
1569 	}
1570 }
1571 
1572 void
1573 asmins(Prog *p)
1574 {
1575 	int n, np, c;
1576 
1577 	rexflag = 0;
1578 	andptr = and;
1579 	asmode = p->mode;
1580 	doasm(p);
1581 	if(rexflag){
1582 		/*
1583 		 * as befits the whole approach of the architecture,
1584 		 * the rex prefix must appear before the first opcode byte
1585 		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
1586 		 * before the 0f opcode escape!), or it might be ignored.
1587 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1588 		 */
1589 		if(p->mode != 64)
1590 			diag("asmins: illegal in mode %d: %P", p->mode, p);
1591 		n = andptr - and;
1592 		for(np = 0; np < n; np++) {
1593 			c = and[np];
1594 			if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
1595 				break;
1596 		}
1597 		memmove(and+np+1, and+np, n-np);
1598 		and[np] = 0x40 | rexflag;
1599 		andptr++;
1600 	}
1601 }
1602 
/*
 * relocation kinds selected in dynreloc and used to index modemap:
 * ABS when dynreloc's abs argument is non-zero, REL otherwise;
 * U when the symbol's type is SUNDEF, D otherwise.
 */
enum{
	ABSD = 0,
	ABSU = 1,
	RELD = 2,
	RELU = 3,
};
1609 
/*
 * mode value that dynreloc records for each kind, indexed by
 * ABSD, ABSU, RELD, RELU.
 * NOTE(review): the RELD entry is -1 and dynreloc stores it into a
 * uchar unchanged -- presumably RELD relocations are not expected; confirm.
 */
int modemap[4] = { 0, 1, -1, 2, };
1611 
typedef struct Reloc Reloc;

/*
 * relocation table held as two parallel arrays;
 * dynreloc inserts entries so that a[] stays in ascending order.
 */
struct Reloc
{
	int n;		/* number of entries in use */
	int t;		/* number of entries allocated; raised 64 at a time by grow */
	uchar *m;	/* mode of each entry (a modemap value) */
	ulong *a;	/* address value of each entry */
};
1621 
/* the relocation table: filled in by dynreloc, written out by asmdyn */
Reloc rels;
1623 
1624 static void
1625 grow(Reloc *r)
1626 {
1627 	int t;
1628 	uchar *m, *nm;
1629 	ulong *a, *na;
1630 
1631 	t = r->t;
1632 	r->t += 64;
1633 	m = r->m;
1634 	a = r->a;
1635 	r->m = nm = malloc(r->t*sizeof(uchar));
1636 	r->a = na = malloc(r->t*sizeof(ulong));
1637 	memmove(nm, m, t*sizeof(uchar));
1638 	memmove(na, a, t*sizeof(ulong));
1639 	free(m);
1640 	free(a);
1641 }
1642 
1643 void
1644 dynreloc(Sym *s, ulong v, int abs)
1645 {
1646 	int i, k, n;
1647 	uchar *m;
1648 	ulong *a;
1649 	Reloc *r;
1650 
1651 	if(s->type == SUNDEF)
1652 		k = abs ? ABSU : RELU;
1653 	else
1654 		k = abs ? ABSD : RELD;
1655 	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1656 	k = modemap[k];
1657 	r = &rels;
1658 	n = r->n;
1659 	if(n >= r->t)
1660 		grow(r);
1661 	m = r->m;
1662 	a = r->a;
1663 	for(i = n; i > 0; i--){
1664 		if(v < a[i-1]){	/* happens occasionally for data */
1665 			m[i] = m[i-1];
1666 			a[i] = a[i-1];
1667 		}
1668 		else
1669 			break;
1670 	}
1671 	m[i] = k;
1672 	a[i] = v;
1673 	r->n++;
1674 }
1675 
/*
 * Emit the string s with cput, followed by its terminating zero byte.
 * Returns the number of bytes emitted: the string length plus one.
 */
static int
sput(char *s)
{
	int n;

	for(n = 0; s[n] != 0; n++) {
		cput(s[n]);
	}
	cput(0);
	return n+1;
}
1687 
1688 void
1689 asmdyn()
1690 {
1691 	int i, n, t, c;
1692 	Sym *s;
1693 	ulong la, ra, *a;
1694 	vlong off;
1695 	uchar *m;
1696 	Reloc *r;
1697 
1698 	cflush();
1699 	off = seek(cout, 0, 1);
1700 	lput(0);
1701 	t = 0;
1702 	lput(imports);
1703 	t += 4;
1704 	for(i = 0; i < NHASH; i++)
1705 		for(s = hash[i]; s != S; s = s->link)
1706 			if(s->type == SUNDEF){
1707 				lput(s->sig);
1708 				t += 4;
1709 				t += sput(s->name);
1710 			}
1711 
1712 	la = 0;
1713 	r = &rels;
1714 	n = r->n;
1715 	m = r->m;
1716 	a = r->a;
1717 	lput(n);
1718 	t += 4;
1719 	for(i = 0; i < n; i++){
1720 		ra = *a-la;
1721 		if(*a < la)
1722 			diag("bad relocation order");
1723 		if(ra < 256)
1724 			c = 0;
1725 		else if(ra < 65536)
1726 			c = 1;
1727 		else
1728 			c = 2;
1729 		cput((c<<6)|*m++);
1730 		t++;
1731 		if(c == 0){
1732 			cput(ra);
1733 			t++;
1734 		}
1735 		else if(c == 1){
1736 			wput(ra);
1737 			t += 2;
1738 		}
1739 		else{
1740 			lput(ra);
1741 			t += 4;
1742 		}
1743 		la = *a++;
1744 	}
1745 
1746 	cflush();
1747 	seek(cout, off, 0);
1748 	lput(t);
1749 
1750 	if(debug['v']){
1751 		Bprint(&bso, "import table entries = %d\n", imports);
1752 		Bprint(&bso, "export table entries = %d\n", exports);
1753 	}
1754 }
1755