xref: /inferno-os/utils/6l/span.c (revision d0e1d143ef6f03c75c008c7ec648859dd260cbab)
1 #include	"l.h"
2 
3 static int	rexflag;
4 static int	asmode;
5 
/*
 * Lay out the text segment: give every Prog its pc and encoded length.
 *
 * A first pass rewrites AADJSP into an add or subtract on D_SP (or ANOP)
 * and sets p->back.  Every instruction is then assembled once starting
 * at INITTEXT.  After that, branches (and instructions with bit 0100 set
 * in p->back) are re-assembled until no encoded length changes.
 * If INITRND is set and rounding the end of text moves INITDAT, the
 * whole layout is redone from the label start.
 * On return "etext" and every text symbol hold their final addresses
 * and textsize holds the number of text bytes.
 */
void
span(void)
{
	Prog *p, *q;
	long v;
	vlong c, idat;
	int m, n, again;

	xdefine("etext", STEXT, 0L);
	idat = INITDAT;	/* remembered so a later change to INITDAT can be detected */
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		n = 0;
		/* a branch with no resolved target is made to point at itself */
		if(p->to.type == D_BRANCH)
			if(p->pcond == P)
				p->pcond = p;
		/* back = 1 when there is a target whose own back is not 2, else 0 */
		if((q = p->pcond) != P)
			if(q->back != 2)
				n = 1;
		p->back = n;
		if(p->as == AADJSP) {
			/*
			 * replace the pseudo-op by arithmetic on D_SP using the
			 * negated offset: add by default, subtract (of the
			 * positive amount) when the negated offset is negative,
			 * and ANOP when it is zero.  The L or Q form is chosen
			 * by p->mode.
			 */
			p->to.type = D_SP;
			v = -p->from.offset;
			p->from.offset = v;
			p->as = p->mode != 64? AADDL: AADDQ;
			if(v < 0) {
				p->as = p->mode != 64? ASUBL: ASUBQ;
				v = -v;
				p->from.offset = v;
			}
			if(v == 0)
				p->as = ANOP;
		}
	}
	n = 0;	/* pass counter; not reset by goto start, so the limit below spans restarts */

start:
	if(debug['v'])
		Bprint(&bso, "%5.2f span\n", cputime());
	Bflush(&bso);
	/* initial layout: assemble everything once, recording each length in p->mark */
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		if(p->to.type == D_BRANCH)
			if(p->back)
				p->pc = c;
		asmins(p);
		p->pc = c;
		m = andptr-and;	/* bytes asmins left in the and[] buffer */
		p->mark = m;
		c += m;
	}

loop:
	n++;
	if(debug['v'])
		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
	Bflush(&bso);
	if(n > 50) {
		print("span must be looping\n");
		errorexit();
	}
	again = 0;
	c = INITTEXT;
	for(p = firstp; p != P; p = p->link) {
		if(p->as == ATEXT)
			curtext = p;
		/* only branches and instructions flagged 0100 in back are re-assembled */
		if(p->to.type == D_BRANCH || p->back & 0100) {
			if(p->back)
				p->pc = c;
			asmins(p);
			m = andptr-and;
			if(m != p->mark) {
				/* length changed: later pcs shift, so another pass is needed */
				p->mark = m;
				again++;
			}
		}
		p->pc = c;
		c += p->mark;
	}
	if(again) {
		textsize = c;
		goto loop;
	}
	if(INITRND) {
		/* data starts at the text end rounded up; if that moved, lay out again */
		INITDAT = rnd(c, INITRND);
		if(INITDAT != idat) {
			idat = INITDAT;
			goto start;
		}
	}
	xdefine("etext", STEXT, c);
	if(debug['v'])
		Bprint(&bso, "etext = %llux\n", c);
	Bflush(&bso);
	/* text Progs are chained through pcond from textp; publish their final pcs */
	for(p = textp; p != P; p = p->pcond)
		p->from.sym->value = p->pc;
	textsize = c - INITTEXT;
}
107 
108 void
109 xdefine(char *p, int t, vlong v)
110 {
111 	Sym *s;
112 
113 	s = lookup(p, 0);
114 	if(s->type == 0 || s->type == SXREF) {
115 		s->type = t;
116 		s->value = v;
117 	}
118 	if(s->type == STEXT && s->value == 0)
119 		s->value = v;
120 }
121 
122 void
123 putsymb(char *s, int t, long v, int ver)
124 {
125 	int i, f;
126 
127 	if(t == 'f')
128 		s++;
129 	lput(v);
130 	if(ver)
131 		t += 'a' - 'A';
132 	cput(t+0x80);			/* 0x80 is variable length */
133 
134 	if(t == 'Z' || t == 'z') {
135 		cput(s[0]);
136 		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
137 			cput(s[i]);
138 			cput(s[i+1]);
139 		}
140 		cput(0);
141 		cput(0);
142 		i++;
143 	}
144 	else {
145 		for(i=0; s[i]; i++)
146 			cput(s[i]);
147 		cput(0);
148 	}
149 	symsize += 4 + 1 + i + 1;
150 
151 	if(debug['n']) {
152 		if(t == 'z' || t == 'Z') {
153 			Bprint(&bso, "%c %.8lux ", t, v);
154 			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
155 				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
156 				Bprint(&bso, "/%x", f);
157 			}
158 			Bprint(&bso, "\n");
159 			return;
160 		}
161 		if(ver)
162 			Bprint(&bso, "%c %.8lux %s<%d>\n", t, v, s, ver);
163 		else
164 			Bprint(&bso, "%c %.8lux %s\n", t, v, s);
165 	}
166 }
167 
168 void
169 asmsym(void)
170 {
171 	Prog *p;
172 	Auto *a;
173 	Sym *s;
174 	int h;
175 
176 	s = lookup("etext", 0);
177 	if(s->type == STEXT)
178 		putsymb(s->name, 'T', s->value, s->version);
179 
180 	for(h=0; h<NHASH; h++)
181 		for(s=hash[h]; s!=S; s=s->link)
182 			switch(s->type) {
183 			case SCONST:
184 				putsymb(s->name, 'D', s->value, s->version);
185 				continue;
186 
187 			case SDATA:
188 				putsymb(s->name, 'D', s->value+INITDAT, s->version);
189 				continue;
190 
191 			case SBSS:
192 				putsymb(s->name, 'B', s->value+INITDAT, s->version);
193 				continue;
194 
195 			case SFILE:
196 				putsymb(s->name, 'f', s->value, s->version);
197 				continue;
198 			}
199 
200 	for(p=textp; p!=P; p=p->pcond) {
201 		s = p->from.sym;
202 		if(s->type != STEXT)
203 			continue;
204 
205 		/* filenames first */
206 		for(a=p->to.autom; a; a=a->link)
207 			if(a->type == D_FILE)
208 				putsymb(a->asym->name, 'z', a->aoffset, 0);
209 			else
210 			if(a->type == D_FILE1)
211 				putsymb(a->asym->name, 'Z', a->aoffset, 0);
212 
213 		putsymb(s->name, 'T', s->value, s->version);
214 
215 		/* frame, auto and param after */
216 		putsymb(".frame", 'm', p->to.offset+4, 0);
217 
218 		for(a=p->to.autom; a; a=a->link)
219 			if(a->type == D_AUTO)
220 				putsymb(a->asym->name, 'a', -a->aoffset, 0);
221 			else
222 			if(a->type == D_PARAM)
223 				putsymb(a->asym->name, 'p', a->aoffset, 0);
224 	}
225 	if(debug['v'] || debug['n'])
226 		Bprint(&bso, "symsize = %lud\n", symsize);
227 	Bflush(&bso);
228 }
229 
/*
 * Emit the pc/line table for the program and count its bytes in lcsize.
 *
 * The table is a byte stream of deltas, as the comments on the cput
 * calls below describe:
 *	129-255	advance pc by (byte-128)*MINLC
 *	0	followed by a 4-byte line delta, most significant byte first
 *	1-64	advance the line by that amount
 *	65-128	move the line back by (byte-64)
 * Instructions on the same line as the previous entry, and ATEXT and
 * ANOP instructions, produce no output.  The table is padded to an
 * even length with byte 129.  With debug['L'] a listing goes to bso.
 */
void
asmlc(void)
{
	long oldpc, oldlc;
	Prog *p;
	long v, s;

	oldpc = INITTEXT;
	oldlc = 0;
	for(p = firstp; p != P; p = p->link) {
		/* nothing to record: line unchanged, or a pseudo/no-op instruction */
		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
			if(p->as == ATEXT)
				curtext = p;
			if(debug['L'])
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			continue;
		}
		if(debug['L'])
			Bprint(&bso, "\t\t%6ld", lcsize);
		/* pc advance since the last entry, in MINLC units, at most 127 per byte */
		v = (p->pc - oldpc) / MINLC;
		while(v) {
			s = 127;
			if(v < 127)
				s = v;
			cput(s+128);	/* 129-255 +pc */
			if(debug['L'])
				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
			v -= s;
			lcsize++;
		}
		s = p->line - oldlc;
		oldlc = p->line;
		/* the next pc delta is measured from one MINLC past this instruction's pc */
		oldpc = p->pc + MINLC;
		if(s > 64 || s < -64) {
			/* line delta too large for one byte: escape byte 0 plus 4 bytes */
			cput(0);	/* 0 vv +lc */
			cput(s>>24);
			cput(s>>16);
			cput(s>>8);
			cput(s);
			if(debug['L']) {
				if(s > 0)
					Bprint(&bso, " lc+%ld(%d,%ld)\n",
						s, 0, s);
				else
					Bprint(&bso, " lc%ld(%d,%ld)\n",
						s, 0, s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
			lcsize += 5;
			continue;
		}
		if(s > 0) {
			cput(0+s);	/* 1-64 +lc */
			if(debug['L']) {
				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		} else {
			cput(64-s);	/* 65-128 -lc */
			if(debug['L']) {
				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
				Bprint(&bso, "%6llux %P\n",
					p->pc, p);
			}
		}
		lcsize++;
	}
	/* pad to an even number of bytes */
	while(lcsize & 1) {
		s = 129;
		cput(s);
		lcsize++;
	}
	if(debug['v'] || debug['L'])
		Bprint(&bso, "lcsize = %ld\n", lcsize);
	Bflush(&bso);
}
309 
/*
 * Classify operand a, returning one of the Y* operand class codes.
 * doasm multiplies the result by Ymax to index ycover.
 * Yxxx is returned when no class applies, including for the
 * R8-R15 registers (and their byte forms, with BPB/SIB/DIB) when
 * asmode is not 64.
 */
int
oclass(Adr *a)
{
	vlong v;
	long l;

	/* indirect, or carrying something in the index field */
	if(a->type >= D_INDIR || a->index != D_NONE) {
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				/* for D_ADDR the index field names the kind of object addressed */
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			return Yxxx;
		/* fall through */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a constant with no symbol is classified by the smallest form that holds it */
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			l = v;
			/* survives a round trip through long */
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
505 
506 void
507 asmidx(Adr *a, int base)
508 {
509 	int i;
510 
511 	switch(a->index) {
512 	default:
513 		goto bad;
514 
515 	case D_NONE:
516 		i = 4 << 3;
517 		goto bas;
518 
519 	case D_R8:
520 	case D_R9:
521 	case D_R10:
522 	case D_R11:
523 	case D_R12:
524 	case D_R13:
525 	case D_R14:
526 	case D_R15:
527 		if(asmode != 64)
528 			goto bad;
529 	case D_AX:
530 	case D_CX:
531 	case D_DX:
532 	case D_BX:
533 	case D_BP:
534 	case D_SI:
535 	case D_DI:
536 		i = reg[a->index] << 3;
537 		break;
538 	}
539 	switch(a->scale) {
540 	default:
541 		goto bad;
542 	case 1:
543 		break;
544 	case 2:
545 		i |= (1<<6);
546 		break;
547 	case 4:
548 		i |= (2<<6);
549 		break;
550 	case 8:
551 		i |= (3<<6);
552 		break;
553 	}
554 bas:
555 	switch(base) {
556 	default:
557 		goto bad;
558 	case D_NONE:	/* must be mod=00 */
559 		i |= 5;
560 		break;
561 	case D_R8:
562 	case D_R9:
563 	case D_R10:
564 	case D_R11:
565 	case D_R12:
566 	case D_R13:
567 	case D_R14:
568 	case D_R15:
569 		if(asmode != 64)
570 			goto bad;
571 	case D_AX:
572 	case D_CX:
573 	case D_DX:
574 	case D_BX:
575 	case D_SP:
576 	case D_BP:
577 	case D_SI:
578 	case D_DI:
579 		i |= reg[base];
580 		break;
581 	}
582 	*andptr++ = i;
583 	return;
584 bad:
585 	diag("asmidx: bad address %D", a);
586 	*andptr++ = 0;
587 	return;
588 }
589 
590 static void
591 put4(long v)
592 {
593 	if(dlm && curp != P && reloca != nil){
594 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
595 		reloca = nil;
596 	}
597 	andptr[0] = v;
598 	andptr[1] = v>>8;
599 	andptr[2] = v>>16;
600 	andptr[3] = v>>24;
601 	andptr += 4;
602 }
603 
604 static void
605 put8(vlong v)
606 {
607 	if(dlm && curp != P && reloca != nil){
608 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
609 		reloca = nil;
610 	}
611 	andptr[0] = v;
612 	andptr[1] = v>>8;
613 	andptr[2] = v>>16;
614 	andptr[3] = v>>24;
615 	andptr[4] = v>>32;
616 	andptr[5] = v>>40;
617 	andptr[6] = v>>48;
618 	andptr[7] = v>>56;
619 	andptr += 8;
620 }
621 
622 vlong
623 vaddr(Adr *a)
624 {
625 	int t;
626 	vlong v;
627 	Sym *s;
628 
629 	t = a->type;
630 	v = a->offset;
631 	if(t == D_ADDR)
632 		t = a->index;
633 	switch(t) {
634 	case D_STATIC:
635 	case D_EXTERN:
636 		s = a->sym;
637 		if(s != nil) {
638 			if(dlm && curp != P)
639 				reloca = a;
640 			switch(s->type) {
641 			case SUNDEF:
642 				ckoff(s, v);
643 			case STEXT:
644 			case SCONST:
645 				if((uvlong)s->value < (uvlong)INITTEXT)
646 					v += INITTEXT;	/* TO DO */
647 				v += s->value;
648 				break;
649 			default:
650 				v += INITDAT + s->value;
651 			}
652 		}
653 	}
654 	return v;
655 }
656 
/*
 * Append the addressing bytes for operand a to the instruction buffer.
 *
 * Each first byte written here has the form
 *	(mod << 6) | (r << 3) | rm
 * where r is the caller-supplied register/opcode-extension field.
 * It may be followed by an index byte (asmidx) and a 1- or 4-byte
 * displacement.  rex carries REX bits for r; only 0x40 and Rxr are
 * kept, and the bits needed by the operand's own registers are
 * or'ed into rexflag.
 * Symbolic operands (D_STATIC, D_EXTERN, D_AUTO, D_PARAM) are turned
 * into an absolute or SP-relative indirect operand and encoded by a
 * recursive call.
 * m64 is only passed through to the recursive calls in this function.
 * An operand that cannot be encoded is reported with diag.
 */
static void
asmandsz(Adr *a, int r, int rex, int m64)
{
	long v;
	int t;
	Adr aa;

	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	if(a->index != D_NONE) {
		/* indexed forms: rm=4 announces the index byte written by asmidx */
		if(t >= D_INDIR) {
			t -= D_INDIR;
			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
			if(t == D_NONE) {
				/* no base register: mod=0 with a 4-byte displacement */
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				put4(v);
				return;
			}
			/* BP and R13 as base are never encoded without a displacement */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				/* mod=1: one-byte displacement */
				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			/* mod=2: four-byte displacement */
			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* indexed symbolic operand: rewrite as indexed indirect and recurse */
		switch(t) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			aa.type = D_NONE+D_INDIR;
			break;
		case D_AUTO:
		case D_PARAM:
			aa.type = D_SP+D_INDIR;
			break;
		}
		aa.offset = vaddr(a);
		aa.index = a->index;
		aa.scale = a->scale;
		asmandsz(&aa, r, rex, m64);
		return;
	}
	if(t >= D_AL && t <= D_X0+15) {
		/* register operand: mod=3; an offset makes no sense here */
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	if(t >= D_INDIR) {
		t -= D_INDIR;
		rexflag |= (regrex[t] & Rxb) | rex;
		if(t == D_NONE) {
			/* absolute address */
			if(asmode != 64){
				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
				put4(v);
				return;
			}
			/* temporary */
			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
			put4(v);
			return;
		}
		if(t == D_SP || t == D_R12) {
			/* SP and R12 as base always get an index byte from asmidx */
			if(v == 0) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		if(t >= D_AX && t <= D_R15) {
			/* plain register indirect; BP and R13 always carry a displacement */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				return;
			}
			if(v >= -128 && v < 128) {
				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
				andptr[1] = v;
				andptr += 2;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			put4(v);
			return;
		}
		goto bad;
	}
	/* unindexed symbolic operand: rewrite as indirect and recurse */
	switch(a->type) {
	default:
		goto bad;
	case D_STATIC:
	case D_EXTERN:
		aa.type = D_NONE+D_INDIR;
		break;
	case D_AUTO:
	case D_PARAM:
		aa.type = D_SP+D_INDIR;
		break;
	}
	aa.index = D_NONE;
	aa.scale = 1;
	aa.offset = vaddr(a);
	asmandsz(&aa, r, rex, m64);
	return;
bad:
	diag("asmand: bad address %D", a);
	return;
}
788 
789 void
790 asmand(Adr *a, Adr *ra)
791 {
792 	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
793 }
794 
795 void
796 asmando(Adr *a, int o)
797 {
798 	asmandsz(a, o, 0, 0);
799 }
800 
801 static void
802 bytereg(Adr *a)
803 {
804 	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
805 		a->type = D_AL + (a->type-D_AX);
806 }
807 
/*
 * Special-case encodings, searched by doasm (label domov) when the
 * instruction's own operand table has no matching entry.
 *
 * doasm reads each entry as: as (opcode), ft and tt (operand classes
 * tested through ycover for the from and to operands), code (selects
 * how the bytes are emitted) and op (the bytes themselves).
 * NOTE(review): the initializer order as, ft, tt, code, op[4] is
 * inferred from the entries; confirm against the Movtab declaration.
 *
 * For code 0 the op bytes are copied out literally up to the first E.
 * For codes 1-4 the leading op byte(s) are emitted and the following
 * byte is used as the register field of the operand encoding.
 * The table is terminated by an entry whose as field is zero.
 */
#define	E	0xff
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
938 
939 int
940 isax(Adr *a)
941 {
942 
943 	switch(a->type) {
944 	case D_AX:
945 	case D_AL:
946 	case D_AH:
947 	case D_INDIR+D_AX:
948 		return 1;
949 	}
950 	if(a->index == D_AX)
951 		return 1;
952 	return 0;
953 }
954 
955 void
956 subreg(Prog *p, int from, int to)
957 {
958 
959 	if(debug['Q'])
960 		print("\n%P	s/%R/%R/\n", p, from, to);
961 
962 	if(p->from.type == from)
963 		p->from.type = to;
964 	if(p->to.type == from)
965 		p->to.type = to;
966 
967 	if(p->from.index == from)
968 		p->from.index = to;
969 	if(p->to.index == from)
970 		p->to.index = to;
971 
972 	from += D_INDIR;
973 	if(p->from.type == from)
974 		p->from.type = to+D_INDIR;
975 	if(p->to.type == from)
976 		p->to.type = to+D_INDIR;
977 
978 	if(debug['Q'])
979 		print("%P\n", p);
980 }
981 
982 static int
983 mediaop(Optab *o, int op, int osize, int z)
984 {
985 	switch(op){
986 	case Pm:
987 	case Pe:
988 	case Pf2:
989 	case Pf3:
990 		if(osize != 1){
991 			if(op != Pm)
992 				*andptr++ = op;
993 			*andptr++ = Pm;
994 			op = o->op[++z];
995 			break;
996 		}
997 	default:
998 		if(andptr == and || andptr[-1] != Pm)
999 			*andptr++ = Pm;
1000 		break;
1001 	}
1002 	*andptr++ = op;
1003 	return z;
1004 }
1005 
1006 void
1007 doasm(Prog *p)
1008 {
1009 	Optab *o;
1010 	Prog *q, pp;
1011 	uchar *t;
1012 	Movtab *mo;
1013 	int z, op, ft, tt, xo, l;
1014 	vlong v;
1015 
1016 	o = opindex[p->as];
1017 	if(o == nil) {
1018 		diag("asmins: missing op %P", p);
1019 		return;
1020 	}
1021 	ft = oclass(&p->from) * Ymax;
1022 	tt = oclass(&p->to) * Ymax;
1023 	t = o->ytab;
1024 	if(t == 0) {
1025 		diag("asmins: noproto %P", p);
1026 		return;
1027 	}
1028 	xo = o->op[0] == 0x0f;
1029 	for(z=0; *t; z+=t[3]+xo,t+=4)
1030 		if(ycover[ft+t[0]])
1031 		if(ycover[tt+t[1]])
1032 			goto found;
1033 	goto domov;
1034 
1035 found:
1036 	switch(o->prefix) {
1037 	case Pq:	/* 16 bit escape and opcode escape */
1038 		*andptr++ = Pe;
1039 		*andptr++ = Pm;
1040 		break;
1041 
1042 	case Pf2:	/* xmm opcode escape */
1043 	case Pf3:
1044 		*andptr++ = o->prefix;
1045 		*andptr++ = Pm;
1046 		break;
1047 
1048 	case Pm:	/* opcode escape */
1049 		*andptr++ = Pm;
1050 		break;
1051 
1052 	case Pe:	/* 16 bit escape */
1053 		*andptr++ = Pe;
1054 		break;
1055 
1056 	case Pw:	/* 64-bit escape */
1057 		if(p->mode != 64)
1058 			diag("asmins: illegal 64: %P", p);
1059 		rexflag |= Pw;
1060 		break;
1061 
1062 	case Pb:	/* botch */
1063 		bytereg(&p->from);
1064 		bytereg(&p->to);
1065 		break;
1066 
1067 	case P32:	/* 32 bit but illegal if 64-bit mode */
1068 		if(p->mode == 64)
1069 			diag("asmins: illegal in 64-bit mode: %P", p);
1070 		break;
1071 
1072 	case Py:	/* 64-bit only, no prefix */
1073 		if(p->mode != 64)
1074 			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1075 		break;
1076 	}
1077 	v = vaddr(&p->from);
1078 	op = o->op[z];
1079 	if(op == 0x0f) {
1080 		*andptr++ = op;
1081 		op = o->op[++z];
1082 	}
1083 	switch(t[2]) {
1084 	default:
1085 		diag("asmins: unknown z %d %P", t[2], p);
1086 		return;
1087 
1088 	case Zpseudo:
1089 		break;
1090 
1091 	case Zlit:
1092 		for(; op = o->op[z]; z++)
1093 			*andptr++ = op;
1094 		break;
1095 
1096 	case Zmb_r:
1097 		bytereg(&p->from);
1098 		/* fall through */
1099 	case Zm_r:
1100 		*andptr++ = op;
1101 		asmand(&p->from, &p->to);
1102 		break;
1103 
1104 	case Zm_r_xm:
1105 		mediaop(o, op, t[3], z);
1106 		asmand(&p->from, &p->to);
1107 		break;
1108 
1109 	case Zm_r_xm_nr:
1110 		rexflag = 0;
1111 		mediaop(o, op, t[3], z);
1112 		asmand(&p->from, &p->to);
1113 		break;
1114 
1115 	case Zm_r_i_xm:
1116 		mediaop(o, op, t[3], z);
1117 		asmand(&p->from, &p->to);
1118 		*andptr++ = p->to.offset;
1119 		break;
1120 
1121 	case Zm_r_3d:
1122 		*andptr++ = 0x0f;
1123 		*andptr++ = 0x0f;
1124 		asmand(&p->from, &p->to);
1125 		*andptr++ = op;
1126 		break;
1127 
1128 	case Zibm_r:
1129 		*andptr++ = op;
1130 		asmand(&p->from, &p->to);
1131 		*andptr++ = p->to.offset;
1132 		break;
1133 
1134 	case Zaut_r:
1135 		*andptr++ = 0x8d;	/* leal */
1136 		if(p->from.type != D_ADDR)
1137 			diag("asmins: Zaut sb type ADDR");
1138 		p->from.type = p->from.index;
1139 		p->from.index = D_NONE;
1140 		asmand(&p->from, &p->to);
1141 		p->from.index = p->from.type;
1142 		p->from.type = D_ADDR;
1143 		break;
1144 
1145 	case Zm_o:
1146 		*andptr++ = op;
1147 		asmando(&p->from, o->op[z+1]);
1148 		break;
1149 
1150 	case Zr_m:
1151 		*andptr++ = op;
1152 		asmand(&p->to, &p->from);
1153 		break;
1154 
1155 	case Zr_m_xm:
1156 		mediaop(o, op, t[3], z);
1157 		asmand(&p->to, &p->from);
1158 		break;
1159 
1160 	case Zr_m_xm_nr:
1161 		rexflag = 0;
1162 		mediaop(o, op, t[3], z);
1163 		asmand(&p->to, &p->from);
1164 		break;
1165 
1166 	case Zr_m_i_xm:
1167 		mediaop(o, op, t[3], z);
1168 		asmand(&p->to, &p->from);
1169 		*andptr++ = p->from.offset;
1170 		break;
1171 
1172 	case Zo_m:
1173 		*andptr++ = op;
1174 		asmando(&p->to, o->op[z+1]);
1175 		break;
1176 
1177 	case Zo_m64:
1178 		*andptr++ = op;
1179 		asmandsz(&p->to, o->op[z+1], 0, 1);
1180 		break;
1181 
1182 	case Zm_ibo:
1183 		v = vaddr(&p->to);
1184 		*andptr++ = op;
1185 		asmando(&p->from, o->op[z+1]);
1186 		*andptr++ = v;
1187 		break;
1188 
1189 	case Zibo_m:
1190 		*andptr++ = op;
1191 		asmando(&p->to, o->op[z+1]);
1192 		*andptr++ = v;
1193 		break;
1194 
1195 	case Zibo_m_xm:
1196 		z = mediaop(o, op, t[3], z);
1197 		asmando(&p->to, o->op[z+1]);
1198 		*andptr++ = v;
1199 		break;
1200 
1201 	case Z_ib:
1202 		v = vaddr(&p->to);
1203 	case Zib_:
1204 		*andptr++ = op;
1205 		*andptr++ = v;
1206 		break;
1207 
1208 	case Zib_rp:
1209 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1210 		*andptr++ = op + reg[p->to.type];
1211 		*andptr++ = v;
1212 		break;
1213 
1214 	case Zil_rp:
1215 		rexflag |= regrex[p->to.type] & Rxb;
1216 		*andptr++ = op + reg[p->to.type];
1217 		if(o->prefix == Pe) {
1218 			*andptr++ = v;
1219 			*andptr++ = v>>8;
1220 		}
1221 		else
1222 			put4(v);
1223 		break;
1224 
1225 	case Zo_iw:
1226 		*andptr++ = op;
1227 		if(p->from.type != D_NONE){
1228 			*andptr++ = v;
1229 			*andptr++ = v>>8;
1230 		}
1231 		break;
1232 
1233 	case Ziq_rp:
1234 		l = v>>32;
1235 		if(l == 0){
1236 			//p->mark |= 0100;
1237 			//print("zero: %llux %P\n", v, p);
1238 			rexflag &= ~(0x40|Rxw);
1239 			rexflag |= regrex[p->to.type] & Rxb;
1240 			*andptr++ = 0xb8 + reg[p->to.type];
1241 			put4(v);
1242 		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
1243 			//p->mark |= 0100;
1244 			//print("sign: %llux %P\n", v, p);
1245 			*andptr ++ = 0xc7;
1246 			asmando(&p->to, 0);
1247 			put4(v);
1248 		}else{	/* need all 8 */
1249 			//print("all: %llux %P\n", v, p);
1250 			rexflag |= regrex[p->to.type] & Rxb;
1251 			*andptr++ = op + reg[p->to.type];
1252 			put8(v);
1253 		}
1254 		break;
1255 
1256 	case Zib_rr:
1257 		*andptr++ = op;
1258 		asmand(&p->to, &p->to);
1259 		*andptr++ = v;
1260 		break;
1261 
1262 	case Z_il:
1263 		v = vaddr(&p->to);
1264 	case Zil_:
1265 		*andptr++ = op;
1266 		if(o->prefix == Pe) {
1267 			*andptr++ = v;
1268 			*andptr++ = v>>8;
1269 		}
1270 		else
1271 			put4(v);
1272 		break;
1273 
1274 	case Zm_ilo:
1275 		v = vaddr(&p->to);
1276 		*andptr++ = op;
1277 		asmando(&p->from, o->op[z+1]);
1278 		if(o->prefix == Pe) {
1279 			*andptr++ = v;
1280 			*andptr++ = v>>8;
1281 		}
1282 		else
1283 			put4(v);
1284 		break;
1285 
1286 	case Zilo_m:
1287 		*andptr++ = op;
1288 		asmando(&p->to, o->op[z+1]);
1289 		if(o->prefix == Pe) {
1290 			*andptr++ = v;
1291 			*andptr++ = v>>8;
1292 		}
1293 		else
1294 			put4(v);
1295 		break;
1296 
1297 	case Zil_rr:
1298 		*andptr++ = op;
1299 		asmand(&p->to, &p->to);
1300 		if(o->prefix == Pe) {
1301 			*andptr++ = v;
1302 			*andptr++ = v>>8;
1303 		}
1304 		else
1305 			put4(v);
1306 		break;
1307 
1308 	case Z_rp:
1309 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1310 		*andptr++ = op + reg[p->to.type];
1311 		break;
1312 
1313 	case Zrp_:
1314 		rexflag |= regrex[p->from.type] & (Rxb|0x40);
1315 		*andptr++ = op + reg[p->from.type];
1316 		break;
1317 
1318 	case Zclr:
1319 		*andptr++ = op;
1320 		asmand(&p->to, &p->to);
1321 		break;
1322 
1323 	case Zbr:
1324 		q = p->pcond;
1325 		if(q) {
1326 			v = q->pc - p->pc - 2;
1327 			if(v >= -128 && v <= 127) {
1328 				*andptr++ = op;
1329 				*andptr++ = v;
1330 			} else {
1331 				v -= 6-2;
1332 				*andptr++ = 0x0f;
1333 				*andptr++ = o->op[z+1];
1334 				*andptr++ = v;
1335 				*andptr++ = v>>8;
1336 				*andptr++ = v>>16;
1337 				*andptr++ = v>>24;
1338 			}
1339 		}
1340 		break;
1341 
1342 	case Zcall:
1343 		q = p->pcond;
1344 		if(q) {
1345 			v = q->pc - p->pc - 5;
1346 			if(dlm && curp != P && p->to.sym->type == SUNDEF){
1347 				/* v = 0 - p->pc - 5; */
1348 				v = 0;
1349 				ckoff(p->to.sym, v);
1350 				v += p->to.sym->value;
1351 				dynreloc(p->to.sym, p->pc+1, 0);
1352 			}
1353 			*andptr++ = op;
1354 			*andptr++ = v;
1355 			*andptr++ = v>>8;
1356 			*andptr++ = v>>16;
1357 			*andptr++ = v>>24;
1358 		}
1359 		break;
1360 
1361 	case Zjmp:
1362 		q = p->pcond;
1363 		if(q) {
1364 			v = q->pc - p->pc - 2;
1365 			if(v >= -128 && v <= 127) {
1366 				*andptr++ = op;
1367 				*andptr++ = v;
1368 			} else {
1369 				v -= 5-2;
1370 				*andptr++ = o->op[z+1];
1371 				*andptr++ = v;
1372 				*andptr++ = v>>8;
1373 				*andptr++ = v>>16;
1374 				*andptr++ = v>>24;
1375 			}
1376 		}
1377 		break;
1378 
1379 	case Zloop:
1380 		q = p->pcond;
1381 		if(q) {
1382 			v = q->pc - p->pc - 2;
1383 			if(v < -128 && v > 127)
1384 				diag("loop too far: %P", p);
1385 			*andptr++ = op;
1386 			*andptr++ = v;
1387 		}
1388 		break;
1389 
1390 	case Zbyte:
1391 		*andptr++ = v;
1392 		if(op > 1) {
1393 			*andptr++ = v>>8;
1394 			if(op > 2) {
1395 				*andptr++ = v>>16;
1396 				*andptr++ = v>>24;
1397 				if(op > 4) {
1398 					*andptr++ = v>>32;
1399 					*andptr++ = v>>40;
1400 					*andptr++ = v>>48;
1401 					*andptr++ = v>>56;
1402 				}
1403 			}
1404 		}
1405 		break;
1406 	}
1407 	return;
1408 
1409 domov:
1410 	for(mo=ymovtab; mo->as; mo++)
1411 		if(p->as == mo->as)
1412 		if(ycover[ft+mo->ft])
1413 		if(ycover[tt+mo->tt]){
1414 			t = mo->op;
1415 			goto mfound;
1416 		}
1417 bad:
1418 	if(p->mode != 64){
1419 		/*
1420 		 * here, the assembly has failed.
1421 		 * if its a byte instruction that has
1422 		 * unaddressable registers, try to
1423 		 * exchange registers and reissue the
1424 		 * instruction with the operands renamed.
1425 		 */
1426 		pp = *p;
1427 		z = p->from.type;
1428 		if(z >= D_BP && z <= D_DI) {
1429 			if(isax(&p->to)) {
1430 				*andptr++ = 0x87;			/* xchg lhs,bx */
1431 				asmando(&p->from, reg[D_BX]);
1432 				subreg(&pp, z, D_BX);
1433 				doasm(&pp);
1434 				*andptr++ = 0x87;			/* xchg lhs,bx */
1435 				asmando(&p->from, reg[D_BX]);
1436 			} else {
1437 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1438 				subreg(&pp, z, D_AX);
1439 				doasm(&pp);
1440 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1441 			}
1442 			return;
1443 		}
1444 		z = p->to.type;
1445 		if(z >= D_BP && z <= D_DI) {
1446 			if(isax(&p->from)) {
1447 				*andptr++ = 0x87;			/* xchg rhs,bx */
1448 				asmando(&p->to, reg[D_BX]);
1449 				subreg(&pp, z, D_BX);
1450 				doasm(&pp);
1451 				*andptr++ = 0x87;			/* xchg rhs,bx */
1452 				asmando(&p->to, reg[D_BX]);
1453 			} else {
1454 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1455 				subreg(&pp, z, D_AX);
1456 				doasm(&pp);
1457 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1458 			}
1459 			return;
1460 		}
1461 	}
1462 	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1463 	return;
1464 
1465 mfound:
1466 	switch(mo->code) {
1467 	default:
1468 		diag("asmins: unknown mov %d %P", mo->code, p);
1469 		break;
1470 
1471 	case 0:	/* lit */
1472 		for(z=0; t[z]!=E; z++)
1473 			*andptr++ = t[z];
1474 		break;
1475 
1476 	case 1:	/* r,m */
1477 		*andptr++ = t[0];
1478 		asmando(&p->to, t[1]);
1479 		break;
1480 
1481 	case 2:	/* m,r */
1482 		*andptr++ = t[0];
1483 		asmando(&p->from, t[1]);
1484 		break;
1485 
1486 	case 3:	/* r,m - 2op */
1487 		*andptr++ = t[0];
1488 		*andptr++ = t[1];
1489 		asmando(&p->to, t[2]);
1490 		rexflag |= regrex[p->from.type] & (Rxr|0x40);
1491 		break;
1492 
1493 	case 4:	/* m,r - 2op */
1494 		*andptr++ = t[0];
1495 		*andptr++ = t[1];
1496 		asmando(&p->from, t[2]);
1497 		rexflag |= regrex[p->to.type] & (Rxr|0x40);
1498 		break;
1499 
1500 	case 5:	/* load full pointer, trash heap */
1501 		if(t[0])
1502 			*andptr++ = t[0];
1503 		switch(p->to.index) {
1504 		default:
1505 			goto bad;
1506 		case D_DS:
1507 			*andptr++ = 0xc5;
1508 			break;
1509 		case D_SS:
1510 			*andptr++ = 0x0f;
1511 			*andptr++ = 0xb2;
1512 			break;
1513 		case D_ES:
1514 			*andptr++ = 0xc4;
1515 			break;
1516 		case D_FS:
1517 			*andptr++ = 0x0f;
1518 			*andptr++ = 0xb4;
1519 			break;
1520 		case D_GS:
1521 			*andptr++ = 0x0f;
1522 			*andptr++ = 0xb5;
1523 			break;
1524 		}
1525 		asmand(&p->from, &p->to);
1526 		break;
1527 
1528 	case 6:	/* double shift */
1529 		if(t[0] == Pw){
1530 			if(p->mode != 64)
1531 				diag("asmins: illegal 64: %P", p);
1532 			rexflag |= Pw;
1533 			t++;
1534 		}else if(t[0] == Pe){
1535 			*andptr++ = Pe;
1536 			t++;
1537 		}
1538 		z = p->from.type;
1539 		switch(z) {
1540 		default:
1541 			goto bad;
1542 		case D_CONST:
1543 			*andptr++ = 0x0f;
1544 			*andptr++ = t[0];
1545 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1546 			*andptr++ = p->from.offset;
1547 			break;
1548 		case D_CL:
1549 		case D_CX:
1550 			*andptr++ = 0x0f;
1551 			*andptr++ = t[1];
1552 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1553 			break;
1554 		}
1555 		break;
1556 	}
1557 }
1558 
1559 void
1560 asmins(Prog *p)
1561 {
1562 	int n, np, c;
1563 
1564 	rexflag = 0;
1565 	andptr = and;
1566 	asmode = p->mode;
1567 	doasm(p);
1568 	if(rexflag){
1569 		/*
1570 		 * as befits the whole approach of the architecture,
1571 		 * the rex prefix must appear before the first opcode byte
1572 		 * (and thus after any 66/67/f2/f3 prefix bytes, but
1573 		 * before the 0f opcode escape!), or it might be ignored.
1574 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1575 		 */
1576 		if(p->mode != 64)
1577 			diag("asmins: illegal in mode %d: %P", p->mode, p);
1578 		n = andptr - and;
1579 		for(np = 0; np < n; np++) {
1580 			c = and[np];
1581 			if(c != 0x66 && c != 0xf2 && c != 0xf3 && c != 0x67)
1582 				break;
1583 		}
1584 		memmove(and+np+1, and+np, n-np);
1585 		and[np] = 0x40 | rexflag;
1586 		andptr++;
1587 	}
1588 }
1589 
/*
 * relocation kinds chosen by dynreloc from its abs argument and
 * from whether the symbol's type is SUNDEF; used to index modemap.
 */
enum
{
	ABSD = 0,	/* abs set, symbol not SUNDEF */
	ABSU,		/* abs set, symbol is SUNDEF */
	RELD,		/* abs clear, symbol not SUNDEF */
	RELU		/* abs clear, symbol is SUNDEF */
};
1596 
/*
 * relocation kind -> mode value that dynreloc stores in the table.
 * RELD maps to -1; presumably that kind has no mode of its own.
 */
int modemap[4] = {
	0,	/* ABSD */
	1,	/* ABSU */
	-1,	/* RELD */
	2	/* RELU */
};
1598 
1599 typedef struct Reloc Reloc;
1600 
1601 struct Reloc
1602 {
1603 	int n;
1604 	int t;
1605 	uchar *m;
1606 	ulong *a;
1607 };
1608 
1609 Reloc rels;
1610 
1611 static void
1612 grow(Reloc *r)
1613 {
1614 	int t;
1615 	uchar *m, *nm;
1616 	ulong *a, *na;
1617 
1618 	t = r->t;
1619 	r->t += 64;
1620 	m = r->m;
1621 	a = r->a;
1622 	r->m = nm = malloc(r->t*sizeof(uchar));
1623 	r->a = na = malloc(r->t*sizeof(ulong));
1624 	memmove(nm, m, t*sizeof(uchar));
1625 	memmove(na, a, t*sizeof(ulong));
1626 	free(m);
1627 	free(a);
1628 }
1629 
1630 void
1631 dynreloc(Sym *s, ulong v, int abs)
1632 {
1633 	int i, k, n;
1634 	uchar *m;
1635 	ulong *a;
1636 	Reloc *r;
1637 
1638 	if(s->type == SUNDEF)
1639 		k = abs ? ABSU : RELU;
1640 	else
1641 		k = abs ? ABSD : RELD;
1642 	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1643 	k = modemap[k];
1644 	r = &rels;
1645 	n = r->n;
1646 	if(n >= r->t)
1647 		grow(r);
1648 	m = r->m;
1649 	a = r->a;
1650 	for(i = n; i > 0; i--){
1651 		if(v < a[i-1]){	/* happens occasionally for data */
1652 			m[i] = m[i-1];
1653 			a[i] = a[i-1];
1654 		}
1655 		else
1656 			break;
1657 	}
1658 	m[i] = k;
1659 	a[i] = v;
1660 	r->n++;
1661 }
1662 
/*
 * write the string s, including its terminating zero byte, with cput.
 * returns the number of bytes written.
 */
static int
sput(char *s)
{
	int len;

	for(len = 0; s[len] != 0; len++)
		cput(s[len]);
	cput(0);
	return len+1;
}
1674 
1675 void
1676 asmdyn()
1677 {
1678 	int i, n, t, c;
1679 	Sym *s;
1680 	ulong la, ra, *a;
1681 	vlong off;
1682 	uchar *m;
1683 	Reloc *r;
1684 
1685 	cflush();
1686 	off = seek(cout, 0, 1);
1687 	lput(0);
1688 	t = 0;
1689 	lput(imports);
1690 	t += 4;
1691 	for(i = 0; i < NHASH; i++)
1692 		for(s = hash[i]; s != S; s = s->link)
1693 			if(s->type == SUNDEF){
1694 				lput(s->sig);
1695 				t += 4;
1696 				t += sput(s->name);
1697 			}
1698 
1699 	la = 0;
1700 	r = &rels;
1701 	n = r->n;
1702 	m = r->m;
1703 	a = r->a;
1704 	lput(n);
1705 	t += 4;
1706 	for(i = 0; i < n; i++){
1707 		ra = *a-la;
1708 		if(*a < la)
1709 			diag("bad relocation order");
1710 		if(ra < 256)
1711 			c = 0;
1712 		else if(ra < 65536)
1713 			c = 1;
1714 		else
1715 			c = 2;
1716 		cput((c<<6)|*m++);
1717 		t++;
1718 		if(c == 0){
1719 			cput(ra);
1720 			t++;
1721 		}
1722 		else if(c == 1){
1723 			wput(ra);
1724 			t += 2;
1725 		}
1726 		else{
1727 			lput(ra);
1728 			t += 4;
1729 		}
1730 		la = *a++;
1731 	}
1732 
1733 	cflush();
1734 	seek(cout, off, 0);
1735 	lput(t);
1736 
1737 	if(debug['v']){
1738 		Bprint(&bso, "import table entries = %d\n", imports);
1739 		Bprint(&bso, "export table entries = %d\n", exports);
1740 	}
1741 }
1742