xref: /inferno-os/utils/6l/span.c (revision e57c7e16a3789cd4de1a3c2560d49b1ee39cd10a)
1 #include	"l.h"
2 
/*
 * bits OR'd in by the operand encoders in this file (regrex[] bits and Pw);
 * it is consumed outside this view -- presumably to build the REX prefix,
 * TODO confirm against asmins.
 */
static int	rexflag;
/*
 * compared against 64 to accept or reject R8-R15 and the extended byte
 * registers; it is set outside this view.
 */
static int	asmode;
5 
6 void
7 span(void)
8 {
9 	Prog *p, *q;
10 	long v;
11 	vlong c, idat;
12 	int m, n, again;
13 
14 	xdefine("etext", STEXT, 0L);
15 	idat = INITDAT;
16 	for(p = firstp; p != P; p = p->link) {
17 		if(p->as == ATEXT)
18 			curtext = p;
19 		n = 0;
20 		if(p->to.type == D_BRANCH)
21 			if(p->pcond == P)
22 				p->pcond = p;
23 		if((q = p->pcond) != P)
24 			if(q->back != 2)
25 				n = 1;
26 		p->back = n;
27 		if(p->as == AADJSP) {
28 			p->to.type = D_SP;
29 			v = -p->from.offset;
30 			p->from.offset = v;
31 			p->as = p->mode != 64? AADDL: AADDQ;
32 			if(v < 0) {
33 				p->as = p->mode != 64? ASUBL: ASUBQ;
34 				v = -v;
35 				p->from.offset = v;
36 			}
37 			if(v == 0)
38 				p->as = ANOP;
39 		}
40 	}
41 	n = 0;
42 
43 start:
44 	if(debug['v'])
45 		Bprint(&bso, "%5.2f span\n", cputime());
46 	Bflush(&bso);
47 	c = INITTEXT;
48 	for(p = firstp; p != P; p = p->link) {
49 		if(p->as == ATEXT)
50 			curtext = p;
51 		if(p->to.type == D_BRANCH)
52 			if(p->back)
53 				p->pc = c;
54 		asmins(p);
55 		p->pc = c;
56 		m = andptr-and;
57 		p->mark = m;
58 		c += m;
59 	}
60 
61 loop:
62 	n++;
63 	if(debug['v'])
64 		Bprint(&bso, "%5.2f span %d\n", cputime(), n);
65 	Bflush(&bso);
66 	if(n > 50) {
67 		print("span must be looping\n");
68 		errorexit();
69 	}
70 	again = 0;
71 	c = INITTEXT;
72 	for(p = firstp; p != P; p = p->link) {
73 		if(p->as == ATEXT)
74 			curtext = p;
75 		if(p->to.type == D_BRANCH || p->back & 0100) {
76 			if(p->back)
77 				p->pc = c;
78 			asmins(p);
79 			m = andptr-and;
80 			if(m != p->mark) {
81 				p->mark = m;
82 				again++;
83 			}
84 		}
85 		p->pc = c;
86 		c += p->mark;
87 	}
88 	if(again) {
89 		textsize = c;
90 		goto loop;
91 	}
92 	if(INITRND) {
93 		INITDAT = rnd(c, INITRND);
94 		if(INITDAT != idat) {
95 			idat = INITDAT;
96 			goto start;
97 		}
98 	}
99 	xdefine("etext", STEXT, c);
100 	if(debug['v'])
101 		Bprint(&bso, "etext = %llux\n", c);
102 	Bflush(&bso);
103 	for(p = textp; p != P; p = p->pcond)
104 		p->from.sym->value = p->pc;
105 	textsize = c - INITTEXT;
106 }
107 
108 void
109 xdefine(char *p, int t, vlong v)
110 {
111 	Sym *s;
112 
113 	s = lookup(p, 0);
114 	if(s->type == 0 || s->type == SXREF) {
115 		s->type = t;
116 		s->value = v;
117 	}
118 	if(s->type == STEXT && s->value == 0)
119 		s->value = v;
120 }
121 
122 void
123 putsymb(char *s, int t, vlong v, int ver)
124 {
125 	int i, f, l;
126 
127 	if(t == 'f')
128 		s++;
129 	l = 4;
130 	if(!debug['8']){
131 		lput(v>>32);
132 		l = 8;
133 	}
134 	lput(v);
135 	if(ver)
136 		t += 'a' - 'A';
137 	cput(t+0x80);			/* 0x80 is variable length */
138 
139 	if(t == 'Z' || t == 'z') {
140 		cput(s[0]);
141 		for(i=1; s[i] != 0 || s[i+1] != 0; i += 2) {
142 			cput(s[i]);
143 			cput(s[i+1]);
144 		}
145 		cput(0);
146 		cput(0);
147 		i++;
148 	}
149 	else {
150 		for(i=0; s[i]; i++)
151 			cput(s[i]);
152 		cput(0);
153 	}
154 	symsize += l + 1 + i + 1;
155 
156 	if(debug['n']) {
157 		if(t == 'z' || t == 'Z') {
158 			Bprint(&bso, "%c %.8llux ", t, v);
159 			for(i=1; s[i] != 0 || s[i+1] != 0; i+=2) {
160 				f = ((s[i]&0xff) << 8) | (s[i+1]&0xff);
161 				Bprint(&bso, "/%x", f);
162 			}
163 			Bprint(&bso, "\n");
164 			return;
165 		}
166 		if(ver)
167 			Bprint(&bso, "%c %.8llux %s<%d>\n", t, v, s, ver);
168 		else
169 			Bprint(&bso, "%c %.8llux %s\n", t, v, s);
170 	}
171 }
172 
173 void
174 asmsym(void)
175 {
176 	Prog *p;
177 	Auto *a;
178 	Sym *s;
179 	int h;
180 
181 	s = lookup("etext", 0);
182 	if(s->type == STEXT)
183 		putsymb(s->name, 'T', s->value, s->version);
184 
185 	for(h=0; h<NHASH; h++)
186 		for(s=hash[h]; s!=S; s=s->link)
187 			switch(s->type) {
188 			case SCONST:
189 				putsymb(s->name, 'D', s->value, s->version);
190 				continue;
191 
192 			case SDATA:
193 				putsymb(s->name, 'D', s->value+INITDAT, s->version);
194 				continue;
195 
196 			case SBSS:
197 				putsymb(s->name, 'B', s->value+INITDAT, s->version);
198 				continue;
199 
200 			case SFILE:
201 				putsymb(s->name, 'f', s->value, s->version);
202 				continue;
203 			}
204 
205 	for(p=textp; p!=P; p=p->pcond) {
206 		s = p->from.sym;
207 		if(s->type != STEXT)
208 			continue;
209 
210 		/* filenames first */
211 		for(a=p->to.autom; a; a=a->link)
212 			if(a->type == D_FILE)
213 				putsymb(a->asym->name, 'z', a->aoffset, 0);
214 			else
215 			if(a->type == D_FILE1)
216 				putsymb(a->asym->name, 'Z', a->aoffset, 0);
217 
218 		putsymb(s->name, 'T', s->value, s->version);
219 
220 		/* frame, auto and param after */
221 		putsymb(".frame", 'm', p->to.offset+8, 0);
222 
223 		for(a=p->to.autom; a; a=a->link)
224 			if(a->type == D_AUTO)
225 				putsymb(a->asym->name, 'a', -a->aoffset, 0);
226 			else
227 			if(a->type == D_PARAM)
228 				putsymb(a->asym->name, 'p', a->aoffset, 0);
229 	}
230 	if(debug['v'] || debug['n'])
231 		Bprint(&bso, "symsize = %lud\n", symsize);
232 	Bflush(&bso);
233 }
234 
235 void
236 asmlc(void)
237 {
238 	vlong oldpc;
239 	Prog *p;
240 	long oldlc, v, s;
241 
242 	oldpc = INITTEXT;
243 	oldlc = 0;
244 	for(p = firstp; p != P; p = p->link) {
245 		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {
246 			if(p->as == ATEXT)
247 				curtext = p;
248 			if(debug['L'])
249 				Bprint(&bso, "%6llux %P\n",
250 					p->pc, p);
251 			continue;
252 		}
253 		if(debug['L'])
254 			Bprint(&bso, "\t\t%6ld", lcsize);
255 		v = (p->pc - oldpc) / MINLC;
256 		while(v) {
257 			s = 127;
258 			if(v < 127)
259 				s = v;
260 			cput(s+128);	/* 129-255 +pc */
261 			if(debug['L'])
262 				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);
263 			v -= s;
264 			lcsize++;
265 		}
266 		s = p->line - oldlc;
267 		oldlc = p->line;
268 		oldpc = p->pc + MINLC;
269 		if(s > 64 || s < -64) {
270 			cput(0);	/* 0 vv +lc */
271 			cput(s>>24);
272 			cput(s>>16);
273 			cput(s>>8);
274 			cput(s);
275 			if(debug['L']) {
276 				if(s > 0)
277 					Bprint(&bso, " lc+%ld(%d,%ld)\n",
278 						s, 0, s);
279 				else
280 					Bprint(&bso, " lc%ld(%d,%ld)\n",
281 						s, 0, s);
282 				Bprint(&bso, "%6llux %P\n",
283 					p->pc, p);
284 			}
285 			lcsize += 5;
286 			continue;
287 		}
288 		if(s > 0) {
289 			cput(0+s);	/* 1-64 +lc */
290 			if(debug['L']) {
291 				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);
292 				Bprint(&bso, "%6llux %P\n",
293 					p->pc, p);
294 			}
295 		} else {
296 			cput(64-s);	/* 65-128 -lc */
297 			if(debug['L']) {
298 				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);
299 				Bprint(&bso, "%6llux %P\n",
300 					p->pc, p);
301 			}
302 		}
303 		lcsize++;
304 	}
305 	while(lcsize & 1) {
306 		s = 129;
307 		cput(s);
308 		lcsize++;
309 	}
310 	if(debug['v'] || debug['L'])
311 		Bprint(&bso, "lcsize = %ld\n", lcsize);
312 	Bflush(&bso);
313 }
314 
/*
 * Classify operand `a' into one of the Y* operand classes.
 *
 * Indirect operands (type >= D_INDIR) and operands with an index register
 * are Ym, except that an index with scale 0 is either an address constant
 * (D_ADDR: Yi32 or Yiauto by index kind, else Yxxx) or Ycol.
 * Otherwise the class follows a->type.  R8-R15 and the extended byte
 * registers are only accepted when asmode is 64.
 * Returns Yxxx when the operand fits no class.
 */
int
oclass(Adr *a)
{
	vlong v;
	long l;

	if(a->type >= D_INDIR || a->index != D_NONE) {
		if(a->index != D_NONE && a->scale == 0) {
			if(a->type == D_ADDR) {
				switch(a->index) {
				case D_EXTERN:
				case D_STATIC:
					return Yi32;	/* TO DO: Yi64 */
				case D_AUTO:
				case D_PARAM:
					return Yiauto;
				}
				return Yxxx;
			}
			return Ycol;
		}
		return Ym;
	}
	switch(a->type)
	{
	case D_AL:
		return Yal;

	case D_AX:
		return Yax;

/*
	case D_SPB:
*/
	case D_BPB:
	case D_SIB:
	case D_DIB:
	case D_R8B:
	case D_R9B:
	case D_R10B:
	case D_R11B:
	case D_R12B:
	case D_R13B:
	case D_R14B:
	case D_R15B:
		if(asmode != 64)
			return Yxxx;
		/* fall through: in 64-bit mode these are ordinary byte registers */
	case D_DL:
	case D_BL:
	case D_AH:
	case D_CH:
	case D_DH:
	case D_BH:
		return Yrb;

	case D_CL:
		return Ycl;

	case D_CX:
		return Ycx;

	case D_DX:
	case D_BX:
		return Yrx;

	case D_R8:	/* not really Yrl */
	case D_R9:
	case D_R10:
	case D_R11:
	case D_R12:
	case D_R13:
	case D_R14:
	case D_R15:
		if(asmode != 64)
			return Yxxx;
		/* fall through: in 64-bit mode these share the class of SP/BP/SI/DI */
	case D_SP:
	case D_BP:
	case D_SI:
	case D_DI:
		return Yrl;

	case D_F0+0:
		return	Yf0;

	case D_F0+1:
	case D_F0+2:
	case D_F0+3:
	case D_F0+4:
	case D_F0+5:
	case D_F0+6:
	case D_F0+7:
		return	Yrf;

	case D_M0+0:
	case D_M0+1:
	case D_M0+2:
	case D_M0+3:
	case D_M0+4:
	case D_M0+5:
	case D_M0+6:
	case D_M0+7:
		return	Ymr;

	case D_X0+0:
	case D_X0+1:
	case D_X0+2:
	case D_X0+3:
	case D_X0+4:
	case D_X0+5:
	case D_X0+6:
	case D_X0+7:
	case D_X0+8:
	case D_X0+9:
	case D_X0+10:
	case D_X0+11:
	case D_X0+12:
	case D_X0+13:
	case D_X0+14:
	case D_X0+15:
		return	Yxr;

	case D_NONE:
		return Ynone;

	case D_CS:	return	Ycs;
	case D_SS:	return	Yss;
	case D_DS:	return	Yds;
	case D_ES:	return	Yes;
	case D_FS:	return	Yfs;
	case D_GS:	return	Ygs;

	case D_GDTR:	return	Ygdtr;
	case D_IDTR:	return	Yidtr;
	case D_LDTR:	return	Yldtr;
	case D_MSW:	return	Ymsw;
	case D_TASK:	return	Ytask;

	case D_CR+0:	return	Ycr0;
	case D_CR+1:	return	Ycr1;
	case D_CR+2:	return	Ycr2;
	case D_CR+3:	return	Ycr3;
	case D_CR+4:	return	Ycr4;
	case D_CR+5:	return	Ycr5;
	case D_CR+6:	return	Ycr6;
	case D_CR+7:	return	Ycr7;
	case D_CR+8:	return	Ycr8;

	case D_DR+0:	return	Ydr0;
	case D_DR+1:	return	Ydr1;
	case D_DR+2:	return	Ydr2;
	case D_DR+3:	return	Ydr3;
	case D_DR+4:	return	Ydr4;
	case D_DR+5:	return	Ydr5;
	case D_DR+6:	return	Ydr6;
	case D_DR+7:	return	Ydr7;

	case D_TR+0:	return	Ytr0;
	case D_TR+1:	return	Ytr1;
	case D_TR+2:	return	Ytr2;
	case D_TR+3:	return	Ytr3;
	case D_TR+4:	return	Ytr4;
	case D_TR+5:	return	Ytr5;
	case D_TR+6:	return	Ytr6;
	case D_TR+7:	return	Ytr7;

	case D_EXTERN:
	case D_STATIC:
	case D_AUTO:
	case D_PARAM:
		return Ym;

	case D_CONST:
	case D_ADDR:
		/* a constant without a symbol is classed by the smallest form that holds it */
		if(a->sym == S) {
			v = a->offset;
			if(v == 0)
				return Yi0;
			if(v == 1)
				return Yi1;
			if(v >= -128 && v <= 127)
				return Yi8;
			/* round-trip through long: unchanged means it fits the narrower type */
			l = v;
			if((vlong)l == v)
				return Ys32;	/* can sign extend */
			if((v>>32) == 0)
				return Yi32;	/* unsigned */
			return Yi64;
		}
		return Yi32;	/* TO DO: D_ADDR as Yi64 */

	case D_BRANCH:
		return Ybr;
	}
	return Yxxx;
}
510 
511 void
512 asmidx(Adr *a, int base)
513 {
514 	int i;
515 
516 	switch(a->index) {
517 	default:
518 		goto bad;
519 
520 	case D_NONE:
521 		i = 4 << 3;
522 		goto bas;
523 
524 	case D_R8:
525 	case D_R9:
526 	case D_R10:
527 	case D_R11:
528 	case D_R12:
529 	case D_R13:
530 	case D_R14:
531 	case D_R15:
532 		if(asmode != 64)
533 			goto bad;
534 	case D_AX:
535 	case D_CX:
536 	case D_DX:
537 	case D_BX:
538 	case D_BP:
539 	case D_SI:
540 	case D_DI:
541 		i = reg[a->index] << 3;
542 		break;
543 	}
544 	switch(a->scale) {
545 	default:
546 		goto bad;
547 	case 1:
548 		break;
549 	case 2:
550 		i |= (1<<6);
551 		break;
552 	case 4:
553 		i |= (2<<6);
554 		break;
555 	case 8:
556 		i |= (3<<6);
557 		break;
558 	}
559 bas:
560 	switch(base) {
561 	default:
562 		goto bad;
563 	case D_NONE:	/* must be mod=00 */
564 		i |= 5;
565 		break;
566 	case D_R8:
567 	case D_R9:
568 	case D_R10:
569 	case D_R11:
570 	case D_R12:
571 	case D_R13:
572 	case D_R14:
573 	case D_R15:
574 		if(asmode != 64)
575 			goto bad;
576 	case D_AX:
577 	case D_CX:
578 	case D_DX:
579 	case D_BX:
580 	case D_SP:
581 	case D_BP:
582 	case D_SI:
583 	case D_DI:
584 		i |= reg[base];
585 		break;
586 	}
587 	*andptr++ = i;
588 	return;
589 bad:
590 	diag("asmidx: bad address %D", a);
591 	*andptr++ = 0;
592 	return;
593 }
594 
595 static void
596 put4(long v)
597 {
598 	if(dlm && curp != P && reloca != nil){
599 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);
600 		reloca = nil;
601 	}
602 	andptr[0] = v;
603 	andptr[1] = v>>8;
604 	andptr[2] = v>>16;
605 	andptr[3] = v>>24;
606 	andptr += 4;
607 }
608 
609 static void
610 put8(vlong v)
611 {
612 	if(dlm && curp != P && reloca != nil){
613 		dynreloc(reloca->sym, curp->pc + andptr - &and[0], 1);	/* TO DO */
614 		reloca = nil;
615 	}
616 	andptr[0] = v;
617 	andptr[1] = v>>8;
618 	andptr[2] = v>>16;
619 	andptr[3] = v>>24;
620 	andptr[4] = v>>32;
621 	andptr[5] = v>>40;
622 	andptr[6] = v>>48;
623 	andptr[7] = v>>56;
624 	andptr += 8;
625 }
626 
627 vlong
628 vaddr(Adr *a)
629 {
630 	int t;
631 	vlong v;
632 	Sym *s;
633 
634 	t = a->type;
635 	v = a->offset;
636 	if(t == D_ADDR)
637 		t = a->index;
638 	switch(t) {
639 	case D_STATIC:
640 	case D_EXTERN:
641 		s = a->sym;
642 		if(s != nil) {
643 			if(dlm && curp != P)
644 				reloca = a;
645 			switch(s->type) {
646 			case SUNDEF:
647 				ckoff(s, v);
648 			case STEXT:
649 			case SCONST:
650 				if((uvlong)s->value < (uvlong)INITTEXT)
651 					v += INITTEXT;	/* TO DO */
652 				v += s->value;
653 				break;
654 			default:
655 				v += INITDAT + s->value;
656 			}
657 		}
658 	}
659 	return v;
660 }
661 
/*
 * Append the addressing bytes for operand `a' to the instruction buffer.
 *
 *	a	operand to encode
 *	r	value placed in bits 3-5 of the first byte (a register number
 *		or an opcode extension, depending on the caller)
 *	rex	prefix bits for r; only 0x40 and Rxr are kept and or'd into rexflag
 *	m64	not examined in this function; only passed on to the recursive calls
 *
 * The first byte is built as (mod << 6) | (r << 3) | rm -- this looks like
 * the x86 ModR/M byte; NOTE(review): confirm against the instruction set
 * manual.  mod 0 carries no displacement (or a bare 32-bit one when there
 * is no base), mod 1 a one-byte displacement, mod 2 a four-byte one and
 * mod 3 a register operand.  A second byte is produced by asmidx when rm is 4.
 * Symbolic operands (static/extern/auto/param) are rewritten as an
 * indirect operand with the offset from vaddr and encoded recursively.
 * An operand that cannot be encoded gets a diagnostic and emits nothing
 * further.
 */
static void
asmandsz(Adr *a, int r, int rex, int m64)
{
	long v;
	int t;
	Adr aa;

	rex &= (0x40 | Rxr);
	v = a->offset;
	t = a->type;
	if(a->index != D_NONE) {
		/* indexed forms always use rm=4 followed by the asmidx byte */
		if(t >= D_INDIR) {
			t -= D_INDIR;
			rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex;
			if(t == D_NONE) {
				/* no base register: mod 0 with a four-byte displacement */
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				put4(v);
				return;
			}
			/* BP and R13 are excluded from the no-displacement form */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (4 << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			*andptr++ = (2 << 6) | (4 << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		/* symbolic operand with an index: rewrite as indirect and recurse */
		switch(t) {
		default:
			goto bad;
		case D_STATIC:
		case D_EXTERN:
			aa.type = D_NONE+D_INDIR;
			break;
		case D_AUTO:
		case D_PARAM:
			aa.type = D_SP+D_INDIR;
			break;
		}
		aa.offset = vaddr(a);
		aa.index = a->index;
		aa.scale = a->scale;
		asmandsz(&aa, r, rex, m64);
		return;
	}
	/* register operand: mod 3, an offset is not allowed */
	if(t >= D_AL && t <= D_X0+15) {
		if(v)
			goto bad;
		*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
		rexflag |= (regrex[t] & (0x40 | Rxb)) | rex;
		return;
	}
	if(t >= D_INDIR) {
		t -= D_INDIR;
		rexflag |= (regrex[t] & Rxb) | rex;
		if(t == D_NONE) {
			/* absolute address: outside 64-bit mode, mod 0 rm 5 + disp32 */
			if(asmode != 64){
				*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
				put4(v);
				return;
			}
			/* temporary */
			*andptr++ = (0 <<  6) | (4 << 0) | (r << 3);	/* sib present */
			*andptr++ = (0 << 6) | (4 << 3) | (5 << 0);	/* DS:d32 */
			put4(v);
			return;
		}
		/* SP and R12 as base always take the extra asmidx byte */
		if(t == D_SP || t == D_R12) {
			if(v == 0) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				return;
			}
			if(v >= -128 && v < 128) {
				*andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3);
				asmidx(a, t);
				*andptr++ = v;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			asmidx(a, t);
			put4(v);
			return;
		}
		if(t >= D_AX && t <= D_R15) {
			/* BP and R13 are excluded from the no-displacement form */
			if(v == 0 && t != D_BP && t != D_R13) {
				*andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3);
				return;
			}
			if(v >= -128 && v < 128) {
				andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3);
				andptr[1] = v;
				andptr += 2;
				return;
			}
			*andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3);
			put4(v);
			return;
		}
		goto bad;
	}
	/* plain symbolic operand: rewrite as indirect (no index) and recurse */
	switch(a->type) {
	default:
		goto bad;
	case D_STATIC:
	case D_EXTERN:
		aa.type = D_NONE+D_INDIR;
		break;
	case D_AUTO:
	case D_PARAM:
		aa.type = D_SP+D_INDIR;
		break;
	}
	aa.index = D_NONE;
	aa.scale = 1;
	aa.offset = vaddr(a);
	asmandsz(&aa, r, rex, m64);
	return;
bad:
	diag("asmand: bad address %D", a);
	return;
}
793 
794 void
795 asmand(Adr *a, Adr *ra)
796 {
797 	asmandsz(a, reg[ra->type], regrex[ra->type], 0);
798 }
799 
800 void
801 asmando(Adr *a, int o)
802 {
803 	asmandsz(a, o, 0, 0);
804 }
805 
806 static void
807 bytereg(Adr *a)
808 {
809 	if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15))
810 		a->type = D_AL + (a->type-D_AX);
811 }
812 
/*
 * E marks the end of the literal byte sequence in the code 0 (lit)
 * entries below; it also appears in a few other rows.
 */
#define	E	0xff
/*
 * Special-case move table, searched by doasm when the regular operand
 * table has no match.  Each entry appears to be
 *	{ as, from-class, to-class, code, op bytes... }
 * (NOTE(review): field order inferred from how doasm reads mo->as, mo->ft,
 * mo->tt, mo->code and mo->op; confirm against the Movtab declaration).
 * The code selects how doasm emits the op bytes:
 *	0	literal bytes up to E
 *	1	r,m: op[0], then `to' encoded with op[1] in the register field
 *	2	m,r: op[0], then `from' encoded with op[1] in the register field
 *	3	r,m with two opcode bytes, op[2] in the register field
 *	4	m,r with two opcode bytes, op[2] in the register field
 *	5	load full pointer
 *	6	used by the double shift entries; its handling is outside this view
 * The table ends with a zero entry.
 */
Movtab	ymovtab[] =
{
/* push */
	{APUSHL,	Ycs,	Ynone,	0,	0x0e,E,0,0},
	{APUSHL,	Yss,	Ynone,	0,	0x16,E,0,0},
	{APUSHL,	Yds,	Ynone,	0,	0x1e,E,0,0},
	{APUSHL,	Yes,	Ynone,	0,	0x06,E,0,0},
	{APUSHL,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHL,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},
	{APUSHQ,	Yfs,	Ynone,	0,	0x0f,0xa0,E,0},
	{APUSHQ,	Ygs,	Ynone,	0,	0x0f,0xa8,E,0},

	{APUSHW,	Ycs,	Ynone,	0,	Pe,0x0e,E,0},
	{APUSHW,	Yss,	Ynone,	0,	Pe,0x16,E,0},
	{APUSHW,	Yds,	Ynone,	0,	Pe,0x1e,E,0},
	{APUSHW,	Yes,	Ynone,	0,	Pe,0x06,E,0},
	{APUSHW,	Yfs,	Ynone,	0,	Pe,0x0f,0xa0,E},
	{APUSHW,	Ygs,	Ynone,	0,	Pe,0x0f,0xa8,E},

/* pop */
	{APOPL,	Ynone,	Yds,	0,	0x1f,E,0,0},
	{APOPL,	Ynone,	Yes,	0,	0x07,E,0,0},
	{APOPL,	Ynone,	Yss,	0,	0x17,E,0,0},
	{APOPL,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPL,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},
	{APOPQ,	Ynone,	Yfs,	0,	0x0f,0xa1,E,0},
	{APOPQ,	Ynone,	Ygs,	0,	0x0f,0xa9,E,0},

	{APOPW,	Ynone,	Yds,	0,	Pe,0x1f,E,0},
	{APOPW,	Ynone,	Yes,	0,	Pe,0x07,E,0},
	{APOPW,	Ynone,	Yss,	0,	Pe,0x17,E,0},
	{APOPW,	Ynone,	Yfs,	0,	Pe,0x0f,0xa1,E},
	{APOPW,	Ynone,	Ygs,	0,	Pe,0x0f,0xa9,E},

/* mov seg */
	{AMOVW,	Yes,	Yml,	1,	0x8c,0,0,0},
	{AMOVW,	Ycs,	Yml,	1,	0x8c,1,0,0},
	{AMOVW,	Yss,	Yml,	1,	0x8c,2,0,0},
	{AMOVW,	Yds,	Yml,	1,	0x8c,3,0,0},
	{AMOVW,	Yfs,	Yml,	1,	0x8c,4,0,0},
	{AMOVW,	Ygs,	Yml,	1,	0x8c,5,0,0},

	{AMOVW,	Yml,	Yes,	2,	0x8e,0,0,0},
	{AMOVW,	Yml,	Ycs,	2,	0x8e,1,0,0},
	{AMOVW,	Yml,	Yss,	2,	0x8e,2,0,0},
	{AMOVW,	Yml,	Yds,	2,	0x8e,3,0,0},
	{AMOVW,	Yml,	Yfs,	2,	0x8e,4,0,0},
	{AMOVW,	Yml,	Ygs,	2,	0x8e,5,0,0},

/* mov cr */
	{AMOVL,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVL,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVL,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVL,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVL,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},
	{AMOVQ,	Ycr0,	Yml,	3,	0x0f,0x20,0,0},
	{AMOVQ,	Ycr2,	Yml,	3,	0x0f,0x20,2,0},
	{AMOVQ,	Ycr3,	Yml,	3,	0x0f,0x20,3,0},
	{AMOVQ,	Ycr4,	Yml,	3,	0x0f,0x20,4,0},
	{AMOVQ,	Ycr8,	Yml,	3,	0x0f,0x20,8,0},

	{AMOVL,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVL,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVL,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVL,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVL,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},
	{AMOVQ,	Yml,	Ycr0,	4,	0x0f,0x22,0,0},
	{AMOVQ,	Yml,	Ycr2,	4,	0x0f,0x22,2,0},
	{AMOVQ,	Yml,	Ycr3,	4,	0x0f,0x22,3,0},
	{AMOVQ,	Yml,	Ycr4,	4,	0x0f,0x22,4,0},
	{AMOVQ,	Yml,	Ycr8,	4,	0x0f,0x22,8,0},

/* mov dr */
	{AMOVL,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVL,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVL,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},
	{AMOVQ,	Ydr0,	Yml,	3,	0x0f,0x21,0,0},
	{AMOVQ,	Ydr6,	Yml,	3,	0x0f,0x21,6,0},
	{AMOVQ,	Ydr7,	Yml,	3,	0x0f,0x21,7,0},

	{AMOVL,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVL,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVL,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},
	{AMOVQ,	Yml,	Ydr0,	4,	0x0f,0x23,0,0},
	{AMOVQ,	Yml,	Ydr6,	4,	0x0f,0x23,6,0},
	{AMOVQ,	Yml,	Ydr7,	4,	0x0f,0x23,7,0},

/* mov tr */
	{AMOVL,	Ytr6,	Yml,	3,	0x0f,0x24,6,0},
	{AMOVL,	Ytr7,	Yml,	3,	0x0f,0x24,7,0},

	{AMOVL,	Yml,	Ytr6,	4,	0x0f,0x26,6,E},
	{AMOVL,	Yml,	Ytr7,	4,	0x0f,0x26,7,E},

/* lgdt, sgdt, lidt, sidt */
	{AMOVL,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVL,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVL,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVL,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},
	{AMOVQ,	Ym,	Ygdtr,	4,	0x0f,0x01,2,0},
	{AMOVQ,	Ygdtr,	Ym,	3,	0x0f,0x01,0,0},
	{AMOVQ,	Ym,	Yidtr,	4,	0x0f,0x01,3,0},
	{AMOVQ,	Yidtr,	Ym,	3,	0x0f,0x01,1,0},

/* lldt, sldt */
	{AMOVW,	Yml,	Yldtr,	4,	0x0f,0x00,2,0},
	{AMOVW,	Yldtr,	Yml,	3,	0x0f,0x00,0,0},

/* lmsw, smsw */
	{AMOVW,	Yml,	Ymsw,	4,	0x0f,0x01,6,0},
	{AMOVW,	Ymsw,	Yml,	3,	0x0f,0x01,4,0},

/* ltr, str */
	{AMOVW,	Yml,	Ytask,	4,	0x0f,0x00,3,0},
	{AMOVW,	Ytask,	Yml,	3,	0x0f,0x00,1,0},

/* load full pointer */
	{AMOVL,	Yml,	Ycol,	5,	0,0,0,0},
	{AMOVW,	Yml,	Ycol,	5,	Pe,0,0,0},

/* double shift */
	{ASHLL,	Ycol,	Yml,	6,	0xa4,0xa5,0,0},
	{ASHRL,	Ycol,	Yml,	6,	0xac,0xad,0,0},
	{ASHLQ,	Ycol,	Yml,	6,	Pw,0xa4,0xa5,0},
	{ASHRQ,	Ycol,	Yml,	6,	Pw,0xac,0xad,0},
	{ASHLW,	Ycol,	Yml,	6,	Pe,0xa4,0xa5,0},
	{ASHRW,	Ycol,	Yml,	6,	Pe,0xac,0xad,0},
	0
};
943 
944 int
945 isax(Adr *a)
946 {
947 
948 	switch(a->type) {
949 	case D_AX:
950 	case D_AL:
951 	case D_AH:
952 	case D_INDIR+D_AX:
953 		return 1;
954 	}
955 	if(a->index == D_AX)
956 		return 1;
957 	return 0;
958 }
959 
960 void
961 subreg(Prog *p, int from, int to)
962 {
963 
964 	if(debug['Q'])
965 		print("\n%P	s/%R/%R/\n", p, from, to);
966 
967 	if(p->from.type == from)
968 		p->from.type = to;
969 	if(p->to.type == from)
970 		p->to.type = to;
971 
972 	if(p->from.index == from)
973 		p->from.index = to;
974 	if(p->to.index == from)
975 		p->to.index = to;
976 
977 	from += D_INDIR;
978 	if(p->from.type == from)
979 		p->from.type = to+D_INDIR;
980 	if(p->to.type == from)
981 		p->to.type = to+D_INDIR;
982 
983 	if(debug['Q'])
984 		print("%P\n", p);
985 }
986 
987 static int
988 mediaop(Optab *o, int op, int osize, int z)
989 {
990 	switch(op){
991 	case Pm:
992 	case Pe:
993 	case Pf2:
994 	case Pf3:
995 		if(osize != 1){
996 			if(op != Pm)
997 				*andptr++ = op;
998 			*andptr++ = Pm;
999 			op = o->op[++z];
1000 			break;
1001 		}
1002 	default:
1003 		if(andptr == and || andptr[-1] != Pm)
1004 			*andptr++ = Pm;
1005 		break;
1006 	}
1007 	*andptr++ = op;
1008 	return z;
1009 }
1010 
1011 void
1012 doasm(Prog *p)
1013 {
1014 	Optab *o;
1015 	Prog *q, pp;
1016 	uchar *t;
1017 	Movtab *mo;
1018 	int z, op, ft, tt, xo, l;
1019 	vlong v;
1020 
1021 	o = opindex[p->as];
1022 	if(o == nil) {
1023 		diag("asmins: missing op %P", p);
1024 		return;
1025 	}
1026 	ft = oclass(&p->from) * Ymax;
1027 	tt = oclass(&p->to) * Ymax;
1028 	t = o->ytab;
1029 	if(t == 0) {
1030 		diag("asmins: noproto %P", p);
1031 		return;
1032 	}
1033 	xo = o->op[0] == 0x0f;
1034 	for(z=0; *t; z+=t[3]+xo,t+=4)
1035 		if(ycover[ft+t[0]])
1036 		if(ycover[tt+t[1]])
1037 			goto found;
1038 	goto domov;
1039 
1040 found:
1041 	switch(o->prefix) {
1042 	case Pq:	/* 16 bit escape and opcode escape */
1043 		*andptr++ = Pe;
1044 		*andptr++ = Pm;
1045 		break;
1046 
1047 	case Pf2:	/* xmm opcode escape */
1048 	case Pf3:
1049 		*andptr++ = o->prefix;
1050 		*andptr++ = Pm;
1051 		break;
1052 
1053 	case Pm:	/* opcode escape */
1054 		*andptr++ = Pm;
1055 		break;
1056 
1057 	case Pe:	/* 16 bit escape */
1058 		*andptr++ = Pe;
1059 		break;
1060 
1061 	case Pw:	/* 64-bit escape */
1062 		if(p->mode != 64)
1063 			diag("asmins: illegal 64: %P", p);
1064 		rexflag |= Pw;
1065 		break;
1066 
1067 	case Pb:	/* botch */
1068 		bytereg(&p->from);
1069 		bytereg(&p->to);
1070 		break;
1071 
1072 	case P32:	/* 32 bit but illegal if 64-bit mode */
1073 		if(p->mode == 64)
1074 			diag("asmins: illegal in 64-bit mode: %P", p);
1075 		break;
1076 
1077 	case Py:	/* 64-bit only, no prefix */
1078 		if(p->mode != 64)
1079 			diag("asmins: illegal in %d-bit mode: %P", p->mode, p);
1080 		break;
1081 	}
1082 	v = vaddr(&p->from);
1083 	op = o->op[z];
1084 	if(op == 0x0f) {
1085 		*andptr++ = op;
1086 		op = o->op[++z];
1087 	}
1088 	switch(t[2]) {
1089 	default:
1090 		diag("asmins: unknown z %d %P", t[2], p);
1091 		return;
1092 
1093 	case Zpseudo:
1094 		break;
1095 
1096 	case Zlit:
1097 		for(; op = o->op[z]; z++)
1098 			*andptr++ = op;
1099 		break;
1100 
1101 	case Zmb_r:
1102 		bytereg(&p->from);
1103 		/* fall through */
1104 	case Zm_r:
1105 		*andptr++ = op;
1106 		asmand(&p->from, &p->to);
1107 		break;
1108 
1109 	case Zm_r_xm:
1110 		mediaop(o, op, t[3], z);
1111 		asmand(&p->from, &p->to);
1112 		break;
1113 
1114 	case Zm_r_xm_nr:
1115 		rexflag = 0;
1116 		mediaop(o, op, t[3], z);
1117 		asmand(&p->from, &p->to);
1118 		break;
1119 
1120 	case Zm_r_i_xm:
1121 		mediaop(o, op, t[3], z);
1122 		asmand(&p->from, &p->to);
1123 		*andptr++ = p->to.offset;
1124 		break;
1125 
1126 	case Zm_r_3d:
1127 		*andptr++ = 0x0f;
1128 		*andptr++ = 0x0f;
1129 		asmand(&p->from, &p->to);
1130 		*andptr++ = op;
1131 		break;
1132 
1133 	case Zibm_r:
1134 		*andptr++ = op;
1135 		asmand(&p->from, &p->to);
1136 		*andptr++ = p->to.offset;
1137 		break;
1138 
1139 	case Zaut_r:
1140 		*andptr++ = 0x8d;	/* leal */
1141 		if(p->from.type != D_ADDR)
1142 			diag("asmins: Zaut sb type ADDR");
1143 		p->from.type = p->from.index;
1144 		p->from.index = D_NONE;
1145 		asmand(&p->from, &p->to);
1146 		p->from.index = p->from.type;
1147 		p->from.type = D_ADDR;
1148 		break;
1149 
1150 	case Zm_o:
1151 		*andptr++ = op;
1152 		asmando(&p->from, o->op[z+1]);
1153 		break;
1154 
1155 	case Zr_m:
1156 		*andptr++ = op;
1157 		asmand(&p->to, &p->from);
1158 		break;
1159 
1160 	case Zr_m_xm:
1161 		mediaop(o, op, t[3], z);
1162 		asmand(&p->to, &p->from);
1163 		break;
1164 
1165 	case Zr_m_xm_nr:
1166 		rexflag = 0;
1167 		mediaop(o, op, t[3], z);
1168 		asmand(&p->to, &p->from);
1169 		break;
1170 
1171 	case Zr_m_i_xm:
1172 		mediaop(o, op, t[3], z);
1173 		asmand(&p->to, &p->from);
1174 		*andptr++ = p->from.offset;
1175 		break;
1176 
1177 	case Zo_m:
1178 		*andptr++ = op;
1179 		asmando(&p->to, o->op[z+1]);
1180 		break;
1181 
1182 	case Zo_m64:
1183 		*andptr++ = op;
1184 		asmandsz(&p->to, o->op[z+1], 0, 1);
1185 		break;
1186 
1187 	case Zm_ibo:
1188 		v = vaddr(&p->to);
1189 		*andptr++ = op;
1190 		asmando(&p->from, o->op[z+1]);
1191 		*andptr++ = v;
1192 		break;
1193 
1194 	case Zibo_m:
1195 		*andptr++ = op;
1196 		asmando(&p->to, o->op[z+1]);
1197 		*andptr++ = v;
1198 		break;
1199 
1200 	case Zibo_m_xm:
1201 		z = mediaop(o, op, t[3], z);
1202 		asmando(&p->to, o->op[z+1]);
1203 		*andptr++ = v;
1204 		break;
1205 
1206 	case Z_ib:
1207 		v = vaddr(&p->to);
1208 	case Zib_:
1209 		*andptr++ = op;
1210 		*andptr++ = v;
1211 		break;
1212 
1213 	case Zib_rp:
1214 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1215 		*andptr++ = op + reg[p->to.type];
1216 		*andptr++ = v;
1217 		break;
1218 
1219 	case Zil_rp:
1220 		rexflag |= regrex[p->to.type] & Rxb;
1221 		*andptr++ = op + reg[p->to.type];
1222 		if(o->prefix == Pe) {
1223 			*andptr++ = v;
1224 			*andptr++ = v>>8;
1225 		}
1226 		else
1227 			put4(v);
1228 		break;
1229 
1230 	case Zo_iw:
1231 		*andptr++ = op;
1232 		if(p->from.type != D_NONE){
1233 			*andptr++ = v;
1234 			*andptr++ = v>>8;
1235 		}
1236 		break;
1237 
1238 	case Ziq_rp:
1239 		l = v>>32;
1240 		if(l == 0){
1241 			//p->mark |= 0100;
1242 			//print("zero: %llux %P\n", v, p);
1243 			rexflag &= ~(0x40|Rxw);
1244 			rexflag |= regrex[p->to.type] & Rxb;
1245 			*andptr++ = 0xb8 + reg[p->to.type];
1246 			put4(v);
1247 		}else if(l == -1 && (v&((uvlong)1<<31))!=0){	/* sign extend */
1248 			//p->mark |= 0100;
1249 			//print("sign: %llux %P\n", v, p);
1250 			*andptr ++ = 0xc7;
1251 			asmando(&p->to, 0);
1252 			put4(v);
1253 		}else{	/* need all 8 */
1254 			//print("all: %llux %P\n", v, p);
1255 			rexflag |= regrex[p->to.type] & Rxb;
1256 			*andptr++ = op + reg[p->to.type];
1257 			put8(v);
1258 		}
1259 		break;
1260 
1261 	case Zib_rr:
1262 		*andptr++ = op;
1263 		asmand(&p->to, &p->to);
1264 		*andptr++ = v;
1265 		break;
1266 
1267 	case Z_il:
1268 		v = vaddr(&p->to);
1269 	case Zil_:
1270 		*andptr++ = op;
1271 		if(o->prefix == Pe) {
1272 			*andptr++ = v;
1273 			*andptr++ = v>>8;
1274 		}
1275 		else
1276 			put4(v);
1277 		break;
1278 
1279 	case Zm_ilo:
1280 		v = vaddr(&p->to);
1281 		*andptr++ = op;
1282 		asmando(&p->from, o->op[z+1]);
1283 		if(o->prefix == Pe) {
1284 			*andptr++ = v;
1285 			*andptr++ = v>>8;
1286 		}
1287 		else
1288 			put4(v);
1289 		break;
1290 
1291 	case Zilo_m:
1292 		*andptr++ = op;
1293 		asmando(&p->to, o->op[z+1]);
1294 		if(o->prefix == Pe) {
1295 			*andptr++ = v;
1296 			*andptr++ = v>>8;
1297 		}
1298 		else
1299 			put4(v);
1300 		break;
1301 
1302 	case Zil_rr:
1303 		*andptr++ = op;
1304 		asmand(&p->to, &p->to);
1305 		if(o->prefix == Pe) {
1306 			*andptr++ = v;
1307 			*andptr++ = v>>8;
1308 		}
1309 		else
1310 			put4(v);
1311 		break;
1312 
1313 	case Z_rp:
1314 		rexflag |= regrex[p->to.type] & (Rxb|0x40);
1315 		*andptr++ = op + reg[p->to.type];
1316 		break;
1317 
1318 	case Zrp_:
1319 		rexflag |= regrex[p->from.type] & (Rxb|0x40);
1320 		*andptr++ = op + reg[p->from.type];
1321 		break;
1322 
1323 	case Zclr:
1324 		*andptr++ = op;
1325 		asmand(&p->to, &p->to);
1326 		break;
1327 
1328 	case Zbr:
1329 		q = p->pcond;
1330 		if(q) {
1331 			v = q->pc - p->pc - 2;
1332 			if(v >= -128 && v <= 127) {
1333 				*andptr++ = op;
1334 				*andptr++ = v;
1335 			} else {
1336 				v -= 6-2;
1337 				*andptr++ = 0x0f;
1338 				*andptr++ = o->op[z+1];
1339 				*andptr++ = v;
1340 				*andptr++ = v>>8;
1341 				*andptr++ = v>>16;
1342 				*andptr++ = v>>24;
1343 			}
1344 		}
1345 		break;
1346 
1347 	case Zcall:
1348 		q = p->pcond;
1349 		if(q) {
1350 			v = q->pc - p->pc - 5;
1351 			if(dlm && curp != P && p->to.sym->type == SUNDEF){
1352 				/* v = 0 - p->pc - 5; */
1353 				v = 0;
1354 				ckoff(p->to.sym, v);
1355 				v += p->to.sym->value;
1356 				dynreloc(p->to.sym, p->pc+1, 0);
1357 			}
1358 			*andptr++ = op;
1359 			*andptr++ = v;
1360 			*andptr++ = v>>8;
1361 			*andptr++ = v>>16;
1362 			*andptr++ = v>>24;
1363 		}
1364 		break;
1365 
1366 	case Zjmp:
1367 		q = p->pcond;
1368 		if(q) {
1369 			v = q->pc - p->pc - 2;
1370 			if(v >= -128 && v <= 127) {
1371 				*andptr++ = op;
1372 				*andptr++ = v;
1373 			} else {
1374 				v -= 5-2;
1375 				*andptr++ = o->op[z+1];
1376 				*andptr++ = v;
1377 				*andptr++ = v>>8;
1378 				*andptr++ = v>>16;
1379 				*andptr++ = v>>24;
1380 			}
1381 		}
1382 		break;
1383 
1384 	case Zloop:
1385 		q = p->pcond;
1386 		if(q) {
1387 			v = q->pc - p->pc - 2;
1388 			if(v < -128 && v > 127)
1389 				diag("loop too far: %P", p);
1390 			*andptr++ = op;
1391 			*andptr++ = v;
1392 		}
1393 		break;
1394 
1395 	case Zbyte:
1396 		*andptr++ = v;
1397 		if(op > 1) {
1398 			*andptr++ = v>>8;
1399 			if(op > 2) {
1400 				*andptr++ = v>>16;
1401 				*andptr++ = v>>24;
1402 				if(op > 4) {
1403 					*andptr++ = v>>32;
1404 					*andptr++ = v>>40;
1405 					*andptr++ = v>>48;
1406 					*andptr++ = v>>56;
1407 				}
1408 			}
1409 		}
1410 		break;
1411 	}
1412 	return;
1413 
1414 domov:
1415 	for(mo=ymovtab; mo->as; mo++)
1416 		if(p->as == mo->as)
1417 		if(ycover[ft+mo->ft])
1418 		if(ycover[tt+mo->tt]){
1419 			t = mo->op;
1420 			goto mfound;
1421 		}
1422 bad:
1423 	if(p->mode != 64){
1424 		/*
1425 		 * here, the assembly has failed.
1426 		 * if its a byte instruction that has
1427 		 * unaddressable registers, try to
1428 		 * exchange registers and reissue the
1429 		 * instruction with the operands renamed.
1430 		 */
1431 		pp = *p;
1432 		z = p->from.type;
1433 		if(z >= D_BP && z <= D_DI) {
1434 			if(isax(&p->to)) {
1435 				*andptr++ = 0x87;			/* xchg lhs,bx */
1436 				asmando(&p->from, reg[D_BX]);
1437 				subreg(&pp, z, D_BX);
1438 				doasm(&pp);
1439 				*andptr++ = 0x87;			/* xchg lhs,bx */
1440 				asmando(&p->from, reg[D_BX]);
1441 			} else {
1442 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1443 				subreg(&pp, z, D_AX);
1444 				doasm(&pp);
1445 				*andptr++ = 0x90 + reg[z];		/* xchg lsh,ax */
1446 			}
1447 			return;
1448 		}
1449 		z = p->to.type;
1450 		if(z >= D_BP && z <= D_DI) {
1451 			if(isax(&p->from)) {
1452 				*andptr++ = 0x87;			/* xchg rhs,bx */
1453 				asmando(&p->to, reg[D_BX]);
1454 				subreg(&pp, z, D_BX);
1455 				doasm(&pp);
1456 				*andptr++ = 0x87;			/* xchg rhs,bx */
1457 				asmando(&p->to, reg[D_BX]);
1458 			} else {
1459 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1460 				subreg(&pp, z, D_AX);
1461 				doasm(&pp);
1462 				*andptr++ = 0x90 + reg[z];		/* xchg rsh,ax */
1463 			}
1464 			return;
1465 		}
1466 	}
1467 	diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p);
1468 	return;
1469 
1470 mfound:
1471 	switch(mo->code) {
1472 	default:
1473 		diag("asmins: unknown mov %d %P", mo->code, p);
1474 		break;
1475 
1476 	case 0:	/* lit */
1477 		for(z=0; t[z]!=E; z++)
1478 			*andptr++ = t[z];
1479 		break;
1480 
1481 	case 1:	/* r,m */
1482 		*andptr++ = t[0];
1483 		asmando(&p->to, t[1]);
1484 		break;
1485 
1486 	case 2:	/* m,r */
1487 		*andptr++ = t[0];
1488 		asmando(&p->from, t[1]);
1489 		break;
1490 
1491 	case 3:	/* r,m - 2op */
1492 		*andptr++ = t[0];
1493 		*andptr++ = t[1];
1494 		asmando(&p->to, t[2]);
1495 		rexflag |= regrex[p->from.type] & (Rxr|0x40);
1496 		break;
1497 
1498 	case 4:	/* m,r - 2op */
1499 		*andptr++ = t[0];
1500 		*andptr++ = t[1];
1501 		asmando(&p->from, t[2]);
1502 		rexflag |= regrex[p->to.type] & (Rxr|0x40);
1503 		break;
1504 
1505 	case 5:	/* load full pointer, trash heap */
1506 		if(t[0])
1507 			*andptr++ = t[0];
1508 		switch(p->to.index) {
1509 		default:
1510 			goto bad;
1511 		case D_DS:
1512 			*andptr++ = 0xc5;
1513 			break;
1514 		case D_SS:
1515 			*andptr++ = 0x0f;
1516 			*andptr++ = 0xb2;
1517 			break;
1518 		case D_ES:
1519 			*andptr++ = 0xc4;
1520 			break;
1521 		case D_FS:
1522 			*andptr++ = 0x0f;
1523 			*andptr++ = 0xb4;
1524 			break;
1525 		case D_GS:
1526 			*andptr++ = 0x0f;
1527 			*andptr++ = 0xb5;
1528 			break;
1529 		}
1530 		asmand(&p->from, &p->to);
1531 		break;
1532 
1533 	case 6:	/* double shift */
1534 		if(t[0] == Pw){
1535 			if(p->mode != 64)
1536 				diag("asmins: illegal 64: %P", p);
1537 			rexflag |= Pw;
1538 			t++;
1539 		}else if(t[0] == Pe){
1540 			*andptr++ = Pe;
1541 			t++;
1542 		}
1543 		z = p->from.type;
1544 		switch(z) {
1545 		default:
1546 			goto bad;
1547 		case D_CONST:
1548 			*andptr++ = 0x0f;
1549 			*andptr++ = t[0];
1550 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1551 			*andptr++ = p->from.offset;
1552 			break;
1553 		case D_CL:
1554 		case D_CX:
1555 			*andptr++ = 0x0f;
1556 			*andptr++ = t[1];
1557 			asmandsz(&p->to, reg[p->from.index], regrex[p->from.index], 0);
1558 			break;
1559 		}
1560 		break;
1561 	}
1562 }
1563 
1564 void
1565 asmins(Prog *p)
1566 {
1567 	int n, np, c;
1568 
1569 	rexflag = 0;
1570 	andptr = and;
1571 	asmode = p->mode;
1572 	doasm(p);
1573 	if(rexflag){
1574 		/*
1575 		 * as befits the whole approach of the architecture,
1576 		 * the rex prefix must appear before the first opcode byte
1577 		 * (and thus after any 66/67/f2/f3 prefix bytes, but
1578 		 * before the 0f opcode escape!), or it might be ignored.
1579 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
1580 		 */
1581 		if(p->mode != 64)
1582 			diag("asmins: illegal in mode %d: %P", p->mode, p);
1583 		n = andptr - and;
1584 		for(np = 0; np < n; np++) {
1585 			c = and[np];
1586 			if(c != 0x66 && c != 0xf2 && c != 0xf3 && c != 0x67)
1587 				break;
1588 		}
1589 		memmove(and+np+1, and+np, n-np);
1590 		and[np] = 0x40 | rexflag;
1591 		andptr++;
1592 	}
1593 }
1594 
/*
 * relocation kinds, used by dynreloc as indices into modemap.
 * ABS kinds are picked when dynreloc's abs argument is non-zero,
 * REL kinds otherwise; the U variants are picked when the symbol's
 * type is SUNDEF, the D variants for every other symbol type.
 */
enum
{
	ABSD = 0,	/* abs, symbol not SUNDEF */
	ABSU,		/* 1: abs, symbol is SUNDEF */
	RELD,		/* 2: not abs, symbol not SUNDEF */
	RELU		/* 3: not abs, symbol is SUNDEF */
};
1601 
/*
 * mode value stored for each relocation kind; dynreloc indexes this
 * table with ABSD, ABSU, RELD, RELU (0..3) in that order.
 * the RELD slot holds -1.
 * NOTE(review): -1 presumably marks a kind that is never recorded - confirm.
 */
int modemap[4] = { 0, 1, -1, 2, };
1603 
/*
 * table of dynamic relocations, held as two parallel arrays
 * that are enlarged together by grow().
 */
typedef struct Reloc Reloc;

struct Reloc
{
	int n;		/* number of entries in use */
	int t;		/* number of entries allocated in m and a */
	uchar *m;	/* per-entry mode, a modemap value stored by dynreloc */
	ulong *a;	/* per-entry address; dynreloc keeps these in ascending order */
};

/* the relocation table filled by dynreloc and written out by asmdyn */
Reloc rels;
1615 
1616 static void
1617 grow(Reloc *r)
1618 {
1619 	int t;
1620 	uchar *m, *nm;
1621 	ulong *a, *na;
1622 
1623 	t = r->t;
1624 	r->t += 64;
1625 	m = r->m;
1626 	a = r->a;
1627 	r->m = nm = malloc(r->t*sizeof(uchar));
1628 	r->a = na = malloc(r->t*sizeof(ulong));
1629 	memmove(nm, m, t*sizeof(uchar));
1630 	memmove(na, a, t*sizeof(ulong));
1631 	free(m);
1632 	free(a);
1633 }
1634 
1635 void
1636 dynreloc(Sym *s, ulong v, int abs)
1637 {
1638 	int i, k, n;
1639 	uchar *m;
1640 	ulong *a;
1641 	Reloc *r;
1642 
1643 	if(s->type == SUNDEF)
1644 		k = abs ? ABSU : RELU;
1645 	else
1646 		k = abs ? ABSD : RELD;
1647 	/* Bprint(&bso, "R %s a=%ld(%lx) %d\n", s->name, v, v, k); */
1648 	k = modemap[k];
1649 	r = &rels;
1650 	n = r->n;
1651 	if(n >= r->t)
1652 		grow(r);
1653 	m = r->m;
1654 	a = r->a;
1655 	for(i = n; i > 0; i--){
1656 		if(v < a[i-1]){	/* happens occasionally for data */
1657 			m[i] = m[i-1];
1658 			a[i] = a[i-1];
1659 		}
1660 		else
1661 			break;
1662 	}
1663 	m[i] = k;
1664 	a[i] = v;
1665 	r->n++;
1666 }
1667 
/*
 * write the string s to the output with cput, followed by its
 * terminating zero byte.
 * returns the number of bytes written, terminator included.
 */
static int
sput(char *s)
{
	int n;

	for(n = 0; s[n] != 0; n++) {
		cput(s[n]);
	}
	cput(0);
	return n+1;
}
1679 
1680 void
1681 asmdyn()
1682 {
1683 	int i, n, t, c;
1684 	Sym *s;
1685 	ulong la, ra, *a;
1686 	vlong off;
1687 	uchar *m;
1688 	Reloc *r;
1689 
1690 	cflush();
1691 	off = seek(cout, 0, 1);
1692 	lput(0);
1693 	t = 0;
1694 	lput(imports);
1695 	t += 4;
1696 	for(i = 0; i < NHASH; i++)
1697 		for(s = hash[i]; s != S; s = s->link)
1698 			if(s->type == SUNDEF){
1699 				lput(s->sig);
1700 				t += 4;
1701 				t += sput(s->name);
1702 			}
1703 
1704 	la = 0;
1705 	r = &rels;
1706 	n = r->n;
1707 	m = r->m;
1708 	a = r->a;
1709 	lput(n);
1710 	t += 4;
1711 	for(i = 0; i < n; i++){
1712 		ra = *a-la;
1713 		if(*a < la)
1714 			diag("bad relocation order");
1715 		if(ra < 256)
1716 			c = 0;
1717 		else if(ra < 65536)
1718 			c = 1;
1719 		else
1720 			c = 2;
1721 		cput((c<<6)|*m++);
1722 		t++;
1723 		if(c == 0){
1724 			cput(ra);
1725 			t++;
1726 		}
1727 		else if(c == 1){
1728 			wput(ra);
1729 			t += 2;
1730 		}
1731 		else{
1732 			lput(ra);
1733 			t += 4;
1734 		}
1735 		la = *a++;
1736 	}
1737 
1738 	cflush();
1739 	seek(cout, off, 0);
1740 	lput(t);
1741 
1742 	if(debug['v']){
1743 		Bprint(&bso, "import table entries = %d\n", imports);
1744 		Bprint(&bso, "export table entries = %d\n", exports);
1745 	}
1746 }
1747