xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativeX86_64.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 64-bit arch dependent functions. */
28 
29 static int emit_load_imm64(struct sljit_compiler *compiler, int reg, sljit_w imm)
30 {
31 	sljit_ub *buf;
32 
33 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_w));
34 	FAIL_IF(!buf);
35 	INC_SIZE(2 + sizeof(sljit_w));
36 	*buf++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
37 	*buf++ = 0xb8 + (reg_map[reg] & 0x7);
38 	*(sljit_w*)buf = imm;
39 	return SLJIT_SUCCESS;
40 }
41 
42 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type)
43 {
44 	if (type < SLJIT_JUMP) {
45 		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
46 		*code_ptr++ = 10 + 3;
47 	}
48 
49 	SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
50 	*code_ptr++ = REX_W | REX_B;
51 	*code_ptr++ = 0xb8 + 1;
52 	jump->addr = (sljit_uw)code_ptr;
53 
54 	if (jump->flags & JUMP_LABEL)
55 		jump->flags |= PATCH_MD;
56 	else
57 		*(sljit_w*)code_ptr = jump->u.target;
58 
59 	code_ptr += sizeof(sljit_w);
60 	*code_ptr++ = REX_B;
61 	*code_ptr++ = 0xff;
62 	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? 0xd1 /* call */ : 0xe1 /* jmp */;
63 
64 	return code_ptr;
65 }
66 
67 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type)
68 {
69 	sljit_w delta = addr - ((sljit_w)code_ptr + 1 + sizeof(sljit_hw));
70 
71 	if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) {
72 		*code_ptr++ = (type == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
73 		*(sljit_w*)code_ptr = delta;
74 	}
75 	else {
76 		SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
77 		*code_ptr++ = REX_W | REX_B;
78 		*code_ptr++ = 0xb8 + 1;
79 		*(sljit_w*)code_ptr = addr;
80 		code_ptr += sizeof(sljit_w);
81 		*code_ptr++ = REX_B;
82 		*code_ptr++ = 0xff;
83 		*code_ptr++ = (type == 2) ? 0xd1 /* call */ : 0xe1 /* jmp */;
84 	}
85 
86 	return code_ptr;
87 }
88 
89 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
90 {
91 	int size, pushed_size;
92 	sljit_ub *buf;
93 
94 	CHECK_ERROR();
95 	check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);
96 
97 	compiler->temporaries = temporaries;
98 	compiler->saveds = saveds;
99 	compiler->flags_saved = 0;
100 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
101 	compiler->logical_local_size = local_size;
102 #endif
103 
104 	size = saveds;
105 	/* Including the return address saved by the call instruction. */
106 	pushed_size = (saveds + 1) * sizeof(sljit_w);
107 #ifndef _WIN64
108 	if (saveds >= 2)
109 		size += saveds - 1;
110 #else
111 	if (saveds >= 4)
112 		size += saveds - 3;
113 	if (temporaries >= 5) {
114 		size += (5 - 4) * 2;
115 		pushed_size += sizeof(sljit_w);
116 	}
117 #endif
118 	size += args * 3;
119 	if (size > 0) {
120 		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
121 		FAIL_IF(!buf);
122 
123 		INC_SIZE(size);
124 		if (saveds >= 5) {
125 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
126 			*buf++ = REX_B;
127 			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
128 		}
129 		if (saveds >= 4) {
130 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
131 			*buf++ = REX_B;
132 			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
133 		}
134 		if (saveds >= 3) {
135 #ifndef _WIN64
136 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
137 			*buf++ = REX_B;
138 #else
139 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
140 #endif
141 			PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
142 		}
143 		if (saveds >= 2) {
144 #ifndef _WIN64
145 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
146 			*buf++ = REX_B;
147 #else
148 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
149 #endif
150 			PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
151 		}
152 		if (saveds >= 1) {
153 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
154 			PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
155 		}
156 #ifdef _WIN64
157 		if (temporaries >= 5) {
158 			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
159 			*buf++ = REX_B;
160 			PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
161 		}
162 #endif
163 
164 #ifndef _WIN64
165 		if (args > 0) {
166 			*buf++ = REX_W;
167 			*buf++ = 0x8b;
168 			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7;
169 		}
170 		if (args > 1) {
171 			*buf++ = REX_W | REX_R;
172 			*buf++ = 0x8b;
173 			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6;
174 		}
175 		if (args > 2) {
176 			*buf++ = REX_W | REX_R;
177 			*buf++ = 0x8b;
178 			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2;
179 		}
180 #else
181 		if (args > 0) {
182 			*buf++ = REX_W;
183 			*buf++ = 0x8b;
184 			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1;
185 		}
186 		if (args > 1) {
187 			*buf++ = REX_W;
188 			*buf++ = 0x8b;
189 			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2;
190 		}
191 		if (args > 2) {
192 			*buf++ = REX_W | REX_B;
193 			*buf++ = 0x8b;
194 			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0;
195 		}
196 #endif
197 	}
198 
199 	local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
200 	compiler->local_size = local_size;
201 #ifdef _WIN64
202 	if (local_size > 1024) {
203 		/* Allocate stack for the callback, which grows the stack. */
204 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
205 		FAIL_IF(!buf);
206 		INC_SIZE(4);
207 		*buf++ = REX_W;
208 		*buf++ = 0x83;
209 		*buf++ = 0xc0 | (5 << 3) | 4;
210 		/* Pushed size must be divisible by 8. */
211 		SLJIT_ASSERT(!(pushed_size & 0x7));
212 		if (pushed_size & 0x8) {
213 			*buf++ = 5 * sizeof(sljit_w);
214 			local_size -= 5 * sizeof(sljit_w);
215 		} else {
216 			*buf++ = 4 * sizeof(sljit_w);
217 			local_size -= 4 * sizeof(sljit_w);
218 		}
219 		FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size));
220 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
221 	}
222 #endif
223 	SLJIT_ASSERT(local_size > 0);
224 	if (local_size <= 127) {
225 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
226 		FAIL_IF(!buf);
227 		INC_SIZE(4);
228 		*buf++ = REX_W;
229 		*buf++ = 0x83;
230 		*buf++ = 0xc0 | (5 << 3) | 4;
231 		*buf++ = local_size;
232 	}
233 	else {
234 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
235 		FAIL_IF(!buf);
236 		INC_SIZE(7);
237 		*buf++ = REX_W;
238 		*buf++ = 0x81;
239 		*buf++ = 0xc0 | (5 << 3) | 4;
240 		*(sljit_hw*)buf = local_size;
241 		buf += sizeof(sljit_hw);
242 	}
243 
244 	return SLJIT_SUCCESS;
245 }
246 
247 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
248 {
249 	int pushed_size;
250 
251 	CHECK_ERROR_VOID();
252 	check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
253 
254 	compiler->temporaries = temporaries;
255 	compiler->saveds = saveds;
256 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
257 	compiler->logical_local_size = local_size;
258 #endif
259 
260 	/* Including the return address saved by the call instruction. */
261 	pushed_size = (saveds + 1) * sizeof(sljit_w);
262 #ifdef _WIN64
263 	if (temporaries >= 5)
264 		pushed_size += sizeof(sljit_w);
265 #endif
266 	compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
267 }
268 
269 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
270 {
271 	int size;
272 	sljit_ub *buf;
273 
274 	CHECK_ERROR();
275 	check_sljit_emit_return(compiler, op, src, srcw);
276 
277 	compiler->flags_saved = 0;
278 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
279 
280 	SLJIT_ASSERT(compiler->local_size > 0);
281 	if (compiler->local_size <= 127) {
282 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
283 		FAIL_IF(!buf);
284 		INC_SIZE(4);
285 		*buf++ = REX_W;
286 		*buf++ = 0x83;
287 		*buf++ = 0xc0 | (0 << 3) | 4;
288 		*buf = compiler->local_size;
289 	}
290 	else {
291 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
292 		FAIL_IF(!buf);
293 		INC_SIZE(7);
294 		*buf++ = REX_W;
295 		*buf++ = 0x81;
296 		*buf++ = 0xc0 | (0 << 3) | 4;
297 		*(sljit_hw*)buf = compiler->local_size;
298 	}
299 
300 	size = 1 + compiler->saveds;
301 #ifndef _WIN64
302 	if (compiler->saveds >= 2)
303 		size += compiler->saveds - 1;
304 #else
305 	if (compiler->saveds >= 4)
306 		size += compiler->saveds - 3;
307 	if (compiler->temporaries >= 5)
308 		size += (5 - 4) * 2;
309 #endif
310 	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
311 	FAIL_IF(!buf);
312 
313 	INC_SIZE(size);
314 
315 #ifdef _WIN64
316 	if (compiler->temporaries >= 5) {
317 		*buf++ = REX_B;
318 		POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
319 	}
320 #endif
321 	if (compiler->saveds >= 1)
322 		POP_REG(reg_map[SLJIT_SAVED_REG1]);
323 	if (compiler->saveds >= 2) {
324 #ifndef _WIN64
325 		*buf++ = REX_B;
326 #endif
327 		POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
328 	}
329 	if (compiler->saveds >= 3) {
330 #ifndef _WIN64
331 		*buf++ = REX_B;
332 #endif
333 		POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
334 	}
335 	if (compiler->saveds >= 4) {
336 		*buf++ = REX_B;
337 		POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
338 	}
339 	if (compiler->saveds >= 5) {
340 		*buf++ = REX_B;
341 		POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
342 	}
343 
344 	RET();
345 	return SLJIT_SUCCESS;
346 }
347 
348 /* --------------------------------------------------------------------- */
349 /*  Operators                                                            */
350 /* --------------------------------------------------------------------- */
351 
352 static int emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_w imm)
353 {
354 	sljit_ub *buf;
355 
356 	if (rex != 0) {
357 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_hw));
358 		FAIL_IF(!buf);
359 		INC_SIZE(2 + sizeof(sljit_hw));
360 		*buf++ = rex;
361 		*buf++ = opcode;
362 		*(sljit_hw*)buf = imm;
363 	}
364 	else {
365 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_hw));
366 		FAIL_IF(!buf);
367 		INC_SIZE(1 + sizeof(sljit_hw));
368 		*buf++ = opcode;
369 		*(sljit_hw*)buf = imm;
370 	}
371 	return SLJIT_SUCCESS;
372 }
373 
374 static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size,
375 	/* The register or immediate operand. */
376 	int a, sljit_w imma,
377 	/* The general operand (not immediate). */
378 	int b, sljit_w immb)
379 {
380 	sljit_ub *buf;
381 	sljit_ub *buf_ptr;
382 	sljit_ub rex = 0;
383 	int flags = size & ~0xf;
384 	int inst_size;
385 
386 	/* The immediate operand must be 32 bit. */
387 	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
388 	/* Both cannot be switched on. */
389 	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
390 	/* Size flags not allowed for typed instructions. */
391 	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
392 	/* Both size flags cannot be switched on. */
393 	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
394 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
395 	/* SSE2 and immediate is not possible. */
396 	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
397 #endif
398 
399 	size &= 0xf;
400 	inst_size = size;
401 
402 	if ((b & SLJIT_MEM) && !(b & 0xf0) && NOT_HALFWORD(immb)) {
403 		if (emit_load_imm64(compiler, TMP_REG3, immb))
404 			return NULL;
405 		immb = 0;
406 		if (b & 0xf)
407 			b |= TMP_REG3 << 4;
408 		else
409 			b |= TMP_REG3;
410 	}
411 
412 	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
413 		rex |= REX_W;
414 	else if (flags & EX86_REX)
415 		rex |= REX;
416 
417 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
418 	if (flags & EX86_PREF_F2)
419 		inst_size++;
420 #endif
421 	if (flags & EX86_PREF_66)
422 		inst_size++;
423 
424 	/* Calculate size of b. */
425 	inst_size += 1; /* mod r/m byte. */
426 	if (b & SLJIT_MEM) {
427 		if ((b & 0x0f) == SLJIT_UNUSED)
428 			inst_size += 1 + sizeof(sljit_hw); /* SIB byte required to avoid RIP based addressing. */
429 		else {
430 			if (reg_map[b & 0x0f] >= 8)
431 				rex |= REX_B;
432 			if (immb != 0 && !(b & 0xf0)) {
433 				/* Immediate operand. */
434 				if (immb <= 127 && immb >= -128)
435 					inst_size += sizeof(sljit_b);
436 				else
437 					inst_size += sizeof(sljit_hw);
438 			}
439 		}
440 
441 		if ((b & 0xf) == SLJIT_LOCALS_REG && !(b & 0xf0))
442 			b |= SLJIT_LOCALS_REG << 4;
443 
444 		if ((b & 0xf0) != SLJIT_UNUSED) {
445 			inst_size += 1; /* SIB byte. */
446 			if (reg_map[(b >> 4) & 0x0f] >= 8)
447 				rex |= REX_X;
448 		}
449 	}
450 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
451 	else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
452 		rex |= REX_B;
453 #else
454 	else if (reg_map[b] >= 8)
455 		rex |= REX_B;
456 #endif
457 
458 	if (a & SLJIT_IMM) {
459 		if (flags & EX86_BIN_INS) {
460 			if (imma <= 127 && imma >= -128) {
461 				inst_size += 1;
462 				flags |= EX86_BYTE_ARG;
463 			} else
464 				inst_size += 4;
465 		}
466 		else if (flags & EX86_SHIFT_INS) {
467 			imma &= compiler->mode32 ? 0x1f : 0x3f;
468 			if (imma != 1) {
469 				inst_size ++;
470 				flags |= EX86_BYTE_ARG;
471 			}
472 		} else if (flags & EX86_BYTE_ARG)
473 			inst_size++;
474 		else if (flags & EX86_HALF_ARG)
475 			inst_size += sizeof(short);
476 		else
477 			inst_size += sizeof(sljit_hw);
478 	}
479 	else {
480 		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
481 		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
482 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
483 		if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
484 			rex |= REX_R;
485 #else
486 		if (reg_map[a] >= 8)
487 			rex |= REX_R;
488 #endif
489 	}
490 
491 	if (rex)
492 		inst_size++;
493 
494 	buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
495 	PTR_FAIL_IF(!buf);
496 
497 	/* Encoding the byte. */
498 	INC_SIZE(inst_size);
499 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
500 	if (flags & EX86_PREF_F2)
501 		*buf++ = 0xf2;
502 #endif
503 	if (flags & EX86_PREF_66)
504 		*buf++ = 0x66;
505 	if (rex)
506 		*buf++ = rex;
507 	buf_ptr = buf + size;
508 
509 	/* Encode mod/rm byte. */
510 	if (!(flags & EX86_SHIFT_INS)) {
511 		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
512 			*buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81;
513 
514 		if ((a & SLJIT_IMM) || (a == 0))
515 			*buf_ptr = 0;
516 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
517 		else if (!(flags & EX86_SSE2))
518 			*buf_ptr = reg_lmap[a] << 3;
519 		else
520 			*buf_ptr = a << 3;
521 #else
522 		else
523 			*buf_ptr = reg_lmap[a] << 3;
524 #endif
525 	}
526 	else {
527 		if (a & SLJIT_IMM) {
528 			if (imma == 1)
529 				*buf = 0xd1;
530 			else
531 				*buf = 0xc1;
532 		} else
533 			*buf = 0xd3;
534 		*buf_ptr = 0;
535 	}
536 
537 	if (!(b & SLJIT_MEM))
538 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
539 		*buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
540 #else
541 		*buf_ptr++ |= 0xc0 + reg_lmap[b];
542 #endif
543 	else if ((b & 0x0f) != SLJIT_UNUSED) {
544 		if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) {
545 			if (immb != 0) {
546 				if (immb <= 127 && immb >= -128)
547 					*buf_ptr |= 0x40;
548 				else
549 					*buf_ptr |= 0x80;
550 			}
551 
552 			if ((b & 0xf0) == SLJIT_UNUSED)
553 				*buf_ptr++ |= reg_lmap[b & 0x0f];
554 			else {
555 				*buf_ptr++ |= 0x04;
556 				*buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3);
557 			}
558 
559 			if (immb != 0) {
560 				if (immb <= 127 && immb >= -128)
561 					*buf_ptr++ = immb; /* 8 bit displacement. */
562 				else {
563 					*(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */
564 					buf_ptr += sizeof(sljit_hw);
565 				}
566 			}
567 		}
568 		else {
569 			*buf_ptr++ |= 0x04;
570 			*buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3) | (immb << 6);
571 		}
572 	}
573 	else {
574 		*buf_ptr++ |= 0x04;
575 		*buf_ptr++ = 0x25;
576 		*(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */
577 		buf_ptr += sizeof(sljit_hw);
578 	}
579 
580 	if (a & SLJIT_IMM) {
581 		if (flags & EX86_BYTE_ARG)
582 			*buf_ptr = imma;
583 		else if (flags & EX86_HALF_ARG)
584 			*(short*)buf_ptr = imma;
585 		else if (!(flags & EX86_SHIFT_INS))
586 			*(sljit_hw*)buf_ptr = imma;
587 	}
588 
589 	return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1);
590 }
591 
592 /* --------------------------------------------------------------------- */
593 /*  Call / return instructions                                           */
594 /* --------------------------------------------------------------------- */
595 
596 static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type)
597 {
598 	sljit_ub *buf;
599 
600 #ifndef _WIN64
601 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 6 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
602 
603 	buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
604 	FAIL_IF(!buf);
605 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
606 	if (type >= SLJIT_CALL3) {
607 		*buf++ = REX_W;
608 		*buf++ = 0x8b;
609 		*buf++ = 0xc0 | (0x2 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
610 	}
611 	*buf++ = REX_W;
612 	*buf++ = 0x8b;
613 	*buf++ = 0xc0 | (0x7 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
614 #else
615 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 2 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
616 
617 	buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
618 	FAIL_IF(!buf);
619 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
620 	if (type >= SLJIT_CALL3) {
621 		*buf++ = REX_W | REX_R;
622 		*buf++ = 0x8b;
623 		*buf++ = 0xc0 | (0x0 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
624 	}
625 	*buf++ = REX_W;
626 	*buf++ = 0x8b;
627 	*buf++ = 0xc0 | (0x1 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
628 #endif
629 	return SLJIT_SUCCESS;
630 }
631 
632 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
633 {
634 	sljit_ub *buf;
635 
636 	CHECK_ERROR();
637 	check_sljit_emit_fast_enter(compiler, dst, dstw);
638 	ADJUST_LOCAL_OFFSET(dst, dstw);
639 
640 	/* For UNUSED dst. Uncommon, but possible. */
641 	if (dst == SLJIT_UNUSED)
642 		dst = TMP_REGISTER;
643 
644 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
645 		if (reg_map[dst] < 8) {
646 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
647 			FAIL_IF(!buf);
648 
649 			INC_SIZE(1);
650 			POP_REG(reg_lmap[dst]);
651 		}
652 		else {
653 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
654 			FAIL_IF(!buf);
655 
656 			INC_SIZE(2);
657 			*buf++ = REX_B;
658 			POP_REG(reg_lmap[dst]);
659 		}
660 	}
661 	else if (dst & SLJIT_MEM) {
662 		/* REX_W is not necessary (src is not immediate). */
663 		compiler->mode32 = 1;
664 		buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
665 		FAIL_IF(!buf);
666 		*buf++ = 0x8f;
667 	}
668 	return SLJIT_SUCCESS;
669 }
670 
671 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
672 {
673 	sljit_ub *buf;
674 
675 	CHECK_ERROR();
676 	check_sljit_emit_fast_return(compiler, src, srcw);
677 	ADJUST_LOCAL_OFFSET(src, srcw);
678 
679 	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
680 		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw));
681 		src = TMP_REGISTER;
682 	}
683 
684 	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
685 		if (reg_map[src] < 8) {
686 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
687 			FAIL_IF(!buf);
688 
689 			INC_SIZE(1 + 1);
690 			PUSH_REG(reg_lmap[src]);
691 		}
692 		else {
693 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
694 			FAIL_IF(!buf);
695 
696 			INC_SIZE(2 + 1);
697 			*buf++ = REX_B;
698 			PUSH_REG(reg_lmap[src]);
699 		}
700 	}
701 	else if (src & SLJIT_MEM) {
702 		/* REX_W is not necessary (src is not immediate). */
703 		compiler->mode32 = 1;
704 		buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
705 		FAIL_IF(!buf);
706 		*buf++ = 0xff;
707 		*buf |= 6 << 3;
708 
709 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
710 		FAIL_IF(!buf);
711 		INC_SIZE(1);
712 	}
713 	else {
714 		SLJIT_ASSERT(IS_HALFWORD(srcw));
715 		/* SLJIT_IMM. */
716 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
717 		FAIL_IF(!buf);
718 
719 		INC_SIZE(5 + 1);
720 		*buf++ = 0x68;
721 		*(sljit_hw*)buf = srcw;
722 		buf += sizeof(sljit_hw);
723 	}
724 
725 	RET();
726 	return SLJIT_SUCCESS;
727 }
728 
729 
730 /* --------------------------------------------------------------------- */
731 /*  Extend input                                                         */
732 /* --------------------------------------------------------------------- */
733 
734 static int emit_mov_int(struct sljit_compiler *compiler, int sign,
735 	int dst, sljit_w dstw,
736 	int src, sljit_w srcw)
737 {
738 	sljit_ub* code;
739 	int dst_r;
740 
741 	compiler->mode32 = 0;
742 
743 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
744 		return SLJIT_SUCCESS; /* Empty instruction. */
745 
746 	if (src & SLJIT_IMM) {
747 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
748 			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
749 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
750 				FAIL_IF(!code);
751 				*code = 0xc7;
752 				return SLJIT_SUCCESS;
753 			}
754 			return emit_load_imm64(compiler, dst, srcw);
755 		}
756 		compiler->mode32 = 1;
757 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
758 		FAIL_IF(!code);
759 		*code = 0xc7;
760 		compiler->mode32 = 0;
761 		return SLJIT_SUCCESS;
762 	}
763 
764 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_SAVED_REG3) ? dst : TMP_REGISTER;
765 
766 	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_SAVED_REG3))
767 		dst_r = src;
768 	else {
769 		if (sign) {
770 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
771 			FAIL_IF(!code);
772 			*code++ = 0x63;
773 		} else {
774 			compiler->mode32 = 1;
775 			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
776 			compiler->mode32 = 0;
777 		}
778 	}
779 
780 	if (dst & SLJIT_MEM) {
781 		compiler->mode32 = 1;
782 		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
783 		FAIL_IF(!code);
784 		*code = 0x89;
785 		compiler->mode32 = 0;
786 	}
787 
788 	return SLJIT_SUCCESS;
789 }
790