/*	$NetBSD: sljitNativeX86_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

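/* Loads a 64-bit immediate into a register (mov reg, imm64): REX.W plus REX.B
   for the upper eight registers, then MOV_r_i32 + the low three bits of the
   register number, followed by the eight byte immediate.  Assuming MOV_r_i32
   is the B8+rd opcode from sljitNativeX86_common.c, a load into r9 would
   encode as 49 B9 imm64. */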
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

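/* Emits the absolute (far) form of a jump or call: a 10-byte mov of the
   64-bit target into TMP_REG3 (r9, see the assert below) followed by a
   3-byte indirect jmp/call through r9.  For conditional jumps the condition
   is inverted and emitted as a short Jcc whose displacement (10 + 3) skips
   the whole sequence when the branch is not taken. */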
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		*code_ptr++ = 10 + 3;
	}

	SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
	*code_ptr++ = REX_W | REX_B;
	*code_ptr++ = MOV_r_i32 + 1;
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += sizeof(sljit_sw);
	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);

	return code_ptr;
}

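/* Function prologue: pushes the saved registers and the permanently saved
   scratch registers, copies the incoming arguments (rdi/rsi/rdx on SysV,
   rcx/rdx/r8 on Win64) into S0..S2, then subtracts the local area from rsp.
   The local size is rounded so that rsp stays 16-byte aligned together with
   the pushed registers and the return address. */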
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 bytes. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	if (local_size > 0) {
		if (local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}

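/* Recomputes the frame layout (local_size, locals_offset) without emitting
   any code; the rounding must match sljit_emit_enter exactly. */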
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}

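/* Function epilogue: restores xmm6 on Win64, adds local_size back to rsp,
   pops the saved scratch and saved registers in reverse order of the
   prologue, then emits RET. */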
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

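/* Emits an optional REX prefix, a single opcode byte and a 32-bit immediate. */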
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, imm);
	return SLJIT_SUCCESS;
}

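/* Central instruction encoder.  The emitted instruction has the layout
   [66/F2/F3 prefix] [REX] [opcode, "size" bytes] [ModRM] [SIB] [disp8/32]
   [imm8/16/32].  The opcode bytes are left for the caller to fill in: the
   returned pointer addresses the opcode position, or the ModRM byte for
   shift instructions, whose operation lives in the ModRM reg field.
   Addresses that do not fit in a signed 32-bit displacement are first
   loaded into TMP_REG3 and addressed through it. */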
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG3);
				else
					b |= TMP_REG3;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

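/* Moves the sljit argument registers into the ABI argument registers before
   a call: R0 into rdi (SysV) or rcx (Win64), and R2 into rdx or r8 for
   SLJIT_CALL3.  R1 already resides in the second argument register (rsi on
   SysV, rdx on Win64), as the asserts below check. */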
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#ifndef _WIN64
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}

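/* Entry point of a SLJIT_FAST_CALL target: pops the return address pushed
   by the fast call into dst, using a single pop reg when dst is a register
   or a pop r/m form when dst is a memory operand. */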
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

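/* Return from a SLJIT_FAST_CALL target: pushes the saved return address
   (register, memory or immediate) back onto the stack and emits RET, which
   transfers control to it. */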
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else if (src & SLJIT_MEM) {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		SLJIT_ASSERT(IS_HALFWORD(srcw));
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_s32(inst, srcw);
		inst += sizeof(sljit_s32);
	}

	RET();
	return SLJIT_SUCCESS;
}


/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */

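/* Moves a 32-bit value into a 64-bit destination.  When sign is non-zero
   the source is sign extended with movsxd; otherwise a plain 32-bit mov is
   used, which zero extends the upper half on x86-64. */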
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}