/*	$NetBSD: sljitNativeX86_32.c,v 1.6 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

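/* Emit the opcode byte(s) of a jump or call with a 32 bit displacement and
   reserve the word for that displacement. The word is either marked for
   later patching (JUMP_LABEL set, PATCH_MW added) or filled in here relative
   to the next instruction. Returns the code pointer after the reserved word. */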
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}

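/* Function prologue: saves TMP_REG1 and the used saved registers, moves up
   to three arguments into SLJIT_S0..S2 (from the argument registers and the
   stack under fastcall, from the caller's stack frame otherwise) and
   allocates local_size bytes of stack for the locals. */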
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;

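	/* On Windows the stack is committed page by page, so larger frames are
	   grown through the sljit_grow_stack helper (requested size passed in
	   SLJIT_R0) before the final adjustment of ESP below. */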
#ifdef _WIN32
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
		/* Space for a single argument. This amount is excluded when the stack is allocated below. */
		local_size -= sizeof(sljit_sw);
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	SLJIT_ASSERT(local_size > 0);

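	/* With SLJIT_F64_ALIGNMENT the entry value of ESP is kept in TMP_REG1,
	   ESP is rounded down to a multiple of sizeof(sljit_f64), and the saved
	   value is stored at [ESP + local_size] so sljit_emit_return can restore it. */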
193 	if (options & SLJIT_F64_ALIGNMENT) {
194 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);
195 
196 		/* Some space might allocated during sljit_grow_stack() above on WIN32. */
197 		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
198 			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));
199 
200 #if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
201 		if (compiler->local_size > 1024)
202 			FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
203 				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
204 #endif
205 
206 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
207 		FAIL_IF(!inst);
208 
209 		INC_SIZE(6);
210 		inst[0] = GROUP_BINARY_81;
211 		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
212 		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));
213 
214 		/* The real local size must be used. */
215 		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
216 	}
217 #endif
218 	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
219 		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
220 }
221 
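/* sljit_set_context performs the same frame layout computation as
   sljit_emit_enter, but emits no code: it only records the offsets and the
   local size so that later emitted instructions use the right layout. */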
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

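/* Function epilogue: moves the return value if needed, releases the locals,
   pops the saved registers and TMP_REG1, then returns. Under fastcall a
   "ret imm16" is used when the third argument was passed on the stack. */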
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Size contains the flags as well. */
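/* Generic instruction encoder. Computes the total encoded length, emits the
   optional 0xf2/0xf3/0x66 prefixes, the mod r/m byte (plus SIB byte and
   displacement) for operand b and the immediate of operand a, leaving room
   for the opcode byte(s) in between. The returned pointer lets the caller
   fill in or complete the opcode. */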
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

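	/* 'inst' now points at the opcode area reserved for the caller ('size'
	   bytes); the mod r/m byte and everything after it starts at 'buf_ptr'. */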
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

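/* Moves the sljit argument registers into the locations expected by the
   callee: under fastcall the first argument goes to SLJIT_R2 (whose previous
   value, the third argument, is pushed first); otherwise all arguments are
   stored into the slots reserved at the bottom of the stack frame. */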
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);

	if (type >= SLJIT_CALL3)
		PUSH_REG(reg_map[SLJIT_R2]);
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	*inst++ = MOV_rm_r;
	*inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
	*inst++ = 0;
	if (type >= SLJIT_CALL2) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = sizeof(sljit_sw);
	}
	if (type >= SLJIT_CALL3) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = 2 * sizeof(sljit_sw);
	}
#endif
	return SLJIT_SUCCESS;
}

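/* Pops the return address pushed by the matching fast call into dst, which
   may be a register or a memory operand. */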
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

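/* Pushes the return address held in src (register, memory or immediate)
   back onto the stack and returns, transferring control to that address. */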
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else if (src & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_sw(inst, srcw);
		inst += sizeof(sljit_sw);
	}

	RET();
	return SLJIT_SUCCESS;
}