xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativeX86_common.c (revision 96fc3e30a7c3f7bba53384bf41dad5f78306fac4)
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
/* Returns a human-readable name of the target: "x86" plus the
   CPU-feature suffix supplied by the SLJIT_CPUINFO macro. */
SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}
31 
32 /*
33    32b register indexes:
34      0 - EAX
35      1 - ECX
36      2 - EDX
37      3 - EBX
38      4 - none
39      5 - EBP
40      6 - ESI
41      7 - EDI
42 */
43 
44 /*
45    64b register indexes:
46      0 - RAX
47      1 - RCX
48      2 - RDX
49      3 - RBX
50      4 - none
51      5 - RBP
52      6 - RSI
53      7 - RDI
54      8 - R8   - From now on REX prefix is required
55      9 - R9
56     10 - R10
57     11 - R11
58     12 - R12
59     13 - R13
60     14 - R14
61     15 - R15
62 */
63 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

/* Maps sljit register indexes to the x86 register encodings listed in
   the tables above.  Index 0 is unused; the zero entries in the middle
   are the EREG registers, which live in stack slots on x86-32 (see
   CHECK_EXTRA_REGS below). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

/* On x86-32 the extra (EREG) registers have no machine register: rewrite
   such an operand (p, w) into a stack slot addressed off SLJIT_LOCALS_REG
   and execute 'do' so the caller can emit any follow-up it needs. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
   Note: avoid to use r12 and r13 for memory addressing
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
};
#endif

/* REX prefix bytes: 0x40 base; W selects 64-bit operand size,
   R/X/B extend the reg, SIB-index and r/m/base fields to r8-r15. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* 32-bit ("half machine word") types used for rel32 displacements
   and 32-bit immediates on x86-64. */
typedef unsigned int sljit_uhw;
typedef int sljit_hw;

/* Whether x fits in a sign-extended 32-bit immediate/displacement. */
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

/* All sljit registers are mapped to machine registers on x86-64,
   so there is nothing to rewrite here. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
130 
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Scratch floating point register (one past the last user FP register). */
#define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		0x0800
#define EX86_SSE2		0x1000
#endif

/* Write the chunk-length byte that prefixes every instruction record in
   the code buffer (consumed by sljit_generate_code) and account for the
   instruction bytes in compiler->size. */
#define INC_SIZE(s)			(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)			(*code++ = (s), compiler->size += (s))

/* One-byte instruction encodings. */
#define PUSH_REG(r)			(*buf++ = (0x50 + (r)))
#define POP_REG(r)			(*buf++ = (0x58 + (r)))
#define RET()				(*buf++ = (0xc3))
/* ret imm16, little-endian immediate (n must be < 256). */
#define RETN(n)				(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
158 
159 static sljit_ub get_jump_code(int type)
160 {
161 	switch (type) {
162 	case SLJIT_C_EQUAL:
163 	case SLJIT_C_FLOAT_EQUAL:
164 		return 0x84;
165 
166 	case SLJIT_C_NOT_EQUAL:
167 	case SLJIT_C_FLOAT_NOT_EQUAL:
168 		return 0x85;
169 
170 	case SLJIT_C_LESS:
171 	case SLJIT_C_FLOAT_LESS:
172 		return 0x82;
173 
174 	case SLJIT_C_GREATER_EQUAL:
175 	case SLJIT_C_FLOAT_GREATER_EQUAL:
176 		return 0x83;
177 
178 	case SLJIT_C_GREATER:
179 	case SLJIT_C_FLOAT_GREATER:
180 		return 0x87;
181 
182 	case SLJIT_C_LESS_EQUAL:
183 	case SLJIT_C_FLOAT_LESS_EQUAL:
184 		return 0x86;
185 
186 	case SLJIT_C_SIG_LESS:
187 		return 0x8c;
188 
189 	case SLJIT_C_SIG_GREATER_EQUAL:
190 		return 0x8d;
191 
192 	case SLJIT_C_SIG_GREATER:
193 		return 0x8f;
194 
195 	case SLJIT_C_SIG_LESS_EQUAL:
196 		return 0x8e;
197 
198 	case SLJIT_C_OVERFLOW:
199 	case SLJIT_C_MUL_OVERFLOW:
200 		return 0x80;
201 
202 	case SLJIT_C_NOT_OVERFLOW:
203 	case SLJIT_C_MUL_NOT_OVERFLOW:
204 		return 0x81;
205 
206 	case SLJIT_C_FLOAT_UNORDERED:
207 		return 0x8a;
208 
209 	case SLJIT_C_FLOAT_ORDERED:
210 		return 0x8b;
211 	}
212 	return 0;
213 }
214 
/* Emits a jump through an absolute address (always rewritable);
   defined in the 32/64-bit specific file included below. */
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Emits a call/jump to a fixed target address (x86-64 only). */
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
#endif
220 
/* Emits the opcode byte(s) of a pc-relative jump/call and reserves space
   for its displacement.  The displacement itself is filled in by the
   patch loop at the end of sljit_generate_code; jump->addr is advanced
   past the opcode so it points at the displacement to patch, and
   PATCH_MB/PATCH_MW record its width. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	/* Distance measured from the end of the 2-byte short form (opcode + rel8). */
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* rel32 cannot reach beyond +/-2GB: fall back to an absolute jump. */
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0; /* call has no rel8 form */
		*code_ptr++ = 0xe8; /* call rel32 */
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10; /* 0x8x -> 0x7x: Jcc rel8 */
		jump->addr++;
	}
	else {
		*code_ptr++ = 0x0f; /* two-byte Jcc rel32 */
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB; /* 8-bit displacement to patch */
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW; /* word (32-bit on x86-64) displacement to patch */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
273 
274 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
275 {
276 	struct sljit_memory_fragment *buf;
277 	sljit_ub *code;
278 	sljit_ub *code_ptr;
279 	sljit_ub *buf_ptr;
280 	sljit_ub *buf_end;
281 	sljit_ub len;
282 
283 	struct sljit_label *label;
284 	struct sljit_jump *jump;
285 	struct sljit_const *const_;
286 
287 	CHECK_ERROR_PTR();
288 	check_sljit_generate_code(compiler);
289 	reverse_buf(compiler);
290 
291 	/* Second code generation pass. */
292 	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
293 	PTR_FAIL_WITH_EXEC_IF(code);
294 	buf = compiler->buf;
295 
296 	code_ptr = code;
297 	label = compiler->labels;
298 	jump = compiler->jumps;
299 	const_ = compiler->consts;
300 	do {
301 		buf_ptr = buf->memory;
302 		buf_end = buf_ptr + buf->used_size;
303 		do {
304 			len = *buf_ptr++;
305 			if (len > 0) {
306 				/* The code is already generated. */
307 				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
308 				code_ptr += len;
309 				buf_ptr += len;
310 			}
311 			else {
312 				if (*buf_ptr >= 4) {
313 					jump->addr = (sljit_uw)code_ptr;
314 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
315 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
316 					else
317 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
318 					jump = jump->next;
319 				}
320 				else if (*buf_ptr == 0) {
321 					label->addr = (sljit_uw)code_ptr;
322 					label->size = code_ptr - code;
323 					label = label->next;
324 				}
325 				else if (*buf_ptr == 1) {
326 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
327 					const_ = const_->next;
328 				}
329 				else {
330 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
331 					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
332 					buf_ptr++;
333 					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
334 					code_ptr += sizeof(sljit_w);
335 					buf_ptr += sizeof(sljit_w) - 1;
336 #else
337 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
338 					buf_ptr += sizeof(sljit_w);
339 #endif
340 				}
341 				buf_ptr++;
342 			}
343 		} while (buf_ptr < buf_end);
344 		SLJIT_ASSERT(buf_ptr == buf_end);
345 		buf = buf->next;
346 	} while (buf);
347 
348 	SLJIT_ASSERT(!label);
349 	SLJIT_ASSERT(!jump);
350 	SLJIT_ASSERT(!const_);
351 
352 	jump = compiler->jumps;
353 	while (jump) {
354 		if (jump->flags & PATCH_MB) {
355 			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
356 			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
357 		} else if (jump->flags & PATCH_MW) {
358 			if (jump->flags & JUMP_LABEL) {
359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
360 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
361 #else
362 				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
363 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
364 #endif
365 			}
366 			else {
367 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
368 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
369 #else
370 				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
371 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
372 #endif
373 			}
374 		}
375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
376 		else if (jump->flags & PATCH_MD)
377 			*(sljit_w*)jump->addr = jump->u.label->addr;
378 #endif
379 
380 		jump = jump->next;
381 	}
382 
383 	/* Maybe we waste some space because of short jumps. */
384 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
385 	compiler->error = SLJIT_ERR_COMPILED;
386 	compiler->executable_size = compiler->size;
387 	return (void*)code;
388 }
389 
390 /* --------------------------------------------------------------------- */
391 /*  Operators                                                            */
392 /* --------------------------------------------------------------------- */
393 
/* Forward declarations for the binary-operation and move emitters
   defined later in this file.  The cumulative variant is for operators
   where src1/src2 may be swapped (add, and, or, xor); the
   non-cumulative one preserves operand order (sub, cmp). */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw);
409 
/* Saves the flags register into the word at the current stack top
   without changing the stack pointer overall: lea (which, unlike
   add/sub, does not modify the flags being saved) moves esp/rsp one
   word up, then pushfd/pushfq stores the flags and moves the pointer
   back down.  NOTE(review): assumes the slot at the stack top is
   reserved scratch space by the frame layout — confirm against the
   enter/return emitters. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = REX_W;
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)sizeof(sljit_w);
	*buf++ = 0x9c; /* pushfd / pushfq */
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
432 
/* Restores the flags saved by emit_save_flags: popfd/popfq reloads them
   from the stack top, then lea (which does not disturb the freshly
   restored flags) moves the stack pointer back down, leaving it
   unchanged overall.  keep_flags is the new value of
   compiler->flags_saved (non-zero if the flags remain live). */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9d; /* popfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9d; /* popfq */
	*buf++ = REX_W;
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
456 
457 #ifdef _WIN32
458 #include <malloc.h>
459 
/* Windows-only helper, invoked via SLJIT_CALL from generated code. */
static void SLJIT_CALL sljit_grow_stack(sljit_w local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4k pages belongs to the requested stack space,
	which size is passed in local_size. This is necessary on Windows where
	the stack can only grow in 4k steps. However, this function just burn
	CPU cycles if the stack is large enough, but you don't know it in advance.
	I think this is a bad design even if it has some reasons. */
	alloca(local_size);
}
470 
471 #endif
472 
473 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
474 #include "sljitNativeX86_32.c"
475 #else
476 #include "sljitNativeX86_64.c"
477 #endif
478 
/* Emits a machine-word move between two arbitrary operands.
   Opcodes used: 0x8b = mov r, r/m; 0x89 = mov r/m, r;
   0xc7 = mov r/m, imm32; 0xb8+reg = mov reg, imm.
   Memory-to-memory moves go through TMP_REGISTER. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags. */
		if (src & SLJIT_MEM) {
			/* Still perform the load (into TMP_REGISTER); presumably
			   the memory access itself must happen — verify. */
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8b;
		}
		return SLJIT_SUCCESS;
	}
	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
		/* Register source: single store/reg-reg move. */
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				/* 64-bit immediate needs movabs; otherwise fall
				   through to the sign-extended imm32 form (0xc7). */
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* No mov m64, imm64 form: load into TMP_REG2, then store. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x89;
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
		/* Register destination: single load. */
		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!code);
		*code = 0x8b;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code = 0x8b;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
	FAIL_IF(!code);
	*code = 0x89;
	return SLJIT_SUCCESS;
}
543 
/* Convenience wrapper: emits a move and returns from the enclosing
   function on failure (expands to FAIL_IF). */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
546 
/* Emits a zero-operand operation: breakpoint (int3), nop, or the
   widening multiply / divide group.  The latter use the implicit
   eax/edx (rax/rdx) operand pair of the x86 mul/imul/div/idiv
   instructions (opcode 0xf7 with ModRM /4, /5, /6, /7); the
   SLJIT_COMPILE_ASSERTs below pin the register mapping this relies on. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc; /* int3 */
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90; /* nop */
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			/* REG2 is edx here: save the divisor in TMP_REGISTER,
			   then clear edx (the implicit high word) with xor. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			/* TMP_REGISTER is rdx here: just clear it. */
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor r, r/m */
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			/* Save the divisor before cdq clobbers edx. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
#endif

			/* CDQ instruction */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!buf);
			INC_SIZE(1);
			*buf = 0x99; /* cdq: sign-extend eax into edx */
#else
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!buf);
				INC_SIZE(1);
				*buf = 0x99; /* cdq */
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!buf);
				INC_SIZE(2);
				*buf++ = REX_W;
				*buf = 0x99; /* cqo */
			}
#endif
		}

		/* The f7 group instruction itself; the ModRM reg field (/4../7)
		   selecting mul/imul/div/idiv is OR-ed in below.  For divisions
		   the second operand is the saved copy in TMP_REGISTER. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B;
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3; /* mul */
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3; /* imul */
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3; /* div */
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3; /* idiv */
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* REG2 is not rdx here: move the high result / remainder
		   (left in rdx == TMP_REGISTER) into REG2. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
682 
/* Emits a single raw byte as its own instruction record (used below for
   the one-byte xchg eax, reg encodings 0x90 + reg). */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
690 
/* Emits an 8-bit move with zero (sign == 0) or sign (sign != 0)
   extension.  Opcodes: 0xc6 = mov r/m8, imm8; 0x0f 0xb6/0xbe =
   movzx/movsx r, r/m8; 0x88 = mov r/m8, r8.  On x86-32 only
   eax/ecx/edx/ebx (reg_map < 4) have byte-addressable low halves, which
   forces the register shuffling below; on x86-64 a REX prefix makes
   every low byte addressable. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6; /* mov r/m8, imm8 */
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* src has no byte form: copy it to TMP_REGISTER first. */
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			/* dst has a byte form: movzx/movsx on itself. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6;
		}
		else {
			/* Neither register is byte addressable: emulate the
			   extension with a shift pair / mask instead. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6; /* movsx / movzx */
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* TMP_REGISTER has no byte form (reg_map >= 4): pick a
			   byte-addressable register not used by dst's addressing,
			   exchange it with TMP_REGISTER, store, and exchange back. */
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			if (work_r == SLJIT_TEMPORARY_REG1) {
				/* One-byte xchg eax, reg. */
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}

			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */

			/* Undo the exchange. */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */
		}
#else
		/* EX86_REX forces a REX prefix so any low byte is addressable. */
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88; /* mov r/m8, r8 */
#endif
	}

	return SLJIT_SUCCESS;
}
833 
/* Emits a 16-bit move with zero (sign == 0) or sign (sign != 0)
   extension.  Opcodes: 0xc7 with 0x66 prefix = mov r/m16, imm16;
   0x0f 0xb7/0xbf = movzx/movsx r, r/m16; 0x89 with 0x66 prefix =
   mov r/m16, r16.  Unlike the byte case, every register has a 16-bit
   form, so no shuffling is needed. */
static int emit_mov_half(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7; /* mov r/m16, imm16 */
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
		dst_r = src; /* Register source stored directly below. */
	else {
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbf : 0xb7; /* movsx / movzx */
	}

	if (dst & SLJIT_MEM) {
		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89; /* mov r/m16, r16 */
	}

	return SLJIT_SUCCESS;
}
881 
882 static int emit_unary(struct sljit_compiler *compiler, int un_index,
883 	int dst, sljit_w dstw,
884 	int src, sljit_w srcw)
885 {
886 	sljit_ub* code;
887 
888 	if (dst == SLJIT_UNUSED) {
889 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
890 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
891 		FAIL_IF(!code);
892 		*code++ = 0xf7;
893 		*code |= (un_index) << 3;
894 		return SLJIT_SUCCESS;
895 	}
896 	if (dst == src && dstw == srcw) {
897 		/* Same input and output */
898 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
899 		FAIL_IF(!code);
900 		*code++ = 0xf7;
901 		*code |= (un_index) << 3;
902 		return SLJIT_SUCCESS;
903 	}
904 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
905 		EMIT_MOV(compiler, dst, 0, src, srcw);
906 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
907 		FAIL_IF(!code);
908 		*code++ = 0xf7;
909 		*code |= (un_index) << 3;
910 		return SLJIT_SUCCESS;
911 	}
912 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
913 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
914 	FAIL_IF(!code);
915 	*code++ = 0xf7;
916 	*code |= (un_index) << 3;
917 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
918 	return SLJIT_SUCCESS;
919 }
920 
921 static int emit_not_with_flags(struct sljit_compiler *compiler,
922 	int dst, sljit_w dstw,
923 	int src, sljit_w srcw)
924 {
925 	sljit_ub* code;
926 
927 	if (dst == SLJIT_UNUSED) {
928 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
929 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
930 		FAIL_IF(!code);
931 		*code++ = 0xf7;
932 		*code |= 0x2 << 3;
933 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
934 		FAIL_IF(!code);
935 		*code = 0x0b;
936 		return SLJIT_SUCCESS;
937 	}
938 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
939 		EMIT_MOV(compiler, dst, 0, src, srcw);
940 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
941 		FAIL_IF(!code);
942 		*code++ = 0xf7;
943 		*code |= 0x2 << 3;
944 		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
945 		FAIL_IF(!code);
946 		*code = 0x0b;
947 		return SLJIT_SUCCESS;
948 	}
949 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
950 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
951 	FAIL_IF(!code);
952 	*code++ = 0xf7;
953 	*code |= 0x2 << 3;
954 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
955 	FAIL_IF(!code);
956 	*code = 0x0b;
957 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
958 	return SLJIT_SUCCESS;
959 }
960 
/* Emits a count-leading-zeros sequence using bsr (0x0f 0xbd):
     bsr tmp, src            ; index of highest set bit, ZF=1 if src==0
     mov dst_r, 32+31        ; (64+63 for 64-bit ops)
     cmovne dst_r, tmp       ; 0x0f 0x45: keep the preload when src==0
     xor dst_r, 31           ; (63) converts the bit index to the
                             ; leading-zero count; the preload becomes
                             ; exactly 32 (or 64) for a zero input. */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	SLJIT_UNUSED_ARG(op);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* not */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3; /* shr by width-1: ZF tracks the inverted sign bit */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* bsr takes no immediate operand. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd; /* bsr */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Park dst_r's old value in the destination slot; the xchg
		   below swaps it back while storing the result. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45; /* cmovne */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3; /* xor imm */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* Swap the result into memory, restoring dst_r's old value. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87; /* xchg */
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1043 
/* Emits a single-operand operation: every MOV variant (byte/half/int,
   signed/unsigned, plus the MOVU forms that also update the address
   register), NOT, NEG and CLZ. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* No emulated ("extra") registers exist on 64 bit. */
	#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Remap emulated-register operands to their memory slots (x86-32)
	   and remember when that happened. */
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU_x = MOV_x + address update; reduce to the base opcode. */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Normalize the immediate to the move's operand width. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate -> emulated register is a plain word store. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update form: fold the source address into its base register
		   with LEA (8D) before the move itself. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Widening moves (or memory sources) into an emulated register
		   are computed in TMP_REGISTER and flushed to memory afterwards. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REGISTER detour back to the emulated register. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update form: LEA the destination address into its base. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT does not set flags; use the OR-assisted variant for SET_E. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	#undef src_is_ereg
#endif
}
1193 
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emits "op r/m, imm" for a Group-1 ALU instruction. _op_imm_ is the
   /digit selector (callers pass it already shifted into bits 3-5 of the
   ModRM byte); _op_mr_ is the "r/m, reg" opcode used on the fallback
   path when a 64-bit immediate does not fit in a sign-extended 32-bit
   field and must be loaded into TMP_REG2 first. NOTE: expands to a
   braceless if/else — callers must wrap the use in braces. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Emits the short "op EAX/RAX, imm32" form (REX.W prefix in 64-bit mode). */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* 32-bit variants: every immediate fits, so no fallback path is needed. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Emits the short "op EAX, imm32" form. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1223 
/* Emits a commutative two-operand ALU operation (ADD/ADC/AND/OR/XOR).
   op_rm:      opcode for the "reg, r/m" direction.
   op_mr:      opcode for the "r/m, reg" direction.
   op_imm:     Group-1 /digit for the immediate form (pre-shifted to bits 3-5).
   op_eax_imm: short "EAX, imm32" opcode.
   Because the operation is commutative, either source may be updated in
   place when it coincides with the destination. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute into TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* The short EAX form only pays off when imm8 cannot be used. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both memory: stage src2 through TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: compute directly in it. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1339 
/* Emits a non-commutative two-operand ALU operation (SUB/SBB).
   Same opcode parameters as emit_cum_binary, but since operand order
   matters only "dst == src1" can be done in place, and the general path
   must avoid clobbering src2 before it is read. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute into TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both memory: stage src2 through TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* Register destination distinct from src2: compute directly in it. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1421 
1422 static int emit_mul(struct sljit_compiler *compiler,
1423 	int dst, sljit_w dstw,
1424 	int src1, sljit_w src1w,
1425 	int src2, sljit_w src2w)
1426 {
1427 	sljit_ub* code;
1428 	int dst_r;
1429 
1430 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1431 
1432 	/* Register destination. */
1433 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1434 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1435 		FAIL_IF(!code);
1436 		*code++ = 0x0f;
1437 		*code = 0xaf;
1438 	}
1439 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1440 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1441 		FAIL_IF(!code);
1442 		*code++ = 0x0f;
1443 		*code = 0xaf;
1444 	}
1445 	else if (src1 & SLJIT_IMM) {
1446 		if (src2 & SLJIT_IMM) {
1447 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1448 			src2 = dst_r;
1449 			src2w = 0;
1450 		}
1451 
1452 		if (src1w <= 127 && src1w >= -128) {
1453 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1454 			FAIL_IF(!code);
1455 			*code = 0x6b;
1456 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1457 			FAIL_IF(!code);
1458 			INC_CSIZE(1);
1459 			*code = (sljit_b)src1w;
1460 		}
1461 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1462 		else {
1463 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1464 			FAIL_IF(!code);
1465 			*code = 0x69;
1466 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1467 			FAIL_IF(!code);
1468 			INC_CSIZE(4);
1469 			*(sljit_w*)code = src1w;
1470 		}
1471 #else
1472 		else if (IS_HALFWORD(src1w)) {
1473 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1474 			FAIL_IF(!code);
1475 			*code = 0x69;
1476 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1477 			FAIL_IF(!code);
1478 			INC_CSIZE(4);
1479 			*(sljit_hw*)code = (sljit_hw)src1w;
1480 		}
1481 		else {
1482 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1483 			if (dst_r != src2)
1484 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1485 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1486 			FAIL_IF(!code);
1487 			*code++ = 0x0f;
1488 			*code = 0xaf;
1489 		}
1490 #endif
1491 	}
1492 	else if (src2 & SLJIT_IMM) {
1493 		/* Note: src1 is NOT immediate. */
1494 
1495 		if (src2w <= 127 && src2w >= -128) {
1496 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1497 			FAIL_IF(!code);
1498 			*code = 0x6b;
1499 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1500 			FAIL_IF(!code);
1501 			INC_CSIZE(1);
1502 			*code = (sljit_b)src2w;
1503 		}
1504 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1505 		else {
1506 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1507 			FAIL_IF(!code);
1508 			*code = 0x69;
1509 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1510 			FAIL_IF(!code);
1511 			INC_CSIZE(4);
1512 			*(sljit_w*)code = src2w;
1513 		}
1514 #else
1515 		else if (IS_HALFWORD(src2w)) {
1516 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1517 			FAIL_IF(!code);
1518 			*code = 0x69;
1519 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1520 			FAIL_IF(!code);
1521 			INC_CSIZE(4);
1522 			*(sljit_hw*)code = (sljit_hw)src2w;
1523 		}
1524 		else {
1525 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1526 			if (dst_r != src1)
1527 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1528 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1529 			FAIL_IF(!code);
1530 			*code++ = 0x0f;
1531 			*code = 0xaf;
1532 		}
1533 #endif
1534 	}
1535 	else {
1536 		/* Neither argument is immediate. */
1537 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1538 			dst_r = TMP_REGISTER;
1539 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1540 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1541 		FAIL_IF(!code);
1542 		*code++ = 0x0f;
1543 		*code = 0xaf;
1544 	}
1545 
1546 	if (dst_r == TMP_REGISTER)
1547 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1548 
1549 	return SLJIT_SUCCESS;
1550 }
1551 
/* Tries to emit an addition as a single LEA (8D), which computes the sum
   without touching the flags. Handles reg+reg and reg+imm shapes only;
   returns SLJIT_ERR_UNSUPPORTED when no LEA form applies so the caller
   can fall back to the normal ADD path. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) {
			/* LEA dst_r, [src1 + src2]. */
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
		/* LEA dst_r, [src1 + imm] (64 bit: only for 32-bit displacements). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
		/* LEA dst_r, [src2 + imm] — mirrored case with src2 as the base. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Store the result when dst itself was not a register. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1608 
/* Emits a CMP of src1 with src2 (sets flags only, stores no result).
   Opcodes used: 3D = CMP EAX, imm32; Group-1 /7 = CMP r/m, imm;
   3B = CMP reg, r/m; 39 = CMP r/m, reg. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Short EAX form when src1 is EAX and the immediate needs 32 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			/* CMP src1, imm (/7). */
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			/* CMP src1, r/m (3B). */
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* CMP r/m, src2 (39). */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* imm vs imm: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Both in memory: stage src1 through TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1659 
1660 static int emit_test_binary(struct sljit_compiler *compiler,
1661 	int src1, sljit_w src1w,
1662 	int src2, sljit_w src2w)
1663 {
1664 	sljit_ub* code;
1665 
1666 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1667 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1668 #else
1669 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1670 #endif
1671 		BINARY_EAX_IMM(0xa9, src2w);
1672 		return SLJIT_SUCCESS;
1673 	}
1674 
1675 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1676 	if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1677 #else
1678 	if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1679 #endif
1680 		BINARY_EAX_IMM(0xa9, src1w);
1681 		return SLJIT_SUCCESS;
1682 	}
1683 
1684 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1685 		if (src2 & SLJIT_IMM) {
1686 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1687 			if (IS_HALFWORD(src2w) || compiler->mode32) {
1688 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1689 				FAIL_IF(!code);
1690 				*code = 0xf7;
1691 			}
1692 			else {
1693 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1694 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1695 				FAIL_IF(!code);
1696 				*code = 0x85;
1697 			}
1698 #else
1699 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1700 			FAIL_IF(!code);
1701 			*code = 0xf7;
1702 #endif
1703 		}
1704 		else {
1705 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1706 			FAIL_IF(!code);
1707 			*code = 0x85;
1708 		}
1709 		return SLJIT_SUCCESS;
1710 	}
1711 
1712 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1713 		if (src1 & SLJIT_IMM) {
1714 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1715 			if (IS_HALFWORD(src1w) || compiler->mode32) {
1716 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1717 				FAIL_IF(!code);
1718 				*code = 0xf7;
1719 			}
1720 			else {
1721 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1722 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1723 				FAIL_IF(!code);
1724 				*code = 0x85;
1725 			}
1726 #else
1727 			code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1728 			FAIL_IF(!code);
1729 			*code = 0xf7;
1730 #endif
1731 		}
1732 		else {
1733 			code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1734 			FAIL_IF(!code);
1735 			*code = 0x85;
1736 		}
1737 		return SLJIT_SUCCESS;
1738 	}
1739 
1740 	EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1741 	if (src2 & SLJIT_IMM) {
1742 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1743 		if (IS_HALFWORD(src2w) || compiler->mode32) {
1744 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1745 			FAIL_IF(!code);
1746 			*code = 0xf7;
1747 		}
1748 		else {
1749 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1750 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1751 			FAIL_IF(!code);
1752 			*code = 0x85;
1753 		}
1754 #else
1755 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1756 		FAIL_IF(!code);
1757 		*code = 0xf7;
1758 #endif
1759 	}
1760 	else {
1761 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1762 		FAIL_IF(!code);
1763 		*code = 0x85;
1764 	}
1765 	return SLJIT_SUCCESS;
1766 }
1767 
/* Emits a shift/rotate. "mode" is the Group-2 /digit (pre-shifted to
   bits 3-5) selecting the operation. A variable shift count must live
   in CL (SLJIT_PREF_SHIFT_REG = ECX), so the non-immediate paths move
   the count into ECX and save/restore its previous contents. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Easy cases: the count is an immediate or already in ECX. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* Shift dst in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result discarded: shift a copy for the flags only. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* ECX is both count and destination: compute in TMP, move last. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Register destination: copy then shift in place. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Memory destination: shift in TMP, then store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* Variable count not in ECX: it has to be moved there first. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* Destination is ECX itself: compute in TMP, load the count,
		   shift, and move the result into ECX at the end. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Register destination not involved with ECX/src2: save ECX in
		   TMP, do the shift in dst, then restore ECX. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1856 
1857 static int emit_shift_with_flags(struct sljit_compiler *compiler,
1858 	sljit_ub mode, int set_flags,
1859 	int dst, sljit_w dstw,
1860 	int src1, sljit_w src1w,
1861 	int src2, sljit_w src2w)
1862 {
1863 	/* The CPU does not set flags if the shift count is 0. */
1864 	if (src2 & SLJIT_IMM) {
1865 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1866 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
1867 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1868 #else
1869 		if ((src2w & 0x1f) != 0)
1870 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1871 #endif
1872 		if (!set_flags)
1873 			return emit_mov(compiler, dst, dstw, src1, src1w);
1874 		/* OR dst, src, 0 */
1875 		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1876 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
1877 	}
1878 
1879 	if (!set_flags)
1880 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1881 
1882 	if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
1883 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
1884 
1885 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
1886 
1887 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1888 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
1889 	return SLJIT_SUCCESS;
1890 }
1891 
/* Emits a two operand (binary) operation. Selects specialized encodings
   where possible: LEA for additions/subtractions that do not need flags,
   CMP for a subtraction with an unused destination, and TEST for an AND
   with an unused destination; otherwise falls back to the generic ALU
   emitters. Returns SLJIT_SUCCESS or an error code. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Map the emulated extra registers to their memory slots (x86-32 only). */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* MUL and above always clobber the CPU flags, so either drop the
	   cached copy or save the flags now if the caller wants them kept. */
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* LEA leaves the flags untouched, so prefer it when the
			   caller does not request flag results. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* Arguments: opcode r<-r/m, opcode r/m<-r, /digit for imm forms,
		   and the short EAX-immediate opcode. */
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* sub reg, imm can be encoded as lea reg, [reg - imm]. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* Only the flags are needed: use CMP instead of SUB. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* Only the flags are needed: use TEST instead of AND. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1986 
1987 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1988 {
1989 	check_sljit_get_register_index(reg);
1990 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1991 	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1992 			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
1993 		return -1;
1994 #endif
1995 	return reg_map[reg];
1996 }
1997 
/* Copies a caller supplied, pre-encoded instruction (1-15 bytes, the
   x86 maximum instruction length) verbatim into the code buffer. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	/* One extra byte is reserved for the internal size record. */
	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
2013 
2014 /* --------------------------------------------------------------------- */
2015 /*  Floating point operators                                             */
2016 /* --------------------------------------------------------------------- */
2017 
2018 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2019 
/* Alignment + 2 * 16 bytes: up to 3 words of padding so the two
   16 byte constant masks (sign-flip for FNEG, sign-clear for FABS)
   can be aligned to a 16 byte boundary at runtime. */
static sljit_i sse2_data[3 + 4 + 4];
/* 16 byte aligned view into sse2_data, set up by init_compiler(). */
static sljit_i *sse2_buffer;
2023 
2024 static void init_compiler(void)
2025 {
2026 	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
2027 	sse2_buffer[0] = 0;
2028 	sse2_buffer[1] = 0x80000000;
2029 	sse2_buffer[4] = 0xffffffff;
2030 	sse2_buffer[5] = 0x7fffffff;
2031 }
2032 
2033 #endif
2034 
/* Reports whether floating point operations can be emitted. With SSE2
   detection enabled this executes CPUID leaf 1 once and caches the
   result; bit 26 of EDX is the SSE2 feature flag. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
{
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	static int sse2_available = -1;
	int features;

	/* Cached from a previous call. */
	if (sse2_available != -1)
		return sse2_available;

#ifdef __GNUC__
	/* AT&T syntax. EBX is saved/restored around CPUID because it may
	   serve as the PIC base register. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
	#error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
#endif
	/* EDX bit 26: SSE2 supported. */
	sse2_available = (features >> 26) & 0x1;
	return sse2_available;
#else
	return 1;
#endif
#else
	return 0;
#endif
}
2078 
2079 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2080 
2081 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2082 	int xmm1, int xmm2, sljit_w xmm2w)
2083 {
2084 	sljit_ub *buf;
2085 
2086 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2087 	FAIL_IF(!buf);
2088 	*buf++ = 0x0f;
2089 	*buf = opcode;
2090 	return SLJIT_SUCCESS;
2091 }
2092 
2093 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2094 	int xmm1, int xmm2, sljit_w xmm2w)
2095 {
2096 	sljit_ub *buf;
2097 
2098 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2099 	FAIL_IF(!buf);
2100 	*buf++ = 0x0f;
2101 	*buf = opcode;
2102 	return SLJIT_SUCCESS;
2103 }
2104 
/* Loads a double from src/srcw into the dst SSE2 register
   (F2 0F 10: movsd xmm, xmm/mem). */
static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
	int dst, int src, sljit_w srcw)
{
	return emit_sse2(compiler, 0x10, dst, src, srcw);
}
2110 
/* Stores the src SSE2 register to dst/dstw
   (F2 0F 11: movsd xmm/mem, xmm — note the swapped operand order). */
static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
	int dst, sljit_w dstw, int src)
{
	return emit_sse2(compiler, 0x11, src, dst, dstw);
}
2116 
/* Emits a single operand floating point operation (SSE2 path).
   Note: for SLJIT_FCMP the "dst" argument is the first compare
   operand, not a destination. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
		/* The first operand must live in a register for ucomisd. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		/* 66 0F 2E: ucomisd dst_r, src. */
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory to memory move goes through the temporary register. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG / FABS operate in place on a register. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* xorpd with the aligned sign bit mask flips the sign. */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* andpd with the aligned ~sign mask clears the sign. */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2174 
/* Emits a two operand floating point operation (SSE2 path). SSE2
   arithmetic is destructive (xmm1 = xmm1 op xmm2), so the code first
   arranges for src1 to be in the destination register, swapping the
   operands of commutative operations when that saves a move. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments: commutative ops can consume src1 as
			   the second operand instead of moving it first. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 for a non-commutative op: work in the
			   temporary register so src2 is not clobbered. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w)); /* addsd */
		break;

	case SLJIT_FSUB:
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w)); /* subsd */
		break;

	case SLJIT_FMUL:
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w)); /* mulsd */
		break;

	case SLJIT_FDIV:
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w)); /* divsd */
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2232 
2233 #else
2234 
/* Stub used when SSE2 support is compiled out: floating point
   operations are unsupported on this build. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}
2245 
/* Stub used when SSE2 support is compiled out: floating point
   operations are unsupported on this build. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}
2257 
2258 #endif
2259 
2260 /* --------------------------------------------------------------------- */
2261 /*  Conditional instructions                                             */
2262 /* --------------------------------------------------------------------- */
2263 
/* Creates a label at the current position of the instruction stream.
   Labels are recorded in the buffer as a zero length byte followed by
   a type byte, resolved later during code generation. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps has their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	/* Reuse the previous label when nothing was emitted since then. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	/* 0 length marker, then record type 0 (label). */
	*buf++ = 0;
	*buf++ = 0;

	return label;
}
2292 
/* Emits a (possibly conditional) jump or call to a target resolved
   later. Only a placeholder record is written now; the worst case
   instruction size is added to compiler->size so buffer allocation in
   the generation pass cannot overflow. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Unconditional jumps/calls may still need the saved flags at
		   the target; conditional ones consume the live flags. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Move the arguments into the ABI mandated registers first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	/* 0 length marker, then the jump record type. */
	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2329 
/* Emits an indirect jump or call whose target comes from a register,
   memory operand, or immediate. Immediates become deferred jump
   records; other operands are encoded directly as FF /4 (jmp) or
   FF /2 (call). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* call_with_args clobbers the argument registers; move the
		   target out of the way first. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* NOTE(review): call_with_args appears to adjust the stack by
		   one word for CALL3+ here, hence the compensating offset. */
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_w);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: record a deferred jump, patched during
		   code generation. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		/* 0 length marker, then the jump record type. */
		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		/* FF /2 = call r/m, FF /4 = jmp r/m. */
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2398 
/* Materializes a condition flag as a 0/1 value in dst. With SLJIT_MOV
   the value is stored directly; with other ops it is combined with the
   current dst via sljit_emit_op2. Implemented with SETcc (0F 9x) plus
   a MOVZX (0F B6) zero extension where needed. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
	/* Keep the caller supplied operand for the op2 path below, since
	   CHECK_EXTRA_REGS/ADJUST_LOCAL_OFFSET may rewrite dst/dstw. */
	int dst_save = dst;
	sljit_w dstw_save = dstw;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* Select the second opcode byte of the SETcc instruction (0F 9x). */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno */
		break;

	case SLJIT_C_FLOAT_UNORDERED:
		cond_set = 0x9a; /* setp */
		break;

	case SLJIT_C_FLOAT_ORDERED:
		cond_set = 0x9b; /* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. A plain REX prefix (0x40)
	   is emitted even for low registers so the byte register encodes
	   as SPL/BPL/SIL/DIL rather than AH/CH/DH/BH. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8 zero extends the 0/1 byte to the full register. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			/* Combine the flag value with the original destination. */
			return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
		}
	}
#else
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* EAX/ECX/EDX have addressable low byte registers. */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Borrow EAX: save it, use AL for SETcc, restore after. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				*buf = 0xC0;
				/* Memory destination: movzx eax, al, then store EAX. */
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst, set its low byte, then combine with the saved
			   original via op2 below. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			/* Borrow EAX again: SETcc al, movzx eax, al, then
			   xchg eax, TMP_REGISTER (0x90 + r) to swap the result
			   with the saved EAX in one instruction. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2589 
/* Stores the address of a local variable area slot (locals base plus
   offset) into dst, using LEA when the offset is nonzero. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	/* Rebase the requested offset the same way memory operands are. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Offsets outside the signed 32 bit range cannot be encoded as a
	   displacement; load them into a register first. */
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}
2620 
/* Emits a patchable constant load into dst. The returned record lets
   sljit_set_const() rewrite the embedded immediate later, so a full
   width immediate encoding is always used. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	/* The 64 bit immediate must go through a register (mov r64, imm64). */
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	/* 0 length marker, then record type 1 (constant). */
	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Spill the loaded constant to a non-register destination. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2667 
/* Patches the target of a rewritable jump. addr points at the address
   field inside the generated instruction. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* 32 bit jumps use a rel32 displacement, relative to the end of
	   the 4 byte field. */
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	/* 64 bit jumps store the absolute target address. */
	*(sljit_uw*)addr = new_addr;
#endif
}
2676 
/* Patches the immediate of a constant emitted by sljit_emit_const().
   addr points at the immediate field inside the generated mov. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
{
	*(sljit_w*)addr = new_constant;
}
2681