1 /* $NetBSD: sljitNativeX86_common.c,v 1.10 2021/11/30 12:32:09 christos Exp $ */
2
3 /*
4 * Stack-less Just-In-Time compiler
5 *
6 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without modification, are
9 * permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice, this list of
12 * conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
15 * of conditions and the following disclaimer in the documentation and/or other materials
16 * provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
30 {
31 return "x86" SLJIT_CPUINFO;
32 }
33
34 /*
35 32b register indexes:
36 0 - EAX
37 1 - ECX
38 2 - EDX
39 3 - EBX
40 4 - none
41 5 - EBP
42 6 - ESI
43 7 - EDI
44 */
45
46 /*
47 64b register indexes:
48 0 - RAX
49 1 - RCX
50 2 - RDX
51 3 - RBX
52 4 - none
53 5 - RBP
54 6 - RSI
55 7 - RDI
56 8 - R8 - From now on REX prefix is required
57 9 - R9
58 10 - R10
59 11 - R11
60 12 - R12
61 13 - R13
62 14 - R14
63 15 - R15
64 */
65
66 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
67
68 /* Last register + 1. */
69 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
70
71 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
72 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
73 };
74
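/* On x86-32 only six general purpose registers are mapped directly (see
   reg_map above); SLJIT registers beyond that set live in stack slots.
   The macro below rewrites such operands into SLJIT_MEM1(SLJIT_SP)
   accesses, using compiler->saveds_offset / compiler->locals_offset to
   compute the slot address. */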
75 #define CHECK_EXTRA_REGS(p, w, do) \
76 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
77 if (p <= compiler->scratches) \
78 w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
79 else \
80 w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
81 p = SLJIT_MEM1(SLJIT_SP); \
82 do; \
83 }
84
85 #else /* SLJIT_CONFIG_X86_32 */
86
87 /* Last register + 1. */
88 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
89 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
90 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
91
92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present";
93 Note: avoid using r12 and r13 for memory addressing,
94 therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
95 #ifndef _WIN64
96 /* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
97 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
98 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
99 };
100 /* low-map. reg_map & 0x7. */
101 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
102 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
103 };
104 #else
105 /* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
106 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
107 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
108 };
109 /* low-map. reg_map & 0x7. */
110 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
111 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
112 };
113 #endif
114
115 #define REX_W 0x48
116 #define REX_R 0x44
117 #define REX_X 0x42
118 #define REX_B 0x41
119 #define REX 0x40
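/* REX prefix layout is 0100WRXB: W selects a 64-bit operand size, R extends
   the ModRM reg field, X the SIB index field, and B the ModRM rm / SIB base
   field. Illustrative encoding: 0x49 0x8B 0xC0 (REX.W|REX.B, MOV_r_rm,
   ModRM 0xC0) is "mov rax, r8". */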
120
121 #ifndef _WIN64
122 #define HALFWORD_MAX 0x7fffffffl
123 #define HALFWORD_MIN -0x80000000l
124 #else
125 #define HALFWORD_MAX 0x7fffffffll
126 #define HALFWORD_MIN -0x80000000ll
127 #endif
128
129 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
130 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
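/* IS_HALFWORD checks whether a value fits into the sign-extended 32-bit
   immediate field accepted by most x86-64 instructions; values outside
   this range must be materialized with a 64-bit load (emit_load_imm64). */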
131
132 #define CHECK_EXTRA_REGS(p, w, do)
133
134 #endif /* SLJIT_CONFIG_X86_32 */
135
136 #define TMP_FREG (0)
137
138 /* Size flags for emit_x86_instruction: */
139 #define EX86_BIN_INS 0x0010
140 #define EX86_SHIFT_INS 0x0020
141 #define EX86_REX 0x0040
142 #define EX86_NO_REXW 0x0080
143 #define EX86_BYTE_ARG 0x0100
144 #define EX86_HALF_ARG 0x0200
145 #define EX86_PREF_66 0x0400
146 #define EX86_PREF_F2 0x0800
147 #define EX86_PREF_F3 0x1000
148 #define EX86_SSE2_OP1 0x2000
149 #define EX86_SSE2_OP2 0x4000
150 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
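/* These flags are OR-ed into the size argument of emit_x86_instruction
   (defined in the architecture specific file included below) to request
   instruction prefixes and operand forms. */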
151
152 /* --------------------------------------------------------------------- */
153 /* Instruction forms */
154 /* --------------------------------------------------------------------- */
155
156 #define ADD (/* BINARY */ 0 << 3)
157 #define ADD_EAX_i32 0x05
158 #define ADD_r_rm 0x03
159 #define ADD_rm_r 0x01
160 #define ADDSD_x_xm 0x58
161 #define ADC (/* BINARY */ 2 << 3)
162 #define ADC_EAX_i32 0x15
163 #define ADC_r_rm 0x13
164 #define ADC_rm_r 0x11
165 #define AND (/* BINARY */ 4 << 3)
166 #define AND_EAX_i32 0x25
167 #define AND_r_rm 0x23
168 #define AND_rm_r 0x21
169 #define ANDPD_x_xm 0x54
170 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
171 #define CALL_i32 0xe8
172 #define CALL_rm (/* GROUP_FF */ 2 << 3)
173 #define CDQ 0x99
174 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
175 #define CMP (/* BINARY */ 7 << 3)
176 #define CMP_EAX_i32 0x3d
177 #define CMP_r_rm 0x3b
178 #define CMP_rm_r 0x39
179 #define CVTPD2PS_x_xm 0x5a
180 #define CVTSI2SD_x_rm 0x2a
181 #define CVTTSD2SI_r_xm 0x2c
182 #define DIV (/* GROUP_F7 */ 6 << 3)
183 #define DIVSD_x_xm 0x5e
184 #define INT3 0xcc
185 #define IDIV (/* GROUP_F7 */ 7 << 3)
186 #define IMUL (/* GROUP_F7 */ 5 << 3)
187 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
188 #define IMUL_r_rm_i8 0x6b
189 #define IMUL_r_rm_i32 0x69
190 #define JE_i8 0x74
191 #define JNE_i8 0x75
192 #define JMP_i8 0xeb
193 #define JMP_i32 0xe9
194 #define JMP_rm (/* GROUP_FF */ 4 << 3)
195 #define LEA_r_m 0x8d
196 #define MOV_r_rm 0x8b
197 #define MOV_r_i32 0xb8
198 #define MOV_rm_r 0x89
199 #define MOV_rm_i32 0xc7
200 #define MOV_rm8_i8 0xc6
201 #define MOV_rm8_r8 0x88
202 #define MOVSD_x_xm 0x10
203 #define MOVSD_xm_x 0x11
204 #define MOVSXD_r_rm 0x63
205 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
206 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
207 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
208 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
209 #define MUL (/* GROUP_F7 */ 4 << 3)
210 #define MULSD_x_xm 0x59
211 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
212 #define NOP 0x90
213 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
214 #define OR (/* BINARY */ 1 << 3)
215 #define OR_r_rm 0x0b
216 #define OR_EAX_i32 0x0d
217 #define OR_rm_r 0x09
218 #define OR_rm8_r8 0x08
219 #define POP_r 0x58
220 #define POP_rm 0x8f
221 #define POPF 0x9d
222 #define PUSH_i32 0x68
223 #define PUSH_r 0x50
224 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
225 #define PUSHF 0x9c
226 #define RET_near 0xc3
227 #define RET_i16 0xc2
228 #define SBB (/* BINARY */ 3 << 3)
229 #define SBB_EAX_i32 0x1d
230 #define SBB_r_rm 0x1b
231 #define SBB_rm_r 0x19
232 #define SAR (/* SHIFT */ 7 << 3)
233 #define SHL (/* SHIFT */ 4 << 3)
234 #define SHR (/* SHIFT */ 5 << 3)
235 #define SUB (/* BINARY */ 5 << 3)
236 #define SUB_EAX_i32 0x2d
237 #define SUB_r_rm 0x2b
238 #define SUB_rm_r 0x29
239 #define SUBSD_x_xm 0x5c
240 #define TEST_EAX_i32 0xa9
241 #define TEST_rm_r 0x85
242 #define UCOMISD_x_xm 0x2e
243 #define UNPCKLPD_x_xm 0x14
244 #define XCHG_EAX_r 0x90
245 #define XCHG_r_rm 0x87
246 #define XOR (/* BINARY */ 6 << 3)
247 #define XOR_EAX_i32 0x35
248 #define XOR_r_rm 0x33
249 #define XOR_rm_r 0x31
250 #define XORPD_x_xm 0x57
251
252 #define GROUP_0F 0x0f
253 #define GROUP_F7 0xf7
254 #define GROUP_FF 0xff
255 #define GROUP_BINARY_81 0x81
256 #define GROUP_BINARY_83 0x83
257 #define GROUP_SHIFT_1 0xd1
258 #define GROUP_SHIFT_N 0xc1
259 #define GROUP_SHIFT_CL 0xd3
260
261 #define MOD_REG 0xc0
262 #define MOD_DISP8 0x40
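/* ModRM mod field values: MOD_REG (mod = 11b) selects a register-direct
   operand, MOD_DISP8 (mod = 01b) selects a memory operand with an
   8-bit displacement. */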
263
264 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
265
266 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
267 #define POP_REG(r) (*inst++ = (POP_r + (r)))
268 #define RET() (*inst++ = (RET_near))
269 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
270 /* r32, r/m32 */
271 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
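/* Illustrative use: MOV_RM(0x3, 0, 1) emits 0x8B 0xC1, i.e. "mov eax, ecx"
   (mod = 11b register-direct, reg = eax, rm = ecx). */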
272
273 /* Multithreading does not affect these static variables, since they store
274 built-in CPU features. Therefore they may be overwritten by different threads
275 if they detect the CPU features at the same time. */
276 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
277 static sljit_s32 cpu_has_sse2 = -1;
278 #endif
279 static sljit_s32 cpu_has_cmov = -1;
280
281 #ifdef _WIN32_WCE
282 #include <cmnintrin.h>
283 #elif defined(_MSC_VER) && _MSC_VER >= 1400
284 #include <intrin.h>
285 #endif
286
287 /******************************************************/
288 /* Unaligned-store functions */
289 /******************************************************/
290
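/* Going through SLJIT_MEMCPY keeps these stores well-defined C even for
   unaligned addresses; compilers typically lower the fixed-size copy to a
   single (possibly unaligned) store on x86. */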
291 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
292 {
293 SLJIT_MEMCPY(addr, &value, sizeof(value));
294 }
295
296 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
297 {
298 SLJIT_MEMCPY(addr, &value, sizeof(value));
299 }
300
301 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
302 {
303 SLJIT_MEMCPY(addr, &value, sizeof(value));
304 }
305
306 /******************************************************/
307 /* Utility functions */
308 /******************************************************/
309
310 static void get_cpu_features(void)
311 {
312 sljit_u32 features;
313
314 #if defined(_MSC_VER) && _MSC_VER >= 1400
315
316 int CPUInfo[4];
317 __cpuid(CPUInfo, 1);
318 features = (sljit_u32)CPUInfo[3];
319
320 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__lint__)
321
322 /* AT&T syntax. */
323 __asm__ (
324 "movl $0x1, %%eax\n"
325 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
326 /* On x86-32, there is no red zone, so this
327 should work (no need for a local variable). */
328 "push %%ebx\n"
329 #endif
330 "cpuid\n"
331 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
332 "pop %%ebx\n"
333 #endif
334 "movl %%edx, %0\n"
335 : "=g" (features)
336 :
337 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
338 : "%eax", "%ecx", "%edx"
339 #else
340 : "%rax", "%rbx", "%rcx", "%rdx"
341 #endif
342 );
343
344 #else /* _MSC_VER && _MSC_VER >= 1400 */
345
346 /* Intel syntax. */
347 __asm {
348 mov eax, 1
349 cpuid
350 mov features, edx
351 }
352
353 #endif /* _MSC_VER && _MSC_VER >= 1400 */
354
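/* CPUID leaf 1 reports feature flags in EDX: bit 15 is CMOV, bit 26 is SSE2. */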
355 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
356 cpu_has_sse2 = (features >> 26) & 0x1;
357 #endif
358 cpu_has_cmov = (features >> 15) & 0x1;
359 }
360
361 static sljit_u8 get_jump_code(sljit_s32 type)
362 {
363 switch (type) {
364 case SLJIT_EQUAL:
365 case SLJIT_EQUAL_F64:
366 return 0x84 /* je */;
367
368 case SLJIT_NOT_EQUAL:
369 case SLJIT_NOT_EQUAL_F64:
370 return 0x85 /* jne */;
371
372 case SLJIT_LESS:
373 case SLJIT_LESS_F64:
374 return 0x82 /* jc */;
375
376 case SLJIT_GREATER_EQUAL:
377 case SLJIT_GREATER_EQUAL_F64:
378 return 0x83 /* jae */;
379
380 case SLJIT_GREATER:
381 case SLJIT_GREATER_F64:
382 return 0x87 /* jnbe */;
383
384 case SLJIT_LESS_EQUAL:
385 case SLJIT_LESS_EQUAL_F64:
386 return 0x86 /* jbe */;
387
388 case SLJIT_SIG_LESS:
389 return 0x8c /* jl */;
390
391 case SLJIT_SIG_GREATER_EQUAL:
392 return 0x8d /* jnl */;
393
394 case SLJIT_SIG_GREATER:
395 return 0x8f /* jnle */;
396
397 case SLJIT_SIG_LESS_EQUAL:
398 return 0x8e /* jle */;
399
400 case SLJIT_OVERFLOW:
401 case SLJIT_MUL_OVERFLOW:
402 return 0x80 /* jo */;
403
404 case SLJIT_NOT_OVERFLOW:
405 case SLJIT_MUL_NOT_OVERFLOW:
406 return 0x81 /* jno */;
407
408 case SLJIT_UNORDERED_F64:
409 return 0x8a /* jp */;
410
411 case SLJIT_ORDERED_F64:
412 return 0x8b /* jpo */;
413 }
414 return 0;
415 }
416
417 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
418 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
419 #else
420 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
421 #endif
422
423 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
424 {
425 sljit_s32 short_jump;
426 sljit_uw label_addr;
427
428 if (jump->flags & JUMP_LABEL)
429 label_addr = (sljit_uw)(code + jump->u.label->size);
430 else
431 label_addr = jump->u.target - executable_offset;
432
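/* A short jump uses a signed 8-bit displacement measured from the end of
   the two byte instruction, hence the "jump->addr + 2" below. */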
433 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
434
435 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
436 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
437 return generate_far_jump_code(jump, code_ptr, type);
438 #endif
439
440 if (type == SLJIT_JUMP) {
441 if (short_jump)
442 *code_ptr++ = JMP_i8;
443 else
444 *code_ptr++ = JMP_i32;
445 jump->addr++;
446 }
447 else if (type >= SLJIT_FAST_CALL) {
448 short_jump = 0;
449 *code_ptr++ = CALL_i32;
450 jump->addr++;
451 }
452 else if (short_jump) {
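/* Conditional near jumps (0x0f 0x80..0x8f) have short forms at
   0x70..0x7f, so subtracting 0x10 converts the opcode returned by
   get_jump_code into its 8-bit displacement variant. */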
453 *code_ptr++ = get_jump_code(type) - 0x10;
454 jump->addr++;
455 }
456 else {
457 *code_ptr++ = GROUP_0F;
458 *code_ptr++ = get_jump_code(type);
459 jump->addr += 2;
460 }
461
462 if (short_jump) {
463 jump->flags |= PATCH_MB;
464 code_ptr += sizeof(sljit_s8);
465 } else {
466 jump->flags |= PATCH_MW;
467 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
468 code_ptr += sizeof(sljit_sw);
469 #else
470 code_ptr += sizeof(sljit_s32);
471 #endif
472 }
473
474 return code_ptr;
475 }
476
477 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
478 {
479 struct sljit_memory_fragment *buf;
480 sljit_u8 *code;
481 sljit_u8 *code_ptr;
482 sljit_u8 *buf_ptr;
483 sljit_u8 *buf_end;
484 sljit_u8 len;
485 sljit_sw executable_offset;
486 sljit_sw jump_addr;
487
488 struct sljit_label *label;
489 struct sljit_jump *jump;
490 struct sljit_const *const_;
491
492 CHECK_ERROR_PTR();
493 CHECK_PTR(check_sljit_generate_code(compiler));
494 reverse_buf(compiler);
495
496 /* Second code generation pass. */
497 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
498 PTR_FAIL_WITH_EXEC_IF(code);
499 buf = compiler->buf;
500
501 code_ptr = code;
502 label = compiler->labels;
503 jump = compiler->jumps;
504 const_ = compiler->consts;
505 executable_offset = SLJIT_EXEC_OFFSET(code);
506
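/* Each record in the buffers starts with a length byte: a non-zero value
   means that many bytes of already generated machine code follow, while
   zero means the next byte is a marker (0 = label, 1 = constant,
   type + 2 = jump). */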
507 do {
508 buf_ptr = buf->memory;
509 buf_end = buf_ptr + buf->used_size;
510 do {
511 len = *buf_ptr++;
512 if (len > 0) {
513 /* The code is already generated. */
514 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
515 code_ptr += len;
516 buf_ptr += len;
517 }
518 else {
519 if (*buf_ptr >= 2) {
520 jump->addr = (sljit_uw)code_ptr;
521 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
522 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
523 else {
524 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
525 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
526 #else
527 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
528 #endif
529 }
530 jump = jump->next;
531 }
532 else if (*buf_ptr == 0) {
533 label->addr = ((sljit_uw)code_ptr) + executable_offset;
534 label->size = code_ptr - code;
535 label = label->next;
536 }
537 else { /* *buf_ptr is 1 */
538 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
539 const_ = const_->next;
540 }
541 buf_ptr++;
542 }
543 } while (buf_ptr < buf_end);
544 SLJIT_ASSERT(buf_ptr == buf_end);
545 buf = buf->next;
546 } while (buf);
547
548 SLJIT_ASSERT(!label);
549 SLJIT_ASSERT(!jump);
550 SLJIT_ASSERT(!const_);
551
552 jump = compiler->jumps;
553 while (jump) {
554 jump_addr = jump->addr + executable_offset;
555
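/* PATCH_MB jumps are patched with an 8-bit displacement, PATCH_MW with a
   word (x86-32) or 32-bit (x86-64) displacement, and PATCH_MD (x86-64
   far jumps) with the absolute 64-bit target address. */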
556 if (jump->flags & PATCH_MB) {
557 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
558 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
559 } else if (jump->flags & PATCH_MW) {
560 if (jump->flags & JUMP_LABEL) {
561 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
562 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
563 #else
564 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
565 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
566 #endif
567 }
568 else {
569 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
570 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
571 #else
572 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
573 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
574 #endif
575 }
576 }
577 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
578 else if (jump->flags & PATCH_MD)
579 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
580 #endif
581
582 jump = jump->next;
583 }
584
585 /* Some space may be wasted because of short jumps. */
586 SLJIT_ASSERT(code_ptr <= code + compiler->size);
587 compiler->error = SLJIT_ERR_COMPILED;
588 compiler->executable_offset = executable_offset;
589 compiler->executable_size = code_ptr - code;
590 return (void*)(code + executable_offset);
591 }
592
593 /* --------------------------------------------------------------------- */
594 /* Operators */
595 /* --------------------------------------------------------------------- */
596
597 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
598 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
599 sljit_s32 dst, sljit_sw dstw,
600 sljit_s32 src1, sljit_sw src1w,
601 sljit_s32 src2, sljit_sw src2w);
602
603 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
604 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
605 sljit_s32 dst, sljit_sw dstw,
606 sljit_s32 src1, sljit_sw src1w,
607 sljit_s32 src2, sljit_sw src2w);
608
609 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
610 sljit_s32 dst, sljit_sw dstw,
611 sljit_s32 src, sljit_sw srcw);
612
613 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
614 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
615
616 #ifdef _WIN32
617 #include <malloc.h>
618
619 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
620 {
621 /* Workaround for calling the internal _chkstk() function on Windows.
622 This function touches all 4k pages that belong to the requested stack space,
623 whose size is passed in local_size. This is necessary on Windows, where
624 the stack can only grow in 4k steps. However, this function just burns
625 CPU cycles if the stack is already large enough. Since you don't know that in
626 advance, it must always be called. I think this is a bad design in
627 general, even if it has its reasons. */
628 *(volatile sljit_s32*)alloca(local_size) = 0;
629 }
630
631 #endif
632
633 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
634 #include "sljitNativeX86_32.c"
635 #else
636 #include "sljitNativeX86_64.c"
637 #endif
638
639 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
640 sljit_s32 dst, sljit_sw dstw,
641 sljit_s32 src, sljit_sw srcw)
642 {
643 sljit_u8* inst;
644
645 if (dst == SLJIT_UNUSED) {
646 /* No destination, no need to set up flags. */
647 if (src & SLJIT_MEM) {
648 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
649 FAIL_IF(!inst);
650 *inst = MOV_r_rm;
651 }
652 return SLJIT_SUCCESS;
653 }
654 if (FAST_IS_REG(src)) {
655 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
656 FAIL_IF(!inst);
657 *inst = MOV_rm_r;
658 return SLJIT_SUCCESS;
659 }
660 if (src & SLJIT_IMM) {
661 if (FAST_IS_REG(dst)) {
662 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
663 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
664 #else
665 if (!compiler->mode32) {
666 if (NOT_HALFWORD(srcw))
667 return emit_load_imm64(compiler, dst, srcw);
668 }
669 else
670 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
671 #endif
672 }
673 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
674 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
675 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
676 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
677 FAIL_IF(!inst);
678 *inst = MOV_rm_r;
679 return SLJIT_SUCCESS;
680 }
681 #endif
682 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
683 FAIL_IF(!inst);
684 *inst = MOV_rm_i32;
685 return SLJIT_SUCCESS;
686 }
687 if (FAST_IS_REG(dst)) {
688 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
689 FAIL_IF(!inst);
690 *inst = MOV_r_rm;
691 return SLJIT_SUCCESS;
692 }
693
694 /* Memory to memory move. Requires two instructions. */
695 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
696 FAIL_IF(!inst);
697 *inst = MOV_r_rm;
698 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
699 FAIL_IF(!inst);
700 *inst = MOV_rm_r;
701 return SLJIT_SUCCESS;
702 }
703
704 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
705 {
706 sljit_u8 *inst;
707 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
708 sljit_s32 size;
709 #endif
710
711 CHECK_ERROR();
712 CHECK(check_sljit_emit_op0(compiler, op));
713
714 switch (GET_OPCODE(op)) {
715 case SLJIT_BREAKPOINT:
716 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
717 FAIL_IF(!inst);
718 INC_SIZE(1);
719 *inst = INT3;
720 break;
721 case SLJIT_NOP:
722 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
723 FAIL_IF(!inst);
724 INC_SIZE(1);
725 *inst = NOP;
726 break;
727 case SLJIT_LMUL_UW:
728 case SLJIT_LMUL_SW:
729 case SLJIT_DIVMOD_UW:
730 case SLJIT_DIVMOD_SW:
731 case SLJIT_DIV_UW:
732 case SLJIT_DIV_SW:
733 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
734 #ifdef _WIN64
735 SLJIT_ASSERT(
736 reg_map[SLJIT_R0] == 0
737 && reg_map[SLJIT_R1] == 2
738 && reg_map[TMP_REG1] > 7);
739 #else
740 SLJIT_ASSERT(
741 reg_map[SLJIT_R0] == 0
742 && reg_map[SLJIT_R1] < 7
743 && reg_map[TMP_REG1] == 2);
744 #endif
745 compiler->mode32 = op & SLJIT_I32_OP;
746 #endif
747 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
748
749 op = GET_OPCODE(op);
750 if ((op | 0x2) == SLJIT_DIV_UW) {
751 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
752 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
753 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
754 #else
755 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
756 #endif
757 FAIL_IF(!inst);
758 *inst = XOR_r_rm;
759 }
760
761 if ((op | 0x2) == SLJIT_DIV_SW) {
762 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
763 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
764 #endif
765
766 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
767 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
768 FAIL_IF(!inst);
769 INC_SIZE(1);
770 *inst = CDQ;
771 #else
772 if (compiler->mode32) {
773 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
774 FAIL_IF(!inst);
775 INC_SIZE(1);
776 *inst = CDQ;
777 } else {
778 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
779 FAIL_IF(!inst);
780 INC_SIZE(2);
781 *inst++ = REX_W;
782 *inst = CDQ;
783 }
784 #endif
785 }
786
787 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
788 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
789 FAIL_IF(!inst);
790 INC_SIZE(2);
791 *inst++ = GROUP_F7;
792 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
793 #else
794 #ifdef _WIN64
795 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
796 #else
797 size = (!compiler->mode32) ? 3 : 2;
798 #endif
799 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
800 FAIL_IF(!inst);
801 INC_SIZE(size);
802 #ifdef _WIN64
803 if (!compiler->mode32)
804 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
805 else if (op >= SLJIT_DIVMOD_UW)
806 *inst++ = REX_B;
807 *inst++ = GROUP_F7;
808 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
809 #else
810 if (!compiler->mode32)
811 *inst++ = REX_W;
812 *inst++ = GROUP_F7;
813 *inst = MOD_REG | reg_map[SLJIT_R1];
814 #endif
815 #endif
816 switch (op) {
817 case SLJIT_LMUL_UW:
818 *inst |= MUL;
819 break;
820 case SLJIT_LMUL_SW:
821 *inst |= IMUL;
822 break;
823 case SLJIT_DIVMOD_UW:
824 case SLJIT_DIV_UW:
825 *inst |= DIV;
826 break;
827 case SLJIT_DIVMOD_SW:
828 case SLJIT_DIV_SW:
829 *inst |= IDIV;
830 break;
831 }
832 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
833 if (op <= SLJIT_DIVMOD_SW)
834 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
835 #else
836 if (op >= SLJIT_DIV_UW)
837 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
838 #endif
839 break;
840 }
841
842 return SLJIT_SUCCESS;
843 }
844
845 #define ENCODE_PREFIX(prefix) \
846 do { \
847 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
848 FAIL_IF(!inst); \
849 INC_SIZE(1); \
850 *inst = (prefix); \
851 } while (0)
852
853 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
854 sljit_s32 dst, sljit_sw dstw,
855 sljit_s32 src, sljit_sw srcw)
856 {
857 sljit_u8* inst;
858 sljit_s32 dst_r;
859 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
860 sljit_s32 work_r;
861 #endif
862
863 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
864 compiler->mode32 = 0;
865 #endif
866
867 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
868 return SLJIT_SUCCESS; /* Empty instruction. */
869
870 if (src & SLJIT_IMM) {
871 if (FAST_IS_REG(dst)) {
872 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
873 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
874 #else
875 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
876 FAIL_IF(!inst);
877 *inst = MOV_rm_i32;
878 return SLJIT_SUCCESS;
879 #endif
880 }
881 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
882 FAIL_IF(!inst);
883 *inst = MOV_rm8_i8;
884 return SLJIT_SUCCESS;
885 }
886
887 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
888
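/* On x86-32 only EAX, ECX, EDX and EBX (reg_map values 0..3) have
   byte-addressable low registers, so byte stores from ESI, EDI or EBP
   need the extra shuffling handled below. */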
889 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
890 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
891 if (reg_map[src] >= 4) {
892 SLJIT_ASSERT(dst_r == TMP_REG1);
893 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
894 } else
895 dst_r = src;
896 #else
897 dst_r = src;
898 #endif
899 }
900 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
901 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
902 /* src, dst are registers. */
903 SLJIT_ASSERT(SLOW_IS_REG(dst));
904 if (reg_map[dst] < 4) {
905 if (dst != src)
906 EMIT_MOV(compiler, dst, 0, src, 0);
907 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
908 FAIL_IF(!inst);
909 *inst++ = GROUP_0F;
910 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
911 }
912 else {
913 if (dst != src)
914 EMIT_MOV(compiler, dst, 0, src, 0);
915 if (sign) {
916 /* shl reg, 24 */
917 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
918 FAIL_IF(!inst);
919 *inst |= SHL;
920 /* sar reg, 24 */
921 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
922 FAIL_IF(!inst);
923 *inst |= SAR;
924 }
925 else {
926 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
927 FAIL_IF(!inst);
928 *(inst + 1) |= AND;
929 }
930 }
931 return SLJIT_SUCCESS;
932 }
933 #endif
934 else {
935 /* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
936 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
937 FAIL_IF(!inst);
938 *inst++ = GROUP_0F;
939 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
940 }
941
942 if (dst & SLJIT_MEM) {
943 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
944 if (dst_r == TMP_REG1) {
945 /* Find an unused register whose reg_map value is < 4. */
946 if ((dst & REG_MASK) == SLJIT_R0) {
947 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
948 work_r = SLJIT_R2;
949 else
950 work_r = SLJIT_R1;
951 }
952 else {
953 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
954 work_r = SLJIT_R0;
955 else if ((dst & REG_MASK) == SLJIT_R1)
956 work_r = SLJIT_R2;
957 else
958 work_r = SLJIT_R1;
959 }
960
961 if (work_r == SLJIT_R0) {
962 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
963 }
964 else {
965 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
966 FAIL_IF(!inst);
967 *inst = XCHG_r_rm;
968 }
969
970 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
971 FAIL_IF(!inst);
972 *inst = MOV_rm8_r8;
973
974 if (work_r == SLJIT_R0) {
975 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
976 }
977 else {
978 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
979 FAIL_IF(!inst);
980 *inst = XCHG_r_rm;
981 }
982 }
983 else {
984 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
985 FAIL_IF(!inst);
986 *inst = MOV_rm8_r8;
987 }
988 #else
989 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
990 FAIL_IF(!inst);
991 *inst = MOV_rm8_r8;
992 #endif
993 }
994
995 return SLJIT_SUCCESS;
996 }
997
998 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
999 sljit_s32 dst, sljit_sw dstw,
1000 sljit_s32 src, sljit_sw srcw)
1001 {
1002 sljit_u8* inst;
1003 sljit_s32 dst_r;
1004
1005 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1006 compiler->mode32 = 0;
1007 #endif
1008
1009 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1010 return SLJIT_SUCCESS; /* Empty instruction. */
1011
1012 if (src & SLJIT_IMM) {
1013 if (FAST_IS_REG(dst)) {
1014 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1015 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1016 #else
1017 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1018 FAIL_IF(!inst);
1019 *inst = MOV_rm_i32;
1020 return SLJIT_SUCCESS;
1021 #endif
1022 }
1023 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1024 FAIL_IF(!inst);
1025 *inst = MOV_rm_i32;
1026 return SLJIT_SUCCESS;
1027 }
1028
1029 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1030
1031 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1032 dst_r = src;
1033 else {
1034 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1035 FAIL_IF(!inst);
1036 *inst++ = GROUP_0F;
1037 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1038 }
1039
1040 if (dst & SLJIT_MEM) {
1041 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1042 FAIL_IF(!inst);
1043 *inst = MOV_rm_r;
1044 }
1045
1046 return SLJIT_SUCCESS;
1047 }
1048
1049 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1050 sljit_s32 dst, sljit_sw dstw,
1051 sljit_s32 src, sljit_sw srcw)
1052 {
1053 sljit_u8* inst;
1054
1055 if (dst == SLJIT_UNUSED) {
1056 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1057 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1058 FAIL_IF(!inst);
1059 *inst++ = GROUP_F7;
1060 *inst |= opcode;
1061 return SLJIT_SUCCESS;
1062 }
1063 if (dst == src && dstw == srcw) {
1064 /* Same input and output */
1065 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1066 FAIL_IF(!inst);
1067 *inst++ = GROUP_F7;
1068 *inst |= opcode;
1069 return SLJIT_SUCCESS;
1070 }
1071 if (FAST_IS_REG(dst)) {
1072 EMIT_MOV(compiler, dst, 0, src, srcw);
1073 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1074 FAIL_IF(!inst);
1075 *inst++ = GROUP_F7;
1076 *inst |= opcode;
1077 return SLJIT_SUCCESS;
1078 }
1079 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1080 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1081 FAIL_IF(!inst);
1082 *inst++ = GROUP_F7;
1083 *inst |= opcode;
1084 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1085 return SLJIT_SUCCESS;
1086 }
1087
1088 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1089 sljit_s32 dst, sljit_sw dstw,
1090 sljit_s32 src, sljit_sw srcw)
1091 {
1092 sljit_u8* inst;
1093
1094 if (dst == SLJIT_UNUSED) {
1095 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1096 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1097 FAIL_IF(!inst);
1098 *inst++ = GROUP_F7;
1099 *inst |= NOT_rm;
1100 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1101 FAIL_IF(!inst);
1102 *inst = OR_r_rm;
1103 return SLJIT_SUCCESS;
1104 }
1105 if (FAST_IS_REG(dst)) {
1106 EMIT_MOV(compiler, dst, 0, src, srcw);
1107 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1108 FAIL_IF(!inst);
1109 *inst++ = GROUP_F7;
1110 *inst |= NOT_rm;
1111 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1112 FAIL_IF(!inst);
1113 *inst = OR_r_rm;
1114 return SLJIT_SUCCESS;
1115 }
1116 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1117 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1118 FAIL_IF(!inst);
1119 *inst++ = GROUP_F7;
1120 *inst |= NOT_rm;
1121 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1122 FAIL_IF(!inst);
1123 *inst = OR_r_rm;
1124 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1125 return SLJIT_SUCCESS;
1126 }
1127
1128 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1129 sljit_s32 dst, sljit_sw dstw,
1130 sljit_s32 src, sljit_sw srcw)
1131 {
1132 sljit_u8* inst;
1133 sljit_s32 dst_r;
1134
1135 SLJIT_UNUSED_ARG(op_flags);
1136 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1137 /* Just set the zero flag. */
1138 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1139 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1140 FAIL_IF(!inst);
1141 *inst++ = GROUP_F7;
1142 *inst |= NOT_rm;
1143 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1144 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1145 #else
1146 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
1147 #endif
1148 FAIL_IF(!inst);
1149 *inst |= SHR;
1150 return SLJIT_SUCCESS;
1151 }
1152
1153 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1154 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1155 src = TMP_REG1;
1156 srcw = 0;
1157 }
1158
1159 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1160 FAIL_IF(!inst);
1161 *inst++ = GROUP_0F;
1162 *inst = BSR_r_rm;
1163
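/* BSR leaves the destination undefined and sets ZF when the source is
   zero, so dst_r is preloaded with a constant that yields the correct
   result (the operand width) after the final XOR; CMOVNE, or the branch
   fallback below, overwrites it with the bit index for non-zero inputs. */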
1164 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1165 if (FAST_IS_REG(dst))
1166 dst_r = dst;
1167 else {
1168 /* Find an unused temporary register. */
1169 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1170 dst_r = SLJIT_R0;
1171 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
1172 dst_r = SLJIT_R1;
1173 else
1174 dst_r = SLJIT_R2;
1175 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1176 }
1177 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1178 #else
1179 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1180 compiler->mode32 = 0;
1181 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
1182 compiler->mode32 = op_flags & SLJIT_I32_OP;
1183 #endif
1184
1185 if (cpu_has_cmov == -1)
1186 get_cpu_features();
1187
1188 if (cpu_has_cmov) {
1189 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1190 FAIL_IF(!inst);
1191 *inst++ = GROUP_0F;
1192 *inst = CMOVNE_r_rm;
1193 } else {
1194 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1195 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1196 FAIL_IF(!inst);
1197 INC_SIZE(4);
1198
1199 *inst++ = JE_i8;
1200 *inst++ = 2;
1201 *inst++ = MOV_r_rm;
1202 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1203 #else
1204 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
1205 FAIL_IF(!inst);
1206 INC_SIZE(5);
1207
1208 *inst++ = JE_i8;
1209 *inst++ = 3;
1210 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1211 *inst++ = MOV_r_rm;
1212 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
1213 #endif
1214 }
1215
1216 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1217 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1218 #else
1219 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1220 #endif
1221 FAIL_IF(!inst);
1222 *(inst + 1) |= XOR;
1223
1224 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1225 if (dst & SLJIT_MEM) {
1226 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1227 FAIL_IF(!inst);
1228 *inst = XCHG_r_rm;
1229 }
1230 #else
1231 if (dst & SLJIT_MEM)
1232 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1233 #endif
1234 return SLJIT_SUCCESS;
1235 }
1236
1237 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1238 sljit_s32 dst, sljit_sw dstw,
1239 sljit_s32 src, sljit_sw srcw)
1240 {
1241 sljit_s32 update = 0;
1242 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1243 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1244 sljit_s32 dst_is_ereg = 0;
1245 sljit_s32 src_is_ereg = 0;
1246 #else
1247 # define src_is_ereg 0
1248 #endif
1249
1250 CHECK_ERROR();
1251 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1252 ADJUST_LOCAL_OFFSET(dst, dstw);
1253 ADJUST_LOCAL_OFFSET(src, srcw);
1254
1255 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1256 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1257 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1258 compiler->mode32 = op_flags & SLJIT_I32_OP;
1259 #endif
1260
1261 op = GET_OPCODE(op);
1262 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1263 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1264 compiler->mode32 = 0;
1265 #endif
1266
1267 if (op_flags & SLJIT_I32_OP) {
1268 if (FAST_IS_REG(src) && src == dst) {
1269 if (!TYPE_CAST_NEEDED(op))
1270 return SLJIT_SUCCESS;
1271 }
1272 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1273 if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1274 op = SLJIT_MOV_U32;
1275 if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1276 op = SLJIT_MOVU_U32;
1277 if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1278 op = SLJIT_MOV_S32;
1279 if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1280 op = SLJIT_MOVU_S32;
1281 #endif
1282 }
1283
1284 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1285 if (op >= SLJIT_MOVU) {
1286 update = 1;
1287 op -= 8;
1288 }
1289
1290 if (src & SLJIT_IMM) {
1291 switch (op) {
1292 case SLJIT_MOV_U8:
1293 srcw = (sljit_u8)srcw;
1294 break;
1295 case SLJIT_MOV_S8:
1296 srcw = (sljit_s8)srcw;
1297 break;
1298 case SLJIT_MOV_U16:
1299 srcw = (sljit_u16)srcw;
1300 break;
1301 case SLJIT_MOV_S16:
1302 srcw = (sljit_s16)srcw;
1303 break;
1304 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1305 case SLJIT_MOV_U32:
1306 srcw = (sljit_u32)srcw;
1307 break;
1308 case SLJIT_MOV_S32:
1309 srcw = (sljit_s32)srcw;
1310 break;
1311 #endif
1312 }
1313 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1314 if (SLJIT_UNLIKELY(dst_is_ereg))
1315 return emit_mov(compiler, dst, dstw, src, srcw);
1316 #endif
1317 }
1318
1319 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1320 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1321 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1322 dst = TMP_REG1;
1323 }
1324 #endif
1325
1326 switch (op) {
1327 case SLJIT_MOV:
1328 case SLJIT_MOV_P:
1329 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1330 case SLJIT_MOV_U32:
1331 case SLJIT_MOV_S32:
1332 #endif
1333 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1334 break;
1335 case SLJIT_MOV_U8:
1336 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1337 break;
1338 case SLJIT_MOV_S8:
1339 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1340 break;
1341 case SLJIT_MOV_U16:
1342 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1343 break;
1344 case SLJIT_MOV_S16:
1345 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1346 break;
1347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1348 case SLJIT_MOV_U32:
1349 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1350 break;
1351 case SLJIT_MOV_S32:
1352 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1353 break;
1354 #endif
1355 }
1356
1357 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1358 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1359 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1360 #endif
1361
1362 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
1363 if ((src & OFFS_REG_MASK) != 0) {
1364 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1365 (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
1366 }
1367 else if (srcw != 0) {
1368 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1369 (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
1370 }
1371 }
1372
1373 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
1374 if ((dst & OFFS_REG_MASK) != 0) {
1375 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1376 (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
1377 }
1378 else if (dstw != 0) {
1379 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1380 (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
1381 }
1382 }
1383 return SLJIT_SUCCESS;
1384 }
1385
1386 switch (op) {
1387 case SLJIT_NOT:
1388 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1389 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1390 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1391
1392 case SLJIT_NEG:
1393 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1394
1395 case SLJIT_CLZ:
1396 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1397 }
1398
1399 return SLJIT_SUCCESS;
1400
1401 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1402 # undef src_is_ereg
1403 #endif
1404 }
1405
1406 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1407
1408 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1409 if (IS_HALFWORD(immw) || compiler->mode32) { \
1410 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1411 FAIL_IF(!inst); \
1412 *(inst + 1) |= (op_imm); \
1413 } \
1414 else { \
1415 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1416 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1417 FAIL_IF(!inst); \
1418 *inst = (op_mr); \
1419 }
1420
1421 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1422 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1423
1424 #else
1425
1426 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1427 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1428 FAIL_IF(!inst); \
1429 *(inst + 1) |= (op_imm);
1430
1431 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1432 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1433
1434 #endif
1435
1436 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1437 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1438 sljit_s32 dst, sljit_sw dstw,
1439 sljit_s32 src1, sljit_sw src1w,
1440 sljit_s32 src2, sljit_sw src2w)
1441 {
1442 sljit_u8* inst;
1443
1444 if (dst == SLJIT_UNUSED) {
1445 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1446 if (src2 & SLJIT_IMM) {
1447 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1448 }
1449 else {
1450 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1451 FAIL_IF(!inst);
1452 *inst = op_rm;
1453 }
1454 return SLJIT_SUCCESS;
1455 }
1456
1457 if (dst == src1 && dstw == src1w) {
1458 if (src2 & SLJIT_IMM) {
1459 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1460 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1461 #else
1462 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1463 #endif
1464 BINARY_EAX_IMM(op_eax_imm, src2w);
1465 }
1466 else {
1467 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1468 }
1469 }
1470 else if (FAST_IS_REG(dst)) {
1471 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1472 FAIL_IF(!inst);
1473 *inst = op_rm;
1474 }
1475 else if (FAST_IS_REG(src2)) {
1476 /* Special exception for sljit_emit_op_flags. */
1477 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1478 FAIL_IF(!inst);
1479 *inst = op_mr;
1480 }
1481 else {
1482 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1483 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1484 FAIL_IF(!inst);
1485 *inst = op_mr;
1486 }
1487 return SLJIT_SUCCESS;
1488 }
1489
1490 /* Only for cumulative operations. */
1491 if (dst == src2 && dstw == src2w) {
1492 if (src1 & SLJIT_IMM) {
1493 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1494 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1495 #else
1496 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1497 #endif
1498 BINARY_EAX_IMM(op_eax_imm, src1w);
1499 }
1500 else {
1501 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1502 }
1503 }
1504 else if (FAST_IS_REG(dst)) {
1505 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1506 FAIL_IF(!inst);
1507 *inst = op_rm;
1508 }
1509 else if (FAST_IS_REG(src1)) {
1510 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1511 FAIL_IF(!inst);
1512 *inst = op_mr;
1513 }
1514 else {
1515 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1516 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1517 FAIL_IF(!inst);
1518 *inst = op_mr;
1519 }
1520 return SLJIT_SUCCESS;
1521 }
1522
1523 /* General version. */
1524 if (FAST_IS_REG(dst)) {
1525 EMIT_MOV(compiler, dst, 0, src1, src1w);
1526 if (src2 & SLJIT_IMM) {
1527 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1528 }
1529 else {
1530 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1531 FAIL_IF(!inst);
1532 *inst = op_rm;
1533 }
1534 }
1535 else {
1536 /* This version requires fewer memory writes. */
1537 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1538 if (src2 & SLJIT_IMM) {
1539 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1540 }
1541 else {
1542 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1543 FAIL_IF(!inst);
1544 *inst = op_rm;
1545 }
1546 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1547 }
1548
1549 return SLJIT_SUCCESS;
1550 }
1551
1552 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1553 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1554 sljit_s32 dst, sljit_sw dstw,
1555 sljit_s32 src1, sljit_sw src1w,
1556 sljit_s32 src2, sljit_sw src2w)
1557 {
1558 sljit_u8* inst;
1559
1560 if (dst == SLJIT_UNUSED) {
1561 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1562 if (src2 & SLJIT_IMM) {
1563 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1564 }
1565 else {
1566 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1567 FAIL_IF(!inst);
1568 *inst = op_rm;
1569 }
1570 return SLJIT_SUCCESS;
1571 }
1572
1573 if (dst == src1 && dstw == src1w) {
1574 if (src2 & SLJIT_IMM) {
1575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1576 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1577 #else
1578 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1579 #endif
1580 BINARY_EAX_IMM(op_eax_imm, src2w);
1581 }
1582 else {
1583 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1584 }
1585 }
1586 else if (FAST_IS_REG(dst)) {
1587 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1588 FAIL_IF(!inst);
1589 *inst = op_rm;
1590 }
1591 else if (FAST_IS_REG(src2)) {
1592 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1593 FAIL_IF(!inst);
1594 *inst = op_mr;
1595 }
1596 else {
1597 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1598 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1599 FAIL_IF(!inst);
1600 *inst = op_mr;
1601 }
1602 return SLJIT_SUCCESS;
1603 }
1604
1605 /* General version. */
1606 if (FAST_IS_REG(dst) && dst != src2) {
1607 EMIT_MOV(compiler, dst, 0, src1, src1w);
1608 if (src2 & SLJIT_IMM) {
1609 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1610 }
1611 else {
1612 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1613 FAIL_IF(!inst);
1614 *inst = op_rm;
1615 }
1616 }
1617 else {
1618 /* This version requires fewer memory writes. */
1619 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1620 if (src2 & SLJIT_IMM) {
1621 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1622 }
1623 else {
1624 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1625 FAIL_IF(!inst);
1626 *inst = op_rm;
1627 }
1628 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1629 }
1630
1631 return SLJIT_SUCCESS;
1632 }
1633
1634 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1635 sljit_s32 dst, sljit_sw dstw,
1636 sljit_s32 src1, sljit_sw src1w,
1637 sljit_s32 src2, sljit_sw src2w)
1638 {
1639 sljit_u8* inst;
1640 sljit_s32 dst_r;
1641
1642 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1643
1644 /* Register destination. */
1645 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1646 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1647 FAIL_IF(!inst);
1648 *inst++ = GROUP_0F;
1649 *inst = IMUL_r_rm;
1650 }
1651 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1652 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1653 FAIL_IF(!inst);
1654 *inst++ = GROUP_0F;
1655 *inst = IMUL_r_rm;
1656 }
1657 else if (src1 & SLJIT_IMM) {
1658 if (src2 & SLJIT_IMM) {
1659 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1660 src2 = dst_r;
1661 src2w = 0;
1662 }
1663
1664 if (src1w <= 127 && src1w >= -128) {
1665 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1666 FAIL_IF(!inst);
1667 *inst = IMUL_r_rm_i8;
1668 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1669 FAIL_IF(!inst);
1670 INC_SIZE(1);
1671 *inst = (sljit_s8)src1w;
1672 }
1673 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1674 else {
1675 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1676 FAIL_IF(!inst);
1677 *inst = IMUL_r_rm_i32;
1678 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1679 FAIL_IF(!inst);
1680 INC_SIZE(4);
1681 sljit_unaligned_store_sw(inst, src1w);
1682 }
1683 #else
1684 else if (IS_HALFWORD(src1w)) {
1685 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1686 FAIL_IF(!inst);
1687 *inst = IMUL_r_rm_i32;
1688 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1689 FAIL_IF(!inst);
1690 INC_SIZE(4);
1691 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1692 }
1693 else {
1694 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1695 if (dst_r != src2)
1696 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1697 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1698 FAIL_IF(!inst);
1699 *inst++ = GROUP_0F;
1700 *inst = IMUL_r_rm;
1701 }
1702 #endif
1703 }
1704 else if (src2 & SLJIT_IMM) {
1705 /* Note: src1 is NOT immediate. */
1706
1707 if (src2w <= 127 && src2w >= -128) {
1708 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1709 FAIL_IF(!inst);
1710 *inst = IMUL_r_rm_i8;
1711 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1712 FAIL_IF(!inst);
1713 INC_SIZE(1);
1714 *inst = (sljit_s8)src2w;
1715 }
1716 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1717 else {
1718 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1719 FAIL_IF(!inst);
1720 *inst = IMUL_r_rm_i32;
1721 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1722 FAIL_IF(!inst);
1723 INC_SIZE(4);
1724 sljit_unaligned_store_sw(inst, src2w);
1725 }
1726 #else
1727 else if (IS_HALFWORD(src2w)) {
1728 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1729 FAIL_IF(!inst);
1730 *inst = IMUL_r_rm_i32;
1731 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1732 FAIL_IF(!inst);
1733 INC_SIZE(4);
1734 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1735 }
1736 else {
1737 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1738 if (dst_r != src1)
1739 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1740 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1741 FAIL_IF(!inst);
1742 *inst++ = GROUP_0F;
1743 *inst = IMUL_r_rm;
1744 }
1745 #endif
1746 }
1747 else {
1748 /* Neither argument is immediate. */
1749 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1750 dst_r = TMP_REG1;
1751 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1752 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1753 FAIL_IF(!inst);
1754 *inst++ = GROUP_0F;
1755 *inst = IMUL_r_rm;
1756 }
1757
1758 if (dst_r == TMP_REG1)
1759 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1760
1761 return SLJIT_SUCCESS;
1762 }
1763
1764 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1765 sljit_s32 dst, sljit_sw dstw,
1766 sljit_s32 src1, sljit_sw src1w,
1767 sljit_s32 src2, sljit_sw src2w)
1768 {
1769 sljit_u8* inst;
1770 sljit_s32 dst_r, done = 0;
1771
1772 /* These cases are better left to be handled the normal way. */
1773 if (dst == src1 && dstw == src1w)
1774 return SLJIT_ERR_UNSUPPORTED;
1775 if (dst == src2 && dstw == src2w)
1776 return SLJIT_ERR_UNSUPPORTED;
1777
1778 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1779
1780 if (FAST_IS_REG(src1)) {
1781 if (FAST_IS_REG(src2)) {
1782 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1783 FAIL_IF(!inst);
1784 *inst = LEA_r_m;
1785 done = 1;
1786 }
1787 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1788 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1789 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1790 #else
1791 if (src2 & SLJIT_IMM) {
1792 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1793 #endif
1794 FAIL_IF(!inst);
1795 *inst = LEA_r_m;
1796 done = 1;
1797 }
1798 }
1799 else if (FAST_IS_REG(src2)) {
1800 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1801 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1802 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1803 #else
1804 if (src1 & SLJIT_IMM) {
1805 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1806 #endif
1807 FAIL_IF(!inst);
1808 *inst = LEA_r_m;
1809 done = 1;
1810 }
1811 }
1812
1813 if (done) {
1814 if (dst_r == TMP_REG1)
1815 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1816 return SLJIT_SUCCESS;
1817 }
1818 return SLJIT_ERR_UNSUPPORTED;
1819 }
1820
1821 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1822 sljit_s32 src1, sljit_sw src1w,
1823 sljit_s32 src2, sljit_sw src2w)
1824 {
1825 sljit_u8* inst;
1826
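	/* CMP with eax as the first operand has a dedicated one byte opcode for
	   a 32-bit immediate, so it is preferred when the immediate does not fit
	   into a sign-extended byte (the byte-immediate group form is shorter
	   otherwise). */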
1827 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1828 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1829 #else
1830 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1831 #endif
1832 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1833 return SLJIT_SUCCESS;
1834 }
1835
1836 if (FAST_IS_REG(src1)) {
1837 if (src2 & SLJIT_IMM) {
1838 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1839 }
1840 else {
1841 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1842 FAIL_IF(!inst);
1843 *inst = CMP_r_rm;
1844 }
1845 return SLJIT_SUCCESS;
1846 }
1847
1848 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1849 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1850 FAIL_IF(!inst);
1851 *inst = CMP_rm_r;
1852 return SLJIT_SUCCESS;
1853 }
1854
1855 if (src2 & SLJIT_IMM) {
1856 if (src1 & SLJIT_IMM) {
1857 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1858 src1 = TMP_REG1;
1859 src1w = 0;
1860 }
1861 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1862 }
1863 else {
1864 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1865 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1866 FAIL_IF(!inst);
1867 *inst = CMP_r_rm;
1868 }
1869 return SLJIT_SUCCESS;
1870 }
1871
1872 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1873 sljit_s32 src1, sljit_sw src1w,
1874 sljit_s32 src2, sljit_sw src2w)
1875 {
1876 sljit_u8* inst;
1877
1878 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1879 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1880 #else
1881 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1882 #endif
1883 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1884 return SLJIT_SUCCESS;
1885 }
1886
1887 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1888 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1889 #else
1890 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1891 #endif
1892 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1893 return SLJIT_SUCCESS;
1894 }
1895
1896 if (!(src1 & SLJIT_IMM)) {
1897 if (src2 & SLJIT_IMM) {
1898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1899 if (IS_HALFWORD(src2w) || compiler->mode32) {
1900 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1901 FAIL_IF(!inst);
1902 *inst = GROUP_F7;
1903 }
1904 else {
1905 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1906 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1907 FAIL_IF(!inst);
1908 *inst = TEST_rm_r;
1909 }
1910 #else
1911 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1912 FAIL_IF(!inst);
1913 *inst = GROUP_F7;
1914 #endif
1915 return SLJIT_SUCCESS;
1916 }
1917 else if (FAST_IS_REG(src1)) {
1918 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1919 FAIL_IF(!inst);
1920 *inst = TEST_rm_r;
1921 return SLJIT_SUCCESS;
1922 }
1923 }
1924
1925 if (!(src2 & SLJIT_IMM)) {
1926 if (src1 & SLJIT_IMM) {
1927 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1928 if (IS_HALFWORD(src1w) || compiler->mode32) {
1929 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1930 FAIL_IF(!inst);
1931 *inst = GROUP_F7;
1932 }
1933 else {
1934 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1935 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1936 FAIL_IF(!inst);
1937 *inst = TEST_rm_r;
1938 }
1939 #else
1940 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1941 FAIL_IF(!inst);
1942 *inst = GROUP_F7;
1943 #endif
1944 return SLJIT_SUCCESS;
1945 }
1946 else if (FAST_IS_REG(src2)) {
1947 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1948 FAIL_IF(!inst);
1949 *inst = TEST_rm_r;
1950 return SLJIT_SUCCESS;
1951 }
1952 }
1953
1954 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1955 if (src2 & SLJIT_IMM) {
1956 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1957 if (IS_HALFWORD(src2w) || compiler->mode32) {
1958 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1959 FAIL_IF(!inst);
1960 *inst = GROUP_F7;
1961 }
1962 else {
1963 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1964 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1965 FAIL_IF(!inst);
1966 *inst = TEST_rm_r;
1967 }
1968 #else
1969 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1970 FAIL_IF(!inst);
1971 *inst = GROUP_F7;
1972 #endif
1973 }
1974 else {
1975 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1976 FAIL_IF(!inst);
1977 *inst = TEST_rm_r;
1978 }
1979 return SLJIT_SUCCESS;
1980 }
1981
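/* Variable shifts can only take their count from cl, so when src2 is neither
   an immediate nor SLJIT_PREF_SHIFT_REG (ecx), the count must be moved into
   ecx while its previous value is preserved in a temporary register or on
   the stack. */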
1982 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
1983 sljit_u8 mode,
1984 sljit_s32 dst, sljit_sw dstw,
1985 sljit_s32 src1, sljit_sw src1w,
1986 sljit_s32 src2, sljit_sw src2w)
1987 {
1988 sljit_u8* inst;
1989
1990 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
1991 if (dst == src1 && dstw == src1w) {
1992 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
1993 FAIL_IF(!inst);
1994 *inst |= mode;
1995 return SLJIT_SUCCESS;
1996 }
1997 if (dst == SLJIT_UNUSED) {
1998 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1999 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2000 FAIL_IF(!inst);
2001 *inst |= mode;
2002 return SLJIT_SUCCESS;
2003 }
2004 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2005 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2006 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2007 FAIL_IF(!inst);
2008 *inst |= mode;
2009 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2010 return SLJIT_SUCCESS;
2011 }
2012 if (FAST_IS_REG(dst)) {
2013 EMIT_MOV(compiler, dst, 0, src1, src1w);
2014 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2015 FAIL_IF(!inst);
2016 *inst |= mode;
2017 return SLJIT_SUCCESS;
2018 }
2019
2020 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2021 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2022 FAIL_IF(!inst);
2023 *inst |= mode;
2024 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2025 return SLJIT_SUCCESS;
2026 }
2027
2028 if (dst == SLJIT_PREF_SHIFT_REG) {
2029 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2030 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2031 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2032 FAIL_IF(!inst);
2033 *inst |= mode;
2034 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2035 }
2036 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2037 if (src1 != dst)
2038 EMIT_MOV(compiler, dst, 0, src1, src1w);
2039 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2040 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2041 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2042 FAIL_IF(!inst);
2043 *inst |= mode;
2044 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2045 }
2046 else {
2047 		/* This case is complex since ecx itself may be used for
2048 		   addressing, and that possibility must be supported as well. */
2049 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2050 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2051 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2052 #else
2053 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2054 #endif
2055 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2056 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2057 FAIL_IF(!inst);
2058 *inst |= mode;
2059 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2060 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2061 #else
2062 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2063 #endif
2064 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2065 }
2066
2067 return SLJIT_SUCCESS;
2068 }
2069
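/* Shifting by zero does not set the flags. When the flags are needed and the
   count is not a known non-zero immediate, a compare against zero is emitted
   to establish them: after the shift when the result ends up in a register,
   before it otherwise. */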
2070 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2071 sljit_u8 mode, sljit_s32 set_flags,
2072 sljit_s32 dst, sljit_sw dstw,
2073 sljit_s32 src1, sljit_sw src1w,
2074 sljit_s32 src2, sljit_sw src2w)
2075 {
2076 /* The CPU does not set flags if the shift count is 0. */
2077 if (src2 & SLJIT_IMM) {
2078 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2079 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2080 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2081 #else
2082 if ((src2w & 0x1f) != 0)
2083 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2084 #endif
2085 if (!set_flags)
2086 return emit_mov(compiler, dst, dstw, src1, src1w);
2087 /* OR dst, src, 0 */
2088 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2089 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2090 }
2091
2092 if (!set_flags)
2093 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2094
2095 if (!FAST_IS_REG(dst))
2096 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2097
2098 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2099
2100 if (FAST_IS_REG(dst))
2101 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2102 return SLJIT_SUCCESS;
2103 }
2104
2105 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2106 sljit_s32 dst, sljit_sw dstw,
2107 sljit_s32 src1, sljit_sw src1w,
2108 sljit_s32 src2, sljit_sw src2w)
2109 {
2110 CHECK_ERROR();
2111 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2112 ADJUST_LOCAL_OFFSET(dst, dstw);
2113 ADJUST_LOCAL_OFFSET(src1, src1w);
2114 ADJUST_LOCAL_OFFSET(src2, src2w);
2115
2116 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2117 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2118 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2119 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2120 compiler->mode32 = op & SLJIT_I32_OP;
2121 #endif
2122
2123 switch (GET_OPCODE(op)) {
2124 case SLJIT_ADD:
2125 if (!HAS_FLAGS(op)) {
2126 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2127 return compiler->error;
2128 }
2129 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2130 dst, dstw, src1, src1w, src2, src2w);
2131 case SLJIT_ADDC:
2132 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2133 dst, dstw, src1, src1w, src2, src2w);
2134 case SLJIT_SUB:
2135 if (!HAS_FLAGS(op)) {
2136 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2137 return compiler->error;
2138 }
2139
2140 if (dst == SLJIT_UNUSED)
2141 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2142 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2143 dst, dstw, src1, src1w, src2, src2w);
2144 case SLJIT_SUBC:
2145 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2146 dst, dstw, src1, src1w, src2, src2w);
2147 case SLJIT_MUL:
2148 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2149 case SLJIT_AND:
2150 if (dst == SLJIT_UNUSED)
2151 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2152 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2153 dst, dstw, src1, src1w, src2, src2w);
2154 case SLJIT_OR:
2155 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2156 dst, dstw, src1, src1w, src2, src2w);
2157 case SLJIT_XOR:
2158 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2159 dst, dstw, src1, src1w, src2, src2w);
2160 case SLJIT_SHL:
2161 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2162 dst, dstw, src1, src1w, src2, src2w);
2163 case SLJIT_LSHR:
2164 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2165 dst, dstw, src1, src1w, src2, src2w);
2166 case SLJIT_ASHR:
2167 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2168 dst, dstw, src1, src1w, src2, src2w);
2169 }
2170
2171 return SLJIT_SUCCESS;
2172 }
2173
2174 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2175 {
2176 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2177 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2178 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2179 return -1;
2180 #endif
2181 return reg_map[reg];
2182 }
2183
2184 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2185 {
2186 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2187 return reg;
2188 }
2189
2190 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2191 void *instruction, sljit_s32 size)
2192 {
2193 sljit_u8 *inst;
2194
2195 CHECK_ERROR();
2196 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2197
2198 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2199 FAIL_IF(!inst);
2200 INC_SIZE(size);
2201 SLJIT_MEMCPY(inst, instruction, size);
2202 return SLJIT_SUCCESS;
2203 }
2204
2205 /* --------------------------------------------------------------------- */
2206 /* Floating point operators */
2207 /* --------------------------------------------------------------------- */
2208
2209 /* Alignment + 2 * 16 bytes for each precision. */
2210 static sljit_s32 sse2_data[3 + (4 + 4) * 2];
2211 static sljit_s32 *sse2_buffer;
2212
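/* sse2_buffer is the first 16 byte aligned address inside sse2_data. It holds
   four 16 byte constants used by SLJIT_NEG_F64 and SLJIT_ABS_F64 below:
   the single precision sign mask (word 0) and absolute value mask (word 4),
   then the double precision sign mask (words 8-9) and absolute value mask
   (words 12-13). XORPD with the sign mask flips the sign bit, ANDPD with the
   absolute value mask clears it. */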
2213 static void init_compiler(void)
2214 {
2215 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2216 /* Single precision constants. */
2217 sse2_buffer[0] = 0x80000000;
2218 sse2_buffer[4] = 0x7fffffff;
2219 /* Double precision constants. */
2220 sse2_buffer[8] = 0;
2221 sse2_buffer[9] = 0x80000000;
2222 sse2_buffer[12] = 0xffffffff;
2223 sse2_buffer[13] = 0x7fffffff;
2224 }
2225
2226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2227 {
2228 #ifdef SLJIT_IS_FPU_AVAILABLE
2229 return SLJIT_IS_FPU_AVAILABLE;
2230 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2231 if (cpu_has_sse2 == -1)
2232 get_cpu_features();
2233 return cpu_has_sse2;
2234 #else /* SLJIT_DETECT_SSE2 */
2235 return 1;
2236 #endif /* SLJIT_DETECT_SSE2 */
2237 }
2238
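/* Scalar SSE2 arithmetic: the F3 prefix selects the single precision form of
   the instruction, the F2 prefix the double precision form. */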
2239 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2240 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2241 {
2242 sljit_u8 *inst;
2243
2244 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2245 FAIL_IF(!inst);
2246 *inst++ = GROUP_0F;
2247 *inst = opcode;
2248 return SLJIT_SUCCESS;
2249 }
2250
2251 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2252 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2253 {
2254 sljit_u8 *inst;
2255
2256 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2257 FAIL_IF(!inst);
2258 *inst++ = GROUP_0F;
2259 *inst = opcode;
2260 return SLJIT_SUCCESS;
2261 }
2262
2263 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2264 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2265 {
2266 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2267 }
2268
2269 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2270 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2271 {
2272 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2273 }
2274
2275 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2276 sljit_s32 dst, sljit_sw dstw,
2277 sljit_s32 src, sljit_sw srcw)
2278 {
2279 sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2280 sljit_u8 *inst;
2281
2282 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2283 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2284 compiler->mode32 = 0;
2285 #endif
2286
2287 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2288 FAIL_IF(!inst);
2289 *inst++ = GROUP_0F;
2290 *inst = CVTTSD2SI_r_xm;
2291
2292 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2293 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2294 return SLJIT_SUCCESS;
2295 }
2296
2297 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2298 sljit_s32 dst, sljit_sw dstw,
2299 sljit_s32 src, sljit_sw srcw)
2300 {
2301 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2302 sljit_u8 *inst;
2303
2304 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2305 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2306 compiler->mode32 = 0;
2307 #endif
2308
2309 if (src & SLJIT_IMM) {
2310 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2311 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2312 srcw = (sljit_s32)srcw;
2313 #endif
2314 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2315 src = TMP_REG1;
2316 srcw = 0;
2317 }
2318
2319 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2320 FAIL_IF(!inst);
2321 *inst++ = GROUP_0F;
2322 *inst = CVTSI2SD_x_rm;
2323
2324 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2325 compiler->mode32 = 1;
2326 #endif
2327 if (dst_r == TMP_FREG)
2328 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2329 return SLJIT_SUCCESS;
2330 }
2331
2332 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2333 sljit_s32 src1, sljit_sw src1w,
2334 sljit_s32 src2, sljit_sw src2w)
2335 {
2336 if (!FAST_IS_REG(src1)) {
2337 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2338 src1 = TMP_FREG;
2339 }
2340 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2341 }
2342
2343 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2344 sljit_s32 dst, sljit_sw dstw,
2345 sljit_s32 src, sljit_sw srcw)
2346 {
2347 sljit_s32 dst_r;
2348
2349 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2350 compiler->mode32 = 1;
2351 #endif
2352
2353 CHECK_ERROR();
2354 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2355
2356 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2357 if (FAST_IS_REG(dst))
2358 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2359 if (FAST_IS_REG(src))
2360 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2361 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2362 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2363 }
2364
2365 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2366 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2367 if (FAST_IS_REG(src)) {
2368 			/* We overwrite the high bits of the source. From the SLJIT point of view,
2369 this is not an issue.
2370 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2371 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2372 }
2373 else {
2374 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2375 src = TMP_FREG;
2376 }
2377
2378 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2379 if (dst_r == TMP_FREG)
2380 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2381 return SLJIT_SUCCESS;
2382 }
2383
2384 if (SLOW_IS_REG(dst)) {
2385 dst_r = dst;
2386 if (dst != src)
2387 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2388 }
2389 else {
2390 dst_r = TMP_FREG;
2391 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2392 }
2393
2394 switch (GET_OPCODE(op)) {
2395 case SLJIT_NEG_F64:
2396 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2397 break;
2398
2399 case SLJIT_ABS_F64:
2400 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2401 break;
2402 }
2403
2404 if (dst_r == TMP_FREG)
2405 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2406 return SLJIT_SUCCESS;
2407 }
2408
2409 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2410 sljit_s32 dst, sljit_sw dstw,
2411 sljit_s32 src1, sljit_sw src1w,
2412 sljit_s32 src2, sljit_sw src2w)
2413 {
2414 sljit_s32 dst_r;
2415
2416 CHECK_ERROR();
2417 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2418 ADJUST_LOCAL_OFFSET(dst, dstw);
2419 ADJUST_LOCAL_OFFSET(src1, src1w);
2420 ADJUST_LOCAL_OFFSET(src2, src2w);
2421
2422 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2423 compiler->mode32 = 1;
2424 #endif
2425
2426 if (FAST_IS_REG(dst)) {
2427 dst_r = dst;
2428 if (dst == src1)
2429 ; /* Do nothing here. */
2430 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2431 /* Swap arguments. */
2432 src2 = src1;
2433 src2w = src1w;
2434 }
2435 else if (dst != src2)
2436 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2437 else {
2438 dst_r = TMP_FREG;
2439 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2440 }
2441 }
2442 else {
2443 dst_r = TMP_FREG;
2444 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2445 }
2446
2447 switch (GET_OPCODE(op)) {
2448 case SLJIT_ADD_F64:
2449 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2450 break;
2451
2452 case SLJIT_SUB_F64:
2453 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2454 break;
2455
2456 case SLJIT_MUL_F64:
2457 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2458 break;
2459
2460 case SLJIT_DIV_F64:
2461 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2462 break;
2463 }
2464
2465 if (dst_r == TMP_FREG)
2466 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2467 return SLJIT_SUCCESS;
2468 }
2469
2470 /* --------------------------------------------------------------------- */
2471 /* Conditional instructions */
2472 /* --------------------------------------------------------------------- */
2473
2474 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2475 {
2476 sljit_u8 *inst;
2477 struct sljit_label *label;
2478
2479 CHECK_ERROR_PTR();
2480 CHECK_PTR(check_sljit_emit_label(compiler));
2481
2482 if (compiler->last_label && compiler->last_label->size == compiler->size)
2483 return compiler->last_label;
2484
2485 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2486 PTR_FAIL_IF(!label);
2487 set_label(label, compiler);
2488
2489 inst = (sljit_u8*)ensure_buf(compiler, 2);
2490 PTR_FAIL_IF(!inst);
2491
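	/* A record in the instruction buffer starts with a zero length byte; the
	   next byte selects the kind: 0 for a label, 1 for a const and type + 2
	   for a jump. It is resolved when the machine code is generated. */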
2492 *inst++ = 0;
2493 *inst++ = 0;
2494
2495 return label;
2496 }
2497
2498 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2499 {
2500 sljit_u8 *inst;
2501 struct sljit_jump *jump;
2502
2503 CHECK_ERROR_PTR();
2504 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2505
2506 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2507 PTR_FAIL_IF_NULL(jump);
2508 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2509 type &= 0xff;
2510
2511 if (type >= SLJIT_CALL1)
2512 PTR_FAIL_IF(call_with_args(compiler, type));
2513
2514 /* Worst case size. */
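	/* On x86-32: jmp rel32 is 5 bytes, jcc rel32 is 6. On x86-64 room is
	   reserved for a 10 byte mov of a 64-bit immediate plus a 3 byte indirect
	   jump, with 2 more bytes for the short branch used in the conditional
	   case. */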
2515 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2516 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2517 #else
2518 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2519 #endif
2520
2521 inst = (sljit_u8*)ensure_buf(compiler, 2);
2522 PTR_FAIL_IF_NULL(inst);
2523
2524 *inst++ = 0;
2525 *inst++ = type + 2;
2526 return jump;
2527 }
2528
2529 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2530 {
2531 sljit_u8 *inst;
2532 struct sljit_jump *jump;
2533
2534 CHECK_ERROR();
2535 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2536 ADJUST_LOCAL_OFFSET(src, srcw);
2537
2538 CHECK_EXTRA_REGS(src, srcw, (void)0);
2539
2540 if (type >= SLJIT_CALL1) {
2541 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2542 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2543 if (src == SLJIT_R2) {
2544 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2545 src = TMP_REG1;
2546 }
2547 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2548 srcw += sizeof(sljit_sw);
2549 #endif
2550 #endif
2551 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2552 if (src == SLJIT_R2) {
2553 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2554 src = TMP_REG1;
2555 }
2556 #endif
2557 FAIL_IF(call_with_args(compiler, type));
2558 }
2559
2560 if (src == SLJIT_IMM) {
2561 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2562 FAIL_IF_NULL(jump);
2563 set_jump(jump, compiler, JUMP_ADDR);
2564 jump->u.target = srcw;
2565
2566 /* Worst case size. */
2567 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2568 compiler->size += 5;
2569 #else
2570 compiler->size += 10 + 3;
2571 #endif
2572
2573 inst = (sljit_u8*)ensure_buf(compiler, 2);
2574 FAIL_IF_NULL(inst);
2575
2576 *inst++ = 0;
2577 *inst++ = type + 2;
2578 }
2579 else {
2580 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2581 /* REX_W is not necessary (src is not immediate). */
2582 compiler->mode32 = 1;
2583 #endif
2584 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2585 FAIL_IF(!inst);
2586 *inst++ = GROUP_FF;
2587 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2588 }
2589 return SLJIT_SUCCESS;
2590 }
2591
2592 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2593 sljit_s32 dst, sljit_sw dstw,
2594 sljit_s32 src, sljit_sw srcw,
2595 sljit_s32 type)
2596 {
2597 sljit_u8 *inst;
2598 sljit_u8 cond_set = 0;
2599 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2600 sljit_s32 reg;
2601 #endif
2602 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2603 sljit_s32 dst_save = dst;
2604 sljit_sw dstw_save = dstw;
2605
2606 CHECK_ERROR();
2607 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2608 SLJIT_UNUSED_ARG(srcw);
2609
2610 if (dst == SLJIT_UNUSED)
2611 return SLJIT_SUCCESS;
2612
2613 ADJUST_LOCAL_OFFSET(dst, dstw);
2614 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2615
2616 type &= 0xff;
2617 /* setcc = jcc + 0x10. */
2618 cond_set = get_jump_code(type) + 0x10;
2619
2620 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2621 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2622 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2623 FAIL_IF(!inst);
2624 INC_SIZE(4 + 3);
2625 /* Set low register to conditional flag. */
2626 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2627 *inst++ = GROUP_0F;
2628 *inst++ = cond_set;
2629 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2630 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2631 *inst++ = OR_rm8_r8;
2632 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2633 return SLJIT_SUCCESS;
2634 }
2635
2636 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2637
2638 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2639 FAIL_IF(!inst);
2640 INC_SIZE(4 + 4);
2641 /* Set low register to conditional flag. */
2642 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2643 *inst++ = GROUP_0F;
2644 *inst++ = cond_set;
2645 *inst++ = MOD_REG | reg_lmap[reg];
2646 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2647 /* The movzx instruction does not affect flags. */
2648 *inst++ = GROUP_0F;
2649 *inst++ = MOVZX_r_rm8;
2650 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2651
2652 if (reg != TMP_REG1)
2653 return SLJIT_SUCCESS;
2654
2655 if (GET_OPCODE(op) < SLJIT_ADD) {
2656 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2657 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2658 }
2659 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2660 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2661 compiler->skip_checks = 1;
2662 #endif
2663 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2664
2665 #else
2666 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2667 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2668 if (reg_map[dst] <= 4) {
2669 /* Low byte is accessible. */
2670 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2671 FAIL_IF(!inst);
2672 INC_SIZE(3 + 3);
2673 /* Set low byte to conditional flag. */
2674 *inst++ = GROUP_0F;
2675 *inst++ = cond_set;
2676 *inst++ = MOD_REG | reg_map[dst];
2677
2678 *inst++ = GROUP_0F;
2679 *inst++ = MOVZX_r_rm8;
2680 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2681 return SLJIT_SUCCESS;
2682 }
2683
2684 /* Low byte is not accessible. */
2685 if (cpu_has_cmov == -1)
2686 get_cpu_features();
2687
2688 if (cpu_has_cmov) {
2689 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2690 			/* A xor reg, reg operation would overwrite the flags. */
2691 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2692
2693 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2694 FAIL_IF(!inst);
2695 INC_SIZE(3);
2696
2697 *inst++ = GROUP_0F;
2698 /* cmovcc = setcc - 0x50. */
2699 *inst++ = cond_set - 0x50;
2700 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2701 return SLJIT_SUCCESS;
2702 }
2703
2704 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2705 FAIL_IF(!inst);
2706 INC_SIZE(1 + 3 + 3 + 1);
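		/* Save eax by exchanging it with TMP_REG1: setcc needs a byte
		   addressable register and al always qualifies. The exchange is
		   undone after the movzx. */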
2707 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2708 /* Set al to conditional flag. */
2709 *inst++ = GROUP_0F;
2710 *inst++ = cond_set;
2711 *inst++ = MOD_REG | 0 /* eax */;
2712
2713 *inst++ = GROUP_0F;
2714 *inst++ = MOVZX_r_rm8;
2715 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2716 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2717 return SLJIT_SUCCESS;
2718 }
2719
2720 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2721 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2722
2723 if (dst != SLJIT_R0) {
2724 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2725 FAIL_IF(!inst);
2726 INC_SIZE(1 + 3 + 2 + 1);
2727 /* Set low register to conditional flag. */
2728 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2729 *inst++ = GROUP_0F;
2730 *inst++ = cond_set;
2731 *inst++ = MOD_REG | 0 /* eax */;
2732 *inst++ = OR_rm8_r8;
2733 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2734 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2735 }
2736 else {
2737 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2738 FAIL_IF(!inst);
2739 INC_SIZE(2 + 3 + 2 + 2);
2740 /* Set low register to conditional flag. */
2741 *inst++ = XCHG_r_rm;
2742 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2743 *inst++ = GROUP_0F;
2744 *inst++ = cond_set;
2745 *inst++ = MOD_REG | 1 /* ecx */;
2746 *inst++ = OR_rm8_r8;
2747 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2748 *inst++ = XCHG_r_rm;
2749 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2750 }
2751 return SLJIT_SUCCESS;
2752 }
2753
2754 /* Set TMP_REG1 to the bit. */
2755 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2756 FAIL_IF(!inst);
2757 INC_SIZE(1 + 3 + 3 + 1);
2758 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2759 /* Set al to conditional flag. */
2760 *inst++ = GROUP_0F;
2761 *inst++ = cond_set;
2762 *inst++ = MOD_REG | 0 /* eax */;
2763
2764 *inst++ = GROUP_0F;
2765 *inst++ = MOVZX_r_rm8;
2766 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2767
2768 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2769
2770 if (GET_OPCODE(op) < SLJIT_ADD)
2771 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2772
2773 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2774 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2775 compiler->skip_checks = 1;
2776 #endif
2777 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2778 #endif /* SLJIT_CONFIG_X86_64 */
2779 }
2780
2781 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2782 {
2783 CHECK_ERROR();
2784 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2785 ADJUST_LOCAL_OFFSET(dst, dstw);
2786
2787 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2788
2789 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2790 compiler->mode32 = 0;
2791 #endif
2792
2793 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2794
2795 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2796 if (NOT_HALFWORD(offset)) {
2797 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2798 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2799 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2800 return compiler->error;
2801 #else
2802 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2803 #endif
2804 }
2805 #endif
2806
2807 if (offset != 0)
2808 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2809 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2810 }
2811
2812 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2813 {
2814 sljit_u8 *inst;
2815 struct sljit_const *const_;
2816 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2817 sljit_s32 reg;
2818 #endif
2819
2820 CHECK_ERROR_PTR();
2821 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2822 ADJUST_LOCAL_OFFSET(dst, dstw);
2823
2824 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2825
2826 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2827 PTR_FAIL_IF(!const_);
2828 set_const(const_, compiler);
2829
2830 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2831 compiler->mode32 = 0;
2832 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2833
2834 if (emit_load_imm64(compiler, reg, init_value))
2835 return NULL;
2836 #else
2837 if (dst == SLJIT_UNUSED)
2838 dst = TMP_REG1;
2839
2840 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2841 return NULL;
2842 #endif
2843
2844 inst = (sljit_u8*)ensure_buf(compiler, 2);
2845 PTR_FAIL_IF(!inst);
2846
2847 *inst++ = 0;
2848 *inst++ = 1;
2849
2850 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2851 if (dst & SLJIT_MEM)
2852 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2853 return NULL;
2854 #endif
2855
2856 return const_;
2857 }
2858
2859 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2860 {
2861 SLJIT_UNUSED_ARG(executable_offset);
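	/* On x86-32 the rel32 displacement of the jump is rewritten (relative to
	   the end of the 4 byte field); on x86-64 the absolute 64-bit immediate
	   of the mov loading the target is rewritten. */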
2862 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2863 sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
2864 #else
2865 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
2866 #endif
2867 }
2868
2869 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2870 {
2871 SLJIT_UNUSED_ARG(executable_offset);
2872 sljit_unaligned_store_sw((void*)addr, new_constant);
2873 }
2874
2875 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
2876 {
2877 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2878 if (cpu_has_sse2 == -1)
2879 get_cpu_features();
2880 return cpu_has_sse2;
2881 #else
2882 return 1;
2883 #endif
2884 }
2885
2886 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
2887 {
2888 if (cpu_has_cmov == -1)
2889 get_cpu_features();
2890 return cpu_has_cmov;
2891 }
2892
2893 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2894 sljit_s32 type,
2895 sljit_s32 dst_reg,
2896 sljit_s32 src, sljit_sw srcw)
2897 {
2898 sljit_u8* inst;
2899
2900 CHECK_ERROR();
2901 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2902 CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2903 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
2904 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
2905 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
2906 FUNCTION_CHECK_SRC(src, srcw);
2907
2908 if ((type & 0xff) <= SLJIT_NOT_ZERO)
2909 CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
2910 else
2911 CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
2912 #endif
2913 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2914 if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2915 fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
2916 !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
2917 jump_names[type & 0xff], JUMP_POSTFIX(type));
2918 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
2919 fprintf(compiler->verbose, ", ");
2920 sljit_verbose_param(compiler, src, srcw);
2921 fprintf(compiler->verbose, "\n");
2922 }
2923 #endif
2924
2925 ADJUST_LOCAL_OFFSET(src, srcw);
2926 CHECK_EXTRA_REGS(src, srcw, (void)0);
2927
2928 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2929 compiler->mode32 = dst_reg & SLJIT_I32_OP;
2930 #endif
2931 dst_reg &= ~SLJIT_I32_OP;
2932
2933 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2934 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2935 src = TMP_REG1;
2936 srcw = 0;
2937 }
2938
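	/* cmovcc shares its condition encoding with jcc: its two byte opcode is
	   0F 4x, i.e. the jcc code minus 0x40. */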
2939 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2940 FAIL_IF(!inst);
2941 *inst++ = GROUP_0F;
2942 *inst = get_jump_code(type & 0xff) - 0x40;
2943 return SLJIT_SUCCESS;
2944 }
2945