/* $NetBSD: sljitNativeX86_common.c,v 1.6 2014/06/17 19:33:20 alnsn Exp $ */

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - from here on a REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than for SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
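/* Illustrative sketch (not part of the original source): why r12/r13 are
   awkward base registers on x86-64, as the note above alludes to. In a
   ModRM byte with mod != 3, rm == 0b100 means "a SIB byte follows", and
   with mod == 0, rm == 0b101 means "disp32 without a base". Since
   r12 & 0x7 == 0b100 and r13 & 0x7 == 0b101, addressing through them
   always costs an extra SIB byte or a forced displacement. */
static SLJIT_INLINE sljit_si base_reg_needs_extra_byte(sljit_ub hw_reg)
{
	return (hw_reg & 0x7) == 4 || (hw_reg & 0x7) == 5; /* r12 / r13 pattern */
}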
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define TMP_FREG	(0)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_SSE2		0x0800
#define EX86_PREF_F2		0x1000
#define EX86_PREF_F3		0x2000
#endif

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)		(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)		(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)		(*inst++ = (POP_r + (r)))
#define RET()			(*inst++ = (RET_near))
#define RET_I16(n)		(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
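/* Illustrative use of MOV_RM (an example, not code emitted verbatim here):
   assuming "inst" points into the code buffer,
     MOV_RM(MOD_REG, reg_map[SLJIT_SCRATCH_REG1], reg_map[SLJIT_SCRATCH_REG2])
   writes the two bytes 8B C2 with the 32-bit register map above, i.e.
   "mov eax, edx" (opcode 8B /r, ModRM = mod 11, reg 000, rm 010). */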
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. They may be overwritten by different threads that
   detect the CPU features at the same time, but every thread writes the
   same values, so this is harmless. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
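/* Illustrative sketch (not in the original source): the lazy-detection
   pattern used by callers such as emit_clz() below, here wrapped for the
   SSE2 flag. CPUID.1:EDX bit 26 reports SSE2 and bit 15 reports CMOV,
   which is exactly what get_cpu_features() extracts above. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static SLJIT_INLINE sljit_si sse2_available(void)
{
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
}
#endif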
static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84 /* je */;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82 /* jc */;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_C_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_C_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_C_FLOAT_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_C_FLOAT_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
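/* Illustrative sketch (not in the original source): the values returned
   above are the second bytes of the two-byte 0F 8x near-Jcc encodings.
   The matching one-byte short Jcc opcode is always 0x10 less (0x84 "je"
   becomes 0x74), a relation generate_near_jump_code() relies on when it
   emits get_jump_code(type) - 0x10 for short jumps. */
static SLJIT_INLINE sljit_ub get_short_jump_code(sljit_si type)
{
	return get_jump_code(type) - 0x10; /* e.g. 0x84 (je rel32) -> 0x74 (je rel8) */
}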
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}
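/* Illustrative sketch (not in the original source): the short-form test
   above. A rel8 jump can reach [-128, 127] bytes measured from the end of
   the two-byte instruction, hence the "jump->addr + 2" bias in
   generate_near_jump_code(). */
static SLJIT_INLINE sljit_si fits_in_rel8(sljit_sw diff)
{
	return diff >= -128 && diff <= 127;
}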
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* We may waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}
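/* Illustrative sketch (derived from the decoding pass above, not a comment
   from the original source): the intermediate buffers hold a simple record
   stream. Each record is either
     [len > 0][len bytes]   machine code, copied verbatim;
     [0][0]                 label definition;
     [0][1]                 constant (address of its last word is recorded);
     [0][2] / [0][3]        fixed call / jump followed by a target word;
     [0][type + 4]          jump whose condition code is "type".
   The second pass walks this stream, emits the jumps, and then patches the
   rel8/rel32 fields in the loop that follows the copy loop. */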
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
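/* Illustrative sketch (not a comment from the original source): the net
   effect of the pair above. With E denoting esp/rsp on entry and
   ws = sizeof(sljit_sw):
     save:    lea esp, [esp + ws] ; pushf   ->  esp == E, [E] = EFLAGS
     restore: popf ; lea esp, [esp - ws]    ->  esp == E, EFLAGS = [E]
   so the word at the current stack top serves as the flag slot while the
   stack pointer is unchanged on balance, and LEA itself never modifies
   EFLAGS. */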
#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches every 4K page that belongs to the requested
	   stack space, whose size is passed in local_size. This is necessary
	   on Windows, where the stack can only grow in 4K steps. If the stack
	   is already large enough the call merely burns CPU cycles, but since
	   that cannot be known in advance, it must always be made. Arguably a
	   questionable design, even if it has its reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_SCRATCH_REG1] == 0
			&& reg_map[SLJIT_SCRATCH_REG2] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_SCRATCH_REG1] == 0
			&& reg_map[SLJIT_SCRATCH_REG2] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*inst |= MUL;
			break;
		case SLJIT_SMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIV:
			*inst |= DIV;
			break;
		case SLJIT_SDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
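/* Illustrative sketch (not in the original source): the GROUP_F7 family
   encodes the operation in the "reg" field of the ModRM byte: F7 /4 is MUL,
   /5 IMUL, /6 DIV, /7 IDIV, matching the (n << 3) definitions above. x86
   MUL/DIV always use EDX:EAX as the implicit operand pair, which is why the
   compile asserts above pin SLJIT_SCRATCH_REG1 to EAX and route the second
   operand through EDX (SLJIT_SCRATCH_REG2 or TMP_REG1, depending on ABI). */
static SLJIT_INLINE sljit_ub group_f7_modrm(sljit_ub opcode_ext, sljit_ub rm)
{
	/* e.g. group_f7_modrm(DIV, reg_map[r]) yields the ModRM byte of "div r". */
	return MOD_REG | opcode_ext | rm;
}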
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* Both src and dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory address or, on x86-32, a register
		   with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is less than 4. */
			if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
					work_r = SLJIT_SCRATCH_REG3;
				else
					work_r = SLJIT_SCRATCH_REG2;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
					work_r = SLJIT_SCRATCH_REG1;
				else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
					work_r = SLJIT_SCRATCH_REG3;
				else
					work_r = SLJIT_SCRATCH_REG2;
			}

			if (work_r == SLJIT_SCRATCH_REG1) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_SCRATCH_REG1) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
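/* Illustrative sketch (not in the original source): on x86-32 only
   EAX/ECX/EDX/EBX (reg_map value < 4) have byte-addressable low halves
   (AL/CL/DL/BL); for the other registers the rm field of an 8-bit
   instruction would select AH/CH/DH/BH instead. That is why emit_mov_byte()
   above bounces such sources through TMP_REG1 or an unused work register. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static SLJIT_INLINE sljit_si has_low_byte_form(sljit_si reg)
{
	return reg_map[reg] < 4; /* AL/CL/DL/BL are encodable as r/m8 */
}
#endif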
static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
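/* Illustrative note (not in the original source): x86 NOT does not update
   EFLAGS, so emit_not_with_flags() appends "or reg, reg" purely to
   materialize the zero/sign flags of the result. A minimal C sketch of the
   emitted semantics: */
static SLJIT_INLINE sljit_sw not_with_flags_ref(sljit_sw v, sljit_si *zero_flag)
{
	v = ~v;                /* NOT: value inverted, flags untouched */
	*zero_flag = (v == 0); /* OR reg, reg: value unchanged, ZF/SF set */
	return v;
}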
static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
			dst_r = SLJIT_SCRATCH_REG1;
		else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
			dst_r = SLJIT_SCRATCH_REG2;
		else
			dst_r = SLJIT_SCRATCH_REG3;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
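/* Illustrative sketch (not in the original source) of the arithmetic used
   by emit_clz() above, in 32-bit terms. BSR returns the index of the
   highest set bit, so for nonzero x, clz(x) == 31 - bsr(x), and the final
   "xor dst, 31" computes exactly that because bsr(x) <= 31. For x == 0,
   BSR leaves its destination undefined and sets ZF, so the CMOVNE (or the
   JE-guarded MOV fallback) keeps the preloaded 32 + 31, which the XOR then
   turns into 32. */
static SLJIT_INLINE sljit_si clz32_ref(sljit_ui x)
{
	sljit_si r = 32 + 31; /* value kept when ZF is set (x == 0) */
	sljit_si i;
	for (i = 31; i >= 0; i--)
		if (x & ((sljit_ui)1 << i)) { r = i; break; } /* BSR result */
	return r ^ 31; /* matches the final "xor dst, 31"; yields 32 for x == 0 */
}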
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
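/* Illustrative sketch (not in the original source): instruction sizes
   behind the BINARY_* choices, ignoring any REX prefix. An immediate that
   fits sign-extended in a byte uses GROUP_BINARY_83; otherwise the EAX
   short form (no ModRM byte) beats GROUP_BINARY_81 by one byte, which is
   why the helpers below prefer BINARY_EAX_IMM when the destination is
   SLJIT_SCRATCH_REG1 and the immediate lies outside [-128, 127]. */
static SLJIT_INLINE sljit_si binary_imm_size(sljit_si dst_is_eax, sljit_sw imm)
{
	if (imm >= -128 && imm <= 127)
		return 3;              /* 83 /r ib */
	return dst_is_eax ? 5 : 6; /* 05/0D/... id vs 81 /r id */
}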
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
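/* Illustrative sketch (not in the original source): the immediate-form
   selection used by emit_mul() above. The hardware offers 0F AF /r
   (imul r, r/m), 6B /r ib (imm8, sign-extended) and 69 /r id (imm32);
   an immediate that fits in a signed byte saves three bytes. */
static SLJIT_INLINE sljit_ub imul_imm_opcode(sljit_sw imm)
{
	return (imm >= -128 && imm <= 127) ? IMUL_r_rm_i8 : IMUL_r_rm_i32;
}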
static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better handled by the normal code path. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
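/* Illustrative note (an inference, not a comment from the original source):
   LEA performs the addition in the address-generation unit and never
   modifies EFLAGS, so an add emitted through emit_lea_binary() is a natural
   fit when the caller passes keep_flags; without keep_flags, the dst == src
   cases are cheaper as a plain ADD, hence the early SLJIT_ERR_UNSUPPORTED. */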
static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

static sljit_si emit_test_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}
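
/* For reference: unlike cmp, test has no sign-extended imm8 form, so an
   immediate operand always goes through the 0xf7 group opcode (written as
   GROUP_F7 above) or the one-byte-shorter eax-specific TEST_EAX_i32 form. */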
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure this works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags, so ecx is saved one word above it. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
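
/* A variable shift count must be in cl on x86 (SLJIT_PREF_SHIFT_REG is ecx);
   no other register can supply the count, which is why emit_shift above
   shuffles values through TMP_REG1 and, on x86-32, through the stack when
   ecx is live. */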

static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
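
/* Why the zero count needs care: e.g. "shl eax, 0" leaves every flag
   untouched, so when the caller asked for flags the code above falls back
   to "or dst, 0", which sets the zero and sign flags for the (unshifted)
   value. */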

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */
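
/* The constants below are the sign-bit masks used by SLJIT_NEGD/SLJIT_ABSD:
   xorpd with a mask that has only the sign bit set negates the value, and
   andpd with the complemented mask clears the sign bit (absolute value).
   The single precision pair lives at byte offsets 0 and 16 of sse2_buffer,
   the double precision pair at offsets 32 and 48. */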

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;

static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
#else /* SLJIT_SSE2 */
	return 0;
#endif
}

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}
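
/* The scalar SSE2 opcodes live in the 0x0f map and are selected by a
   mandatory prefix byte: f3 0f xx is the single precision scalar form and
   f2 0f xx the double precision one, which is what the EX86_PREF_F3 and
   EX86_PREF_F2 flags passed above make emit_x86_instruction produce. */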

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		compiler->flags_saved = 0;
		if (FAST_IS_REG(dst))
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
	}

	if (op == SLJIT_MOVD) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
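
/* In sljit_emit_fop2 below the destination xmm register doubles as the
   first source, so dst == src2 can only be kept in place for the
   commutative operations (ADDD/MULD, by swapping the arguments); for
   SUBD/DIVD the result is built in TMP_FREG and stored afterwards. */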

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

#else

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

#endif

/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
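
/* Indirect calls and jumps share the 0xff group opcode; the ModRM reg
   field selects the form (/2 for call, /4 for jmp), which is what the
   CALL_rm/JMP_rm values or-ed into the instruction above encode. */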

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
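		/* (ESI and EDI have no 8-bit form in 32-bit mode, so setcc cannot
		   write them directly; the fallbacks below either build the value
		   with cmovcc or route it through al using xchg.) */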
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_SCRATCH_REG1) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
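	/* The constant must stay patchable: on x86-64 emit_load_imm64 emits a
	   mov with a full 64-bit immediate, and on x86-32 the mov below carries
	   an imm32, so sljit_set_const (end of this file) can later rewrite the
	   embedded immediate in place. */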

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}