/*	$NetBSD: sljitNativeX86_common.c,v 1.8 2016/05/29 17:09:33 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which decodes as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
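
/* Encoding note: the ModRM byte built by MOV_RM and the emit helpers has the
   layout mod (2 bits) | reg (3 bits) | r/m (3 bits); MOD_REG (0xc0) selects a
   register-direct operand and MOD_DISP8 (0x40) a memory operand with an 8-bit
   displacement. The REX_* values above follow the 0100WRXB prefix layout,
   where W selects 64-bit operand size and R, X and B extend the reg, index
   and base fields respectively. */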

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different
   threads if they detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_u8 get_jump_code(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED_F64:
		return 0x8a /* jp */;

	case SLJIT_ORDERED_F64:
		return 0x8b /* jpo */;
	}
	return 0;
}

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif

static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_s32);
#endif
	}

	return code_ptr;
}
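
/* Encoding note: short conditional jumps use the single-byte 0x7x opcodes
   (get_jump_code() - 0x10) followed by a rel8 displacement, while the long
   forms use 0F 8x with a rel32 displacement. PATCH_MB and PATCH_MW record
   which displacement size has to be patched once the final label addresses
   are known in the second pass below. */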

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				*(sljit_s32*)jump->addr = (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				*(sljit_s32*)jump->addr = (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
{
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
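
/* The lea/pushf pair in emit_save_flags() leaves esp/rsp unchanged overall
   and simply stores the flags into the word at [esp]/[rsp]; lea is used for
   the adjustment because, unlike add/sub, it does not itself modify the
   flags. emit_restore_flags() reverses the sequence with popf + lea. */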

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on Windows
	   where the stack can only grow in 4k steps. The call just burns CPU
	   cycles if the stack is already large enough, but since that cannot be
	   known in advance, it must always be made. I think this is a bad design
	   in general even if it has some reasons. */
	*(volatile sljit_s32*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination, no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src and dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be a memory operand, or a register with reg_map[src] < 4 on x86-32. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
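
/* CLZ is derived from BSR above: BSR returns the index of the highest set
   bit, so clz = 31 - bsr (or 63 - bsr), which for that value range equals
   bsr ^ 31 (or bsr ^ 63). The preloaded 32 + 31 (or 64 + 63) constant,
   combined with CMOVNE or the conditional-move fallback, yields 32 (or 64)
   when the source is zero, since BSR leaves its destination undefined in
   that case. */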

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 update = 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
	sljit_s32 src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_I32_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
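
/* BINARY_IMM emits the "op r/m, imm" group form (the op_imm /digit is OR-ed
   into the ModRM reg field written by emit_x86_instruction); on x86-64,
   immediates that do not fit into a sign-extended 32-bit value are first
   loaded into TMP_REG2 and the register form (op_mr) is used instead.
   BINARY_EAX_IMM uses the shorter EAX/RAX-specific opcode for large
   immediates whose target is SLJIT_R0. */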

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_s32*)inst = (sljit_s32)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_s32*)inst = (sljit_s32)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases are better left to be handled by the normal path. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Note: the original condition tested (src2 & SLJIT_IMM), which can never
	   be true when src2 == SLJIT_R0; it must check src1, as the 32-bit
	   variant below does. */
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
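		/* src2 is a register or memory operand here. TEST r/m, r computes
		   (src1 & src2), updates SF/ZF/PF and discards the result, which is
		   exactly what SLJIT_AND with an unused destination needs. */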
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure this works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
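		/* On x86-32 there is no free second temporary register here, so the
		   current value of SLJIT_PREF_SHIFT_REG (ecx) is spilled to the stack
		   word just above the saved flags and reloaded after the shift. */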
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
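	/* ADC and SBB below consume the carry flag, so if the integer flags were
	   previously saved by emit_save_flags(), they must be restored before the
	   instruction is emitted (see the flags_saved checks in these cases). */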
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                              */
/* --------------------------------------------------------------------- */
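
/* The constants below are used only by SLJIT_NEG_F64 and SLJIT_ABS_F64:
   the pointer is sljit_s32*, so an index of 4 is one 16-byte slot. Slot 0
   holds the single precision sign mask (0x80000000), slot +4 its complement
   (0x7fffffff), and slots +8 and +12 the corresponding double precision
   masks (bit 63). XORPD with a sign mask flips the sign bit, ANDPD with the
   complement clears it. */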
/* Up to 12 bytes for 16-byte alignment + 4 * 16 bytes of constants. */
static sljit_s32 sse2_data[3 + (4 + 4) * 2];
static sljit_s32 *sse2_buffer;

static void init_compiler(void)
{
	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}

static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	compiler->flags_saved = 0;
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of source. From SLJIT point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

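	/* Negation and absolute value are done with the packed-logic masks
	   prepared in init_compiler(): XORPD flips the sign bit, ANDPD clears
	   it, so no arithmetic instruction (and no flag change) is involved. */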
	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                              */
/* --------------------------------------------------------------------- */
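
/* A note on the records emitted below: labels, jumps and constants are not
   written to the code buffer as machine code. As far as can be told from the
   emit side, a two byte marker is stored instead (a 0 byte followed by 0 for
   a label, 1 for a constant, or type + 4 for a jump) and compiler->size is
   increased by the worst-case encoding length; the final instructions and
   addresses are produced later by sljit_generate_code(). */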

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
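		/* In 64-bit mode an indirect CALL/JMP through a register or memory
		   operand always uses a 64-bit target, so mode32 is set only to keep
		   emit_x86_instruction() from emitting a redundant REX.W prefix. */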
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
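		/* Without a REX prefix only EAX, ECX, EDX and EBX have addressable
		   low-byte registers, so SETcc cannot target EBP, ESI or EDI directly.
		   If CMOV is available (probed once via get_cpu_features()), a 1 is
		   loaded into TMP_REG1, dst is cleared and CMOVcc picks the result;
		   otherwise the value is built in AL (after saving EAX with XCHG) and
		   zero-extended into dst. */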
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
{
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else
	return 1;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
{
	if (cpu_has_cmov == -1)
		get_cpu_features();
	return cpu_has_cmov;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
	sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
	FUNCTION_CHECK_SRC(src, srcw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
		fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
			jump_names[type & 0xff], JUMP_POSTFIX(type));
		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
		fprintf(compiler->verbose, ", ");
		sljit_verbose_param(compiler, src, srcw);
		fprintf(compiler->verbose, "\n");
	}
#endif

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
#endif
	dst_reg &= ~SLJIT_I32_OP;

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}