1 /* $NetBSD: sljitNativeARM_T2_32.c,v 1.3 2016/05/29 17:09:33 alnsn Exp $ */ 2 3 /* 4 * Stack-less Just-In-Time compiler 5 * 6 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without modification, are 9 * permitted provided that the following conditions are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright notice, this list of 12 * conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 15 * of conditions and the following disclaimer in the documentation and/or other materials 16 * provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 21 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 30 { 31 return "ARM-Thumb2" SLJIT_CPUINFO; 32 } 33 34 /* Length of an instruction word. */ 35 typedef sljit_u32 sljit_ins; 36 37 /* Last register + 1. 
*/ 38 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 39 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 40 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 41 #define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) 42 43 #define TMP_FREG1 (0) 44 #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) 45 46 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ 47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 48 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 49 }; 50 51 #define COPY_BITS(src, from, to, bits) \ 52 ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) 53 54 /* Thumb16 encodings. */ 55 #define RD3(rd) (reg_map[rd]) 56 #define RN3(rn) (reg_map[rn] << 3) 57 #define RM3(rm) (reg_map[rm] << 6) 58 #define RDN3(rdn) (reg_map[rdn] << 8) 59 #define IMM3(imm) (imm << 6) 60 #define IMM8(imm) (imm) 61 62 /* Thumb16 helpers. */ 63 #define SET_REGS44(rd, rn) \ 64 ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) 65 #define IS_2_LO_REGS(reg1, reg2) \ 66 (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) 67 #define IS_3_LO_REGS(reg1, reg2, reg3) \ 68 (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) 69 70 /* Thumb32 encodings. */ 71 #define RD4(rd) (reg_map[rd] << 8) 72 #define RN4(rn) (reg_map[rn] << 16) 73 #define RM4(rm) (reg_map[rm]) 74 #define RT4(rt) (reg_map[rt] << 12) 75 #define DD4(dd) ((dd) << 12) 76 #define DN4(dn) ((dn) << 16) 77 #define DM4(dm) (dm) 78 #define IMM5(imm) \ 79 (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) 80 #define IMM12(imm) \ 81 (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) 82 83 /* --------------------------------------------------------------------- */ 84 /* Instrucion forms */ 85 /* --------------------------------------------------------------------- */ 86 87 /* dot '.' changed to _ 88 I immediate form (possibly followed by number of immediate bits). 
*/ 89 #define ADCI 0xf1400000 90 #define ADCS 0x4140 91 #define ADC_W 0xeb400000 92 #define ADD 0x4400 93 #define ADDS 0x1800 94 #define ADDSI3 0x1c00 95 #define ADDSI8 0x3000 96 #define ADD_W 0xeb000000 97 #define ADDWI 0xf2000000 98 #define ADD_SP 0xb000 99 #define ADD_W 0xeb000000 100 #define ADD_WI 0xf1000000 101 #define ANDI 0xf0000000 102 #define ANDS 0x4000 103 #define AND_W 0xea000000 104 #define ASRS 0x4100 105 #define ASRSI 0x1000 106 #define ASR_W 0xfa40f000 107 #define ASR_WI 0xea4f0020 108 #define BICI 0xf0200000 109 #define BKPT 0xbe00 110 #define BLX 0x4780 111 #define BX 0x4700 112 #define CLZ 0xfab0f080 113 #define CMPI 0x2800 114 #define CMP_W 0xebb00f00 115 #define EORI 0xf0800000 116 #define EORS 0x4040 117 #define EOR_W 0xea800000 118 #define IT 0xbf00 119 #define LSLS 0x4080 120 #define LSLSI 0x0000 121 #define LSL_W 0xfa00f000 122 #define LSL_WI 0xea4f0000 123 #define LSRS 0x40c0 124 #define LSRSI 0x0800 125 #define LSR_W 0xfa20f000 126 #define LSR_WI 0xea4f0010 127 #define MOV 0x4600 128 #define MOVS 0x0000 129 #define MOVSI 0x2000 130 #define MOVT 0xf2c00000 131 #define MOVW 0xf2400000 132 #define MOV_W 0xea4f0000 133 #define MOV_WI 0xf04f0000 134 #define MUL 0xfb00f000 135 #define MVNS 0x43c0 136 #define MVN_W 0xea6f0000 137 #define MVN_WI 0xf06f0000 138 #define NOP 0xbf00 139 #define ORNI 0xf0600000 140 #define ORRI 0xf0400000 141 #define ORRS 0x4300 142 #define ORR_W 0xea400000 143 #define POP 0xbc00 144 #define POP_W 0xe8bd0000 145 #define PUSH 0xb400 146 #define PUSH_W 0xe92d0000 147 #define RSB_WI 0xf1c00000 148 #define RSBSI 0x4240 149 #define SBCI 0xf1600000 150 #define SBCS 0x4180 151 #define SBC_W 0xeb600000 152 #define SMULL 0xfb800000 153 #define STR_SP 0x9000 154 #define SUBS 0x1a00 155 #define SUBSI3 0x1e00 156 #define SUBSI8 0x3800 157 #define SUB_W 0xeba00000 158 #define SUBWI 0xf2a00000 159 #define SUB_SP 0xb080 160 #define SUB_WI 0xf1a00000 161 #define SXTB 0xb240 162 #define SXTB_W 0xfa4ff080 163 #define SXTH 0xb200 164 
#define SXTH_W      0xfa0ff080
#define TST         0x4200
#define UMULL       0xfba00000
#define UXTB        0xb2c0
#define UXTB_W      0xfa5ff080
#define UXTH        0xb280
#define UXTH_W      0xfa1ff080
/* VFP (floating point) instructions. */
#define VABS_F32    0xeeb00ac0
#define VADD_F32    0xee300a00
#define VCMP_F32    0xeeb40a40
#define VCVT_F32_S32    0xeeb80ac0
#define VCVT_F64_F32    0xeeb70ac0
#define VCVT_S32_F32    0xeebd0ac0
#define VDIV_F32    0xee800a00
#define VMOV_F32    0xeeb00a40
#define VMOV        0xee000a10
#define VMRS        0xeef1fa10
#define VMUL_F32    0xee200a00
#define VNEG_F32    0xeeb10a40
#define VSTR_F32    0xed000a00
#define VSUB_F32    0xee300a40

/* Appends one 16 bit (Thumb16) instruction to the code buffer.
   compiler->size counts emitted halfwords. */
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
    sljit_u16 *ptr;
    SLJIT_ASSERT(!(inst & 0xffff0000));

    ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
    FAIL_IF(!ptr);
    *ptr = inst;
    compiler->size++;
    return SLJIT_SUCCESS;
}

/* Appends one 32 bit (Thumb32) instruction as two halfwords,
   high halfword first (Thumb-2 memory order). */
static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
{
    sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
    FAIL_IF(!ptr);
    *ptr++ = inst >> 16;
    *ptr = inst;
    compiler->size += 2;
    return SLJIT_SUCCESS;
}

/* Loads a full 32 bit constant into dst with a MOVW/MOVT pair
   (low 16 bits, then high 16 bits). */
static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
    FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
        | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
    return push_inst32(compiler, MOVT | RD4(dst)
        | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}

/* Patches an already emitted MOVW/MOVT pair (4 halfwords at inst)
   in place so that it loads new_imm instead. */
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
    /* Destination register field, shared by both instructions. */
    sljit_s32 dst = inst[1] & 0x0f00;
    SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
    inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
    inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
    inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
    inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
}

/* Chooses the shortest branch encoding that can reach the target.
   Sets a PATCH_* flag on the jump and returns the number of halfwords
   saved versus the worst-case form (the return value is subtracted from
   code_ptr in sljit_generate_code); returns 0 when the full
   MOVW/MOVT sequence must be kept. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code)
{
    sljit_sw diff;

    if (jump->flags & SLJIT_REWRITABLE_JUMP)
        return 0;

    if (jump->flags & JUMP_ADDR) {
        /* Branch to ARM code is not optimized yet. */
        if (!(jump->u.target & 0x1))
            return 0;
        /* Distance in halfwords, relative to PC (code_ptr + 2). */
        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
    }
    else {
        SLJIT_ASSERT(jump->flags & JUMP_LABEL);
        diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
    }

    if (jump->flags & IS_COND) {
        SLJIT_ASSERT(!(jump->flags & IS_BL));
        if (diff <= 127 && diff >= -128) {
            jump->flags |= PATCH_TYPE1;
            return 5;
        }
        if (diff <= 524287 && diff >= -524288) {
            jump->flags |= PATCH_TYPE2;
            return 4;
        }
        /* +1 comes from the prefix IT instruction. */
        diff--;
        if (diff <= 8388607 && diff >= -8388608) {
            jump->flags |= PATCH_TYPE3;
            return 3;
        }
    }
    else if (jump->flags & IS_BL) {
        if (diff <= 8388607 && diff >= -8388608) {
            jump->flags |= PATCH_BL;
            return 3;
        }
    }
    else {
        if (diff <= 1023 && diff >= -1024) {
            jump->flags |= PATCH_TYPE4;
            return 4;
        }
        if (diff <= 8388607 && diff >= -8388608) {
            jump->flags |= PATCH_TYPE5;
            return 3;
        }
    }

    return 0;
}

/* Writes the final branch instruction(s) for a jump, using the
   PATCH_* type chosen by detect_jump_type (type 0 means patch the
   MOVW/MOVT constant instead). */
static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
{
    sljit_s32 type = (jump->flags >> 4) & 0xf;
    sljit_sw diff;
    sljit_u16 *jump_inst;
    sljit_s32 s, j1, j2;

    if (SLJIT_UNLIKELY(type == 0)) {
        modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
        return;
    }

    if (jump->flags & JUMP_ADDR) {
        SLJIT_ASSERT(jump->u.target & 0x1);
        diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
    }
    else
        diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
    jump_inst = (sljit_u16*)jump->addr;

    switch (type) {
    case 1:
        /* Encoding T1 of 'B' instruction */
        SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
        jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
        return;
    case 2:
        /* Encoding T3 of 'B' instruction */
        SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
        jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
        jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
        return;
    case 3:
        /* Conditional: emit an IT prefix, then fall through to the
           unconditional T4 encoding below. */
        SLJIT_ASSERT(jump->flags & IS_COND);
        *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
        diff--;
        type = 5;
        break;
    case 4:
        /* Encoding T2 of 'B' instruction */
        SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
        jump_inst[0] = 0xe000 | (diff & 0x7ff);
        return;
    }

    SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);

    /* Really complex instruction form for branches. */
    s = (diff >> 23) & 0x1;
    j1 = (~(diff >> 21) ^ s) & 0x1;
    j2 = (~(diff >> 22) ^ s) & 0x1;
    jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
    jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);

    /* The others have a common form. */
    if (type == 5) /* Encoding T4 of 'B' instruction */
        jump_inst[1] |= 0x9000;
    else if (type == 6) /* Encoding T1 of 'BL' instruction */
        jump_inst[1] |= 0xd000;
    else
        SLJIT_ASSERT_STOP();
}

/* Copies the instruction buffers into executable memory, resolves
   labels/jumps/consts, shortens jumps where possible, and returns the
   entry address with the Thumb bit (bit 0) set. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
    struct sljit_memory_fragment *buf;
    sljit_u16 *code;
    sljit_u16 *code_ptr;
    sljit_u16 *buf_ptr;
    sljit_u16 *buf_end;
    sljit_uw half_count;

    struct sljit_label *label;
    struct sljit_jump *jump;
    struct sljit_const *const_;

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_generate_code(compiler));
    reverse_buf(compiler);

    code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16));
    PTR_FAIL_WITH_EXEC_IF(code);
    buf = compiler->buf;

    code_ptr = code;
    half_count = 0;
    label = compiler->labels;
    jump = compiler->jumps;
    const_ = compiler->consts;

    do {
        buf_ptr = (sljit_u16*)buf->memory;
        buf_end = buf_ptr + (buf->used_size >> 1);
        do {
            *code_ptr = *buf_ptr++;
            /* These structures are ordered by their address. */
            SLJIT_ASSERT(!label || label->size >= half_count);
            SLJIT_ASSERT(!jump || jump->addr >= half_count);
            SLJIT_ASSERT(!const_ || const_->addr >= half_count);
            if (label && label->size == half_count) {
                /* Label addresses carry the Thumb bit. */
                label->addr = ((sljit_uw)code_ptr) | 0x1;
                label->size = code_ptr - code;
                label = label->next;
            }
            if (jump && jump->addr == half_count) {
                /* Point back at the start of the jump sequence
                   (10 bytes when prefixed by IT, 8 otherwise). */
                jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
                code_ptr -= detect_jump_type(jump, code_ptr, code);
                jump = jump->next;
            }
            if (const_ && const_->addr == half_count) {
                const_->addr = (sljit_uw)code_ptr;
                const_ = const_->next;
            }
            code_ptr++;
            half_count++;
        } while (buf_ptr < buf_end);

        buf = buf->next;
    } while (buf);

    /* A label may sit exactly at the end of the code. */
    if (label && label->size == half_count) {
        label->addr = ((sljit_uw)code_ptr) | 0x1;
        label->size = code_ptr - code;
        label = label->next;
    }

    SLJIT_ASSERT(!label);
    SLJIT_ASSERT(!jump);
    SLJIT_ASSERT(!const_);
    SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

    jump = compiler->jumps;
    while (jump) {
        set_jump_instruction(jump);
        jump = jump->next;
    }

    compiler->error = SLJIT_ERR_COMPILED;
    compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
    SLJIT_CACHE_FLUSH(code, code_ptr);
    /* Set thumb mode flag. */
    return (void*)((sljit_uw)code | 0x1);
}

/* --------------------------------------------------------------------- */
/*  Core code generator functions.                                       */
/* --------------------------------------------------------------------- */

#define INVALID_IMM 0x80000000
/* Encodes imm as a Thumb-2 12 bit modified immediate.
   Returns the encoded field, or INVALID_IMM if imm is not encodable. */
static sljit_uw get_imm(sljit_uw imm)
{
    /* Thumb immediate form. */
    sljit_s32 counter;

    if (imm <= 0xff)
        return imm;

    if ((imm & 0xffff) == (imm >> 16)) {
        /* Some special cases. */
        if (!(imm & 0xff00))
            return (1 << 12) | (imm & 0xff);
        if (!(imm & 0xff))
            return (2 << 12) | ((imm >> 8) & 0xff);
        if ((imm & 0xff00) == ((imm & 0xff) << 8))
            return (3 << 12) | (imm & 0xff);
    }

    /* Assembly optimization: count leading zeroes? */
    counter = 8;
    if (!(imm & 0xffff0000)) {
        counter += 16;
        imm <<= 16;
    }
    if (!(imm & 0xff000000)) {
        counter += 8;
        imm <<= 8;
    }
    if (!(imm & 0xf0000000)) {
        counter += 4;
        imm <<= 4;
    }
    if (!(imm & 0xc0000000)) {
        counter += 2;
        imm <<= 2;
    }
    if (!(imm & 0x80000000)) {
        counter += 1;
        imm <<= 1;
    }
    /* Since imm >= 128, this must be true. */
    SLJIT_ASSERT(counter <= 31);

    if (imm & 0x00ffffff)
        return INVALID_IMM; /* Cannot be encoded. */

    return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}

/* Loads an arbitrary 32 bit constant into dst, preferring a single
   MOV/MVN with a modified immediate, then MOVW, then MOVW+MOVT. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
    sljit_uw tmp;

    if (imm >= 0x10000) {
        tmp = get_imm(imm);
        if (tmp != INVALID_IMM)
            return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
        tmp = get_imm(~imm);
        if (tmp != INVALID_IMM)
            return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
    }

    /* set low 16 bits, set hi 16 bits to 0. */
    FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
        | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

    /* set hi 16 bit if needed. */
    if (imm >= 0x10000)
        return push_inst32(compiler, MOVT | RD4(dst)
            | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
    return SLJIT_SUCCESS;
}

#define ARG1_IMM    0x0010000
#define ARG2_IMM    0x0020000
#define KEEP_FLAGS  0x0040000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization).
*/ 507 #define SET_FLAGS 0x0100000 508 #define UNUSED_RETURN 0x0200000 509 #define SLOW_DEST 0x0400000 510 #define SLOW_SRC1 0x0800000 511 #define SLOW_SRC2 0x1000000 512 513 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) 514 { 515 /* dst must be register, TMP_REG1 516 arg1 must be register, TMP_REG1, imm 517 arg2 must be register, TMP_REG2, imm */ 518 sljit_s32 reg; 519 sljit_uw imm, nimm; 520 521 if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { 522 /* Both are immediates. */ 523 flags &= ~ARG1_IMM; 524 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); 525 arg1 = TMP_REG1; 526 } 527 528 if (flags & (ARG1_IMM | ARG2_IMM)) { 529 reg = (flags & ARG2_IMM) ? arg1 : arg2; 530 imm = (flags & ARG2_IMM) ? arg2 : arg1; 531 532 switch (flags & 0xffff) { 533 case SLJIT_CLZ: 534 case SLJIT_MUL: 535 /* No form with immediate operand. */ 536 break; 537 case SLJIT_MOV: 538 SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); 539 return load_immediate(compiler, dst, imm); 540 case SLJIT_NOT: 541 if (!(flags & SET_FLAGS)) 542 return load_immediate(compiler, dst, ~imm); 543 /* Since the flags should be set, we just fallback to the register mode. 544 Although some clever things could be done here, "NOT IMM" does not worth the efforts. 
*/ 545 break; 546 case SLJIT_ADD: 547 nimm = -imm; 548 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { 549 if (imm <= 0x7) 550 return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); 551 if (nimm <= 0x7) 552 return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); 553 if (reg == dst) { 554 if (imm <= 0xff) 555 return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); 556 if (nimm <= 0xff) 557 return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst)); 558 } 559 } 560 if (!(flags & SET_FLAGS)) { 561 if (imm <= 0xfff) 562 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); 563 if (nimm <= 0xfff) 564 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm)); 565 } 566 imm = get_imm(imm); 567 if (imm != INVALID_IMM) 568 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 569 break; 570 case SLJIT_ADDC: 571 imm = get_imm(imm); 572 if (imm != INVALID_IMM) 573 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 574 break; 575 case SLJIT_SUB: 576 if (flags & ARG1_IMM) { 577 if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst)) 578 return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); 579 imm = get_imm(imm); 580 if (imm != INVALID_IMM) 581 return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 582 break; 583 } 584 nimm = -imm; 585 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { 586 if (imm <= 0x7) 587 return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); 588 if (nimm <= 0x7) 589 return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); 590 if (reg == dst) { 591 if (imm <= 0xff) 592 return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); 593 if (nimm <= 0xff) 594 return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst)); 595 } 596 if (imm <= 0xff && (flags & UNUSED_RETURN)) 597 return push_inst16(compiler, CMPI | IMM8(imm) | 
RDN3(reg)); 598 } 599 if (!(flags & SET_FLAGS)) { 600 if (imm <= 0xfff) 601 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); 602 if (nimm <= 0xfff) 603 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm)); 604 } 605 imm = get_imm(imm); 606 if (imm != INVALID_IMM) 607 return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 608 break; 609 case SLJIT_SUBC: 610 if (flags & ARG1_IMM) 611 break; 612 imm = get_imm(imm); 613 if (imm != INVALID_IMM) 614 return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 615 break; 616 case SLJIT_AND: 617 nimm = get_imm(imm); 618 if (nimm != INVALID_IMM) 619 return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); 620 imm = get_imm(imm); 621 if (imm != INVALID_IMM) 622 return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 623 break; 624 case SLJIT_OR: 625 nimm = get_imm(imm); 626 if (nimm != INVALID_IMM) 627 return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); 628 imm = get_imm(imm); 629 if (imm != INVALID_IMM) 630 return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 631 break; 632 case SLJIT_XOR: 633 imm = get_imm(imm); 634 if (imm != INVALID_IMM) 635 return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); 636 break; 637 case SLJIT_SHL: 638 case SLJIT_LSHR: 639 case SLJIT_ASHR: 640 if (flags & ARG1_IMM) 641 break; 642 imm &= 0x1f; 643 if (imm == 0) { 644 if (!(flags & SET_FLAGS)) 645 return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); 646 if (IS_2_LO_REGS(dst, reg)) 647 return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); 648 return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); 649 } 650 switch (flags & 0xffff) { 651 case SLJIT_SHL: 652 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) 653 return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) 
| (imm << 6)); 654 return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); 655 case SLJIT_LSHR: 656 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) 657 return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); 658 return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); 659 default: /* SLJIT_ASHR */ 660 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) 661 return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); 662 return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); 663 } 664 default: 665 SLJIT_ASSERT_STOP(); 666 break; 667 } 668 669 if (flags & ARG2_IMM) { 670 FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); 671 arg2 = TMP_REG2; 672 } 673 else { 674 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); 675 arg1 = TMP_REG1; 676 } 677 } 678 679 /* Both arguments are registers. */ 680 switch (flags & 0xffff) { 681 case SLJIT_MOV: 682 case SLJIT_MOV_U32: 683 case SLJIT_MOV_S32: 684 case SLJIT_MOV_P: 685 case SLJIT_MOVU: 686 case SLJIT_MOVU_U32: 687 case SLJIT_MOVU_S32: 688 case SLJIT_MOVU_P: 689 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 690 if (dst == arg2) 691 return SLJIT_SUCCESS; 692 return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); 693 case SLJIT_MOV_U8: 694 case SLJIT_MOVU_U8: 695 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 696 if (IS_2_LO_REGS(dst, arg2)) 697 return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); 698 return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); 699 case SLJIT_MOV_S8: 700 case SLJIT_MOVU_S8: 701 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 702 if (IS_2_LO_REGS(dst, arg2)) 703 return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); 704 return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); 705 case SLJIT_MOV_U16: 706 case SLJIT_MOVU_U16: 707 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 708 if (IS_2_LO_REGS(dst, 
arg2)) 709 return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); 710 return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); 711 case SLJIT_MOV_S16: 712 case SLJIT_MOVU_S16: 713 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 714 if (IS_2_LO_REGS(dst, arg2)) 715 return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); 716 return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); 717 case SLJIT_NOT: 718 SLJIT_ASSERT(arg1 == TMP_REG1); 719 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 720 return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); 721 return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); 722 case SLJIT_CLZ: 723 SLJIT_ASSERT(arg1 == TMP_REG1); 724 FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); 725 if (flags & SET_FLAGS) { 726 if (reg_map[dst] <= 7) 727 return push_inst16(compiler, CMPI | RDN3(dst)); 728 return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst)); 729 } 730 return SLJIT_SUCCESS; 731 case SLJIT_ADD: 732 if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) 733 return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); 734 if (dst == arg1 && !(flags & SET_FLAGS)) 735 return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); 736 return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 737 case SLJIT_ADDC: 738 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 739 return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); 740 return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 741 case SLJIT_SUB: 742 if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) 743 return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); 744 return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 745 case SLJIT_SUBC: 746 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 747 return 
push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); 748 return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 749 case SLJIT_MUL: 750 if (!(flags & SET_FLAGS)) 751 return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); 752 SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2); 753 FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); 754 /* cmp TMP_REG2, dst asr #31. */ 755 return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); 756 case SLJIT_AND: 757 if (!(flags & KEEP_FLAGS)) { 758 if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) 759 return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); 760 if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) 761 return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); 762 } 763 return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 764 case SLJIT_OR: 765 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 766 return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); 767 return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 768 case SLJIT_XOR: 769 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 770 return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); 771 return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 772 case SLJIT_SHL: 773 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 774 return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); 775 return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 776 case SLJIT_LSHR: 777 if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 778 return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); 779 return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 780 case SLJIT_ASHR: 781 if (dst == arg1 && 
!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) 782 return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); 783 return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); 784 } 785 786 SLJIT_ASSERT_STOP(); 787 return SLJIT_SUCCESS; 788 } 789 790 #define STORE 0x01 791 #define SIGNED 0x02 792 793 #define WORD_SIZE 0x00 794 #define BYTE_SIZE 0x04 795 #define HALF_SIZE 0x08 796 797 #define UPDATE 0x10 798 #define ARG_TEST 0x20 799 800 #define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) 801 #define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) 802 803 /* 804 1st letter: 805 w = word 806 b = byte 807 h = half 808 809 2nd letter: 810 s = signed 811 u = unsigned 812 813 3rd letter: 814 l = load 815 s = store 816 */ 817 818 static const sljit_ins sljit_mem16[12] = { 819 /* w u l */ 0x5800 /* ldr */, 820 /* w u s */ 0x5000 /* str */, 821 /* w s l */ 0x5800 /* ldr */, 822 /* w s s */ 0x5000 /* str */, 823 824 /* b u l */ 0x5c00 /* ldrb */, 825 /* b u s */ 0x5400 /* strb */, 826 /* b s l */ 0x5600 /* ldrsb */, 827 /* b s s */ 0x5400 /* strb */, 828 829 /* h u l */ 0x5a00 /* ldrh */, 830 /* h u s */ 0x5200 /* strh */, 831 /* h s l */ 0x5e00 /* ldrsh */, 832 /* h s s */ 0x5200 /* strh */, 833 }; 834 835 static const sljit_ins sljit_mem16_imm5[12] = { 836 /* w u l */ 0x6800 /* ldr imm5 */, 837 /* w u s */ 0x6000 /* str imm5 */, 838 /* w s l */ 0x6800 /* ldr imm5 */, 839 /* w s s */ 0x6000 /* str imm5 */, 840 841 /* b u l */ 0x7800 /* ldrb imm5 */, 842 /* b u s */ 0x7000 /* strb imm5 */, 843 /* b s l */ 0x0000 /* not allowed */, 844 /* b s s */ 0x7000 /* strb imm5 */, 845 846 /* h u l */ 0x8800 /* ldrh imm5 */, 847 /* h u s */ 0x8000 /* strh imm5 */, 848 /* h s l */ 0x0000 /* not allowed */, 849 /* h s s */ 0x8000 /* strh imm5 */, 850 }; 851 852 #define MEM_IMM8 0xc00 853 #define MEM_IMM12 0x800000 854 static const sljit_ins sljit_mem32[12] = { 855 /* w u l */ 0xf8500000 /* ldr.w */, 856 /* w u s */ 0xf8400000 /* str.w 
*/, 857 /* w s l */ 0xf8500000 /* ldr.w */, 858 /* w s s */ 0xf8400000 /* str.w */, 859 860 /* b u l */ 0xf8100000 /* ldrb.w */, 861 /* b u s */ 0xf8000000 /* strb.w */, 862 /* b s l */ 0xf9100000 /* ldrsb.w */, 863 /* b s s */ 0xf8000000 /* strb.w */, 864 865 /* h u l */ 0xf8300000 /* ldrh.w */, 866 /* h u s */ 0xf8200000 /* strsh.w */, 867 /* h s l */ 0xf9300000 /* ldrsh.w */, 868 /* h s s */ 0xf8200000 /* strsh.w */, 869 }; 870 871 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ 872 static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) 873 { 874 if (value >= 0) { 875 if (value <= 0xfff) 876 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); 877 value = get_imm(value); 878 if (value != INVALID_IMM) 879 return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value); 880 } 881 else { 882 value = -value; 883 if (value <= 0xfff) 884 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); 885 value = get_imm(value); 886 if (value != INVALID_IMM) 887 return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value); 888 } 889 return SLJIT_ERR_UNSUPPORTED; 890 } 891 892 /* Can perform an operation using at most 1 instruction. 
*/
/* Tries to emit the memory access with a single instruction.
   Returns -1 when an instruction was emitted, 1 when ARG_TEST is set and
   a single-instruction form exists, and 0 when the addressing mode needs
   the slow path (getput_arg). */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
    sljit_s32 other_r, shift;

    SLJIT_ASSERT(arg & SLJIT_MEM);

    if (SLJIT_UNLIKELY(flags & UPDATE)) {
        /* Pre/post-update form: base register with a small immediate only. */
        if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
            if (SLJIT_UNLIKELY(flags & ARG_TEST))
                return 1;

            flags &= ~UPDATE;
            arg &= 0xf;
            /* Bit 0x200 selects add (vs. subtract) of the offset. */
            if (argw >= 0)
                argw |= 0x200;
            else {
                argw = -argw;
            }

            SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
            FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
            return -1;
        }
        return 0;
    }

    if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
        /* Base + (index << argw) form. */
        if (SLJIT_UNLIKELY(flags & ARG_TEST))
            return 1;

        argw &= 0x3;
        other_r = OFFS_REG(arg);
        arg &= 0xf;

        if (!argw && IS_3_LO_REGS(reg, arg, other_r))
            FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
        else
            FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
        return -1;
    }

    if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
        return 0;

    if (SLJIT_UNLIKELY(flags & ARG_TEST))
        return 1;

    arg &= 0xf;
    if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
        /* shift stays 3 when no 16 bit imm5 form fits the offset. */
        shift = 3;
        if (IS_WORD_SIZE(flags)) {
            if (OFFSET_CHECK(0x1f, 2))
                shift = 2;
        }
        else if (flags & BYTE_SIZE)
        {
            if (OFFSET_CHECK(0x1f, 0))
                shift = 0;
        }
        else {
            SLJIT_ASSERT(flags & HALF_SIZE);
            if (OFFSET_CHECK(0x1f, 1))
                shift = 1;
        }

        if (shift != 3) {
            FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
            return -1;
        }
    }

    /* SP based immediate. */
    if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
        FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
        return -1;
    }

    /* 32 bit form: positive offsets use imm12, negative ones imm8. */
    if (argw >= 0)
        FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
    else
        FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
    return -1;
}

/* see getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always uses word arguments without write back. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
    sljit_sw diff;
    if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
        return 0;

    if (!(arg & REG_MASK)) {
        /* Absolute address: cacheable when the next one is close. */
        diff = argw - next_argw;
        if (diff <= 0xfff && diff >= -0xfff)
            return 1;
        return 0;
    }

    if (argw == next_argw)
        return 1;

    diff = argw - next_argw;
    if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
        return 1;

    return 0;
}

/* Emit the necessary instructions. See can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
    sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
    sljit_s32 tmp_r, other_r;
    sljit_sw diff;

    SLJIT_ASSERT(arg & SLJIT_MEM);
    if (!(next_arg & SLJIT_MEM)) {
        next_arg = 0;
        next_argw = 0;
    }

    tmp_r = (flags & STORE) ? TMP_REG3 : reg;

    if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
        /* Update only applies if a base register exists. */
        /* There is no caching here.
 */
		other_r = OFFS_REG(arg);
		arg &= 0xf;
		flags &= ~UPDATE;

		if (!other_r) {
			/* No index register: if the offset fits in 12 bits, do the
			   access and then advance the base with ADDW. */
			if (!(argw & ~0xfff)) {
				FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
				return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
			}

			if (compiler->cache_arg == SLJIT_MEM) {
				if (argw == compiler->cache_argw) {
					other_r = TMP_REG3;
					argw = 0;
				}
				else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
					FAIL_IF(compiler->error);
					compiler->cache_argw = argw;
					other_r = TMP_REG3;
					argw = 0;
				}
			}

			/* Cache miss: materialize the offset in TMP_REG3 and
			   remember it for subsequent accesses. */
			if (argw) {
				FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
				compiler->cache_arg = SLJIT_MEM;
				compiler->cache_argw = argw;
				other_r = TMP_REG3;
				argw = 0;
			}
		}

		argw &= 0x3;
		if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
			return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
		}
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
		return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
	}
	flags &= ~UPDATE;

	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));

	/* Exact base match in the cache: reuse TMP_REG3 with a small
	   positive (12 bit) or negative (8 bit) displacement. */
	if (compiler->cache_arg == arg) {
		diff = argw - compiler->cache_argw;
		if (!(diff & ~0xfff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
		if (!((compiler->cache_argw - argw) & ~0xff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
		}
	}

	/* From here next_arg is reused as a flag: nonzero when the next
	   access uses the same base register with a different offset. */
	next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
	arg &= 0xf;
	if (arg && compiler->cache_arg == SLJIT_MEM) {
		if (compiler->cache_argw == argw)
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		}
	}

	compiler->cache_argw = argw;
	if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
		FAIL_IF(compiler->error);
		/* TMP_REG3 now holds base + offset; cache it keyed on the base. */
		compiler->cache_arg = SLJIT_MEM | arg;
		arg = 0;
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		compiler->cache_arg = SLJIT_MEM;

		diff = argw - next_argw;
		if (next_arg && diff <= 0xfff && diff >= -0xfff) {
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
			compiler->cache_arg = SLJIT_MEM | arg;
			arg = 0;
		}
	}

	if (arg)
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
	return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
}

/* Emits a single memory access, resetting the address cache first
   (used when the access is not part of a cacheable sequence). */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
}

/* Like emit_op_mem, but keeps the current cache state and passes the
   following argument pair to getput_arg as a caching hint. */
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
{
	if (getput_arg_fast(compiler, flags, reg,
arg1, arg1w))
		return compiler->error;
	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}

/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Generates the function prologue: pushes LR and the saved registers
   (plus r4 for alignment, see below), allocates the 8-byte aligned
   local area, and copies up to three arguments into saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp;
	sljit_ins push;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	/* Bit 4 (r4) is always pushed together with LR; see the alignment
	   comment below. */
	push = (1 << 4);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		push |= 1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		push |= 1 << reg_map[i];

	/* The 16 bit PUSH can only encode r0-r7 and LR; fall back to the
	   32 bit form when any high register is in the mask. */
	FAIL_IF((push & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | push)
		: push_inst16(compiler, PUSH | (1 << 8) | push));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	local_size = ((size + local_size + 7) & ~7) - size;
	compiler->local_size = local_size;
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
	}

	if (args >= 1)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));

	return SLJIT_SUCCESS;
}

/* Records the context (local size etc.) without emitting any code;
   must mirror the size computation of sljit_emit_enter above. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	compiler->local_size = ((size + local_size + 7) & ~7) - size;
	return SLJIT_SUCCESS;
}

/* Generates the epilogue: moves the return value, releases the local
   area and pops the saved registers (popping into PC to return). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp;
	sljit_ins pop;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	if (compiler->local_size > 0) {
		if (compiler->local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); 1205 else 1206 FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); 1207 } 1208 1209 pop = (1 << 4); 1210 1211 tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; 1212 for (i = SLJIT_S0; i >= tmp; i--) 1213 pop |= 1 << reg_map[i]; 1214 1215 for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) 1216 pop |= 1 << reg_map[i]; 1217 1218 return (pop & 0xff00) 1219 ? push_inst32(compiler, POP_W | (1 << 15) | pop) 1220 : push_inst16(compiler, POP | (1 << 8) | pop); 1221 } 1222 1223 /* --------------------------------------------------------------------- */ 1224 /* Operators */ 1225 /* --------------------------------------------------------------------- */ 1226 1227 #ifdef __cplusplus 1228 extern "C" { 1229 #endif 1230 1231 #if defined(__GNUC__) 1232 extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); 1233 extern int __aeabi_idivmod(int numerator, int denominator); 1234 #else 1235 #error "Software divmod functions are needed" 1236 #endif 1237 1238 #ifdef __cplusplus 1239 } 1240 #endif 1241 1242 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) 1243 { 1244 sljit_sw saved_reg_list[3]; 1245 sljit_sw saved_reg_count; 1246 1247 CHECK_ERROR(); 1248 CHECK(check_sljit_emit_op0(compiler, op)); 1249 1250 op = GET_OPCODE(op); 1251 switch (op) { 1252 case SLJIT_BREAKPOINT: 1253 return push_inst16(compiler, BKPT); 1254 case SLJIT_NOP: 1255 return push_inst16(compiler, NOP); 1256 case SLJIT_LMUL_UW: 1257 case SLJIT_LMUL_SW: 1258 return push_inst32(compiler, (op == SLJIT_LMUL_UW ? 
UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 8)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 16)
			| reg_map[SLJIT_R1]);
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping);

		/* Save the live scratch registers (r12, r2, and r1 for the
		   plain-divide forms) around the call, presumably because the
		   __aeabi helpers clobber them - confirm against the ARM RTABI. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 12;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}

/* Emits a unary operation (moves with optional size conversion and
   write-back, or an arithmetic/logical single-operand opcode). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ?
dst : TMP_REG1;

	op = GET_OPCODE(op);
	/* MOV-family opcodes: translate the opcode into access-size flags
	   (the MOVU variants additionally request write-back via UPDATE),
	   then perform the transfer as a load and/or store. */
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		case SLJIT_MOVU:
		case SLJIT_MOVU_U32:
		case SLJIT_MOVU_S32:
		case SLJIT_MOVU_P:
			flags = WORD_SIZE | UPDATE;
			break;
		case SLJIT_MOVU_U8:
			flags = BYTE_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOVU_S8:
			flags = BYTE_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOVU_U16:
			flags = HALF_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOVU_S16:
			flags = HALF_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_ASSERT_STOP();
			flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
		else if (src & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
				FAIL_IF(compiler->error);
			else
				FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
		} else {
			/* Register to register move. */
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
			dst_r = src;
		}

		if (dst & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
				return compiler->error;
			else
				return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
		}
		return SLJIT_SUCCESS;
	}

	/* Negate is lowered as 0 - src through sljit_emit_op2. */
	if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
	}

	flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
	if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
			FAIL_IF(compiler->error);
		else
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
		src = TMP_REG2;
	}

	if (src & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		srcw = src;

	emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);

	if (dst & SLJIT_MEM) {
		if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
			return compiler->error;
		else
			return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}

/* Emits a binary operation; memory operands are loaded into the
   temporary registers first, using the slow path only when needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ?
KEEP_FLAGS : 0);

	/* Probe (ARG_TEST) whether the destination store needs the slow path. */
	if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
	}
	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
	}

	/* When both sources need the slow path, pick the load order that
	   lets can_cache() reuse the address computation. */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));

	if (src1 & SLJIT_MEM)
		src1 = TMP_REG1;
	if (src2 & SLJIT_MEM)
		src2 = TMP_REG2;

	/* emit_op_imm takes either a register number or an immediate in
	   the *w arguments, selected by the ARG?_IMM flags. */
	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else
		src1w = src1;
	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		src2w = src2;

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);

	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE
sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	/* Translate the abstract sljit register to its hardware number. */
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	/* Float registers map 1:1, scaled by two (even register numbers). */
	return reg << 1;
}

/* Copies a raw 16 or 32 bit instruction into the generated code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	if (size == 2)
		return push_inst16(compiler, *(sljit_u16*)instruction);
	return push_inst32(compiler, *(sljit_ins*)instruction);
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}

/* Bit 20 distinguishes VLDR from VSTR in the base VSTR_F32 opcode. */
#define FPU_LOAD (1 << 20)

/* Emits a VFP load or store (selected by FPU_LOAD in flags) for an
   arbitrary sljit memory operand, using the same TMP_REG3 address
   cache as the integer getput_arg path. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_sw tmp;
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores.
 */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VFP transfers have no register-offset form: fold the index
		   into TMP_REG2 first. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG2;
		argw = 0;
	}

	/* The immediate must be word aligned and fit in 8 bits after
	   scaling (<= 0x3fc); NOTE(review): 0x800000 appears to be the
	   add/subtract (U) bit of the VFP encoding - confirm in the ARM ARM. */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
	}

	/* Slow cases */
	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
	if (compiler->cache_arg == arg) {
		tmp = argw - compiler->cache_argw;
		if (!(tmp & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
		if (!(-tmp & ~0x3fc))
			return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
		}
	}

	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
		}
		/* Split the offset: the high part goes into an ADD/SUB
		   immediate, the low (aligned) part into the transfer itself. */
		imm = get_imm(argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
	}

	/* Last resort: load the full address into TMP_REG3 and cache it. */
	compiler->cache_arg = arg;
	compiler->cache_argw = argw;

	FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
}

/* Converts a floating point value to a signed word (via VCVT, then a
   VMOV or a store of the integer bit pattern). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}

/* Converts a signed word to a floating point value (integer is moved
   into a VFP register first, then converted with VCVT). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register.
 */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}

/* Compares two floating point values (VCMP) and transfers the VFP
   status flags to the integer flags (VMRS). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2)));
	return push_inst32(compiler, VMRS);
}

/* Emits a unary floating point operation (move, negate, absolute
   value, or single<->double conversion). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* The base opcodes encode the f32 form; flipping SLJIT_F32_OP
	   selects between the single and double precision encodings. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw);
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
			else
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		/* The result has the opposite precision of the source. */
		op ^= SLJIT_F32_OP;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}

/* Emits a binary floating point operation (add/sub/mul/div). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ?
dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
}

#undef FPU_LOAD

/* --------------------------------------------------------------------- */
/*  Other instructions                                                   */
/* --------------------------------------------------------------------- */

/* Stores the return address (kept in TMP_REG3 by fast calls) into dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));

	/* Memory. */
	if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
		return compiler->error;
	/* TMP_REG3 is used for caching.
 */
	FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
}

/* Loads the return address into TMP_REG3 and branches to it (BLX). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
	else if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
			FAIL_IF(compiler->error);
		else {
			compiler->cache_arg = 0;
			compiler->cache_argw = 0;
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
		}
	}
	else if (src & SLJIT_IMM)
		FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
	return push_inst16(compiler, BLX | RN3(TMP_REG3));
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

/* Maps an sljit condition type to the corresponding 4 bit ARM
   condition code (used in IT blocks and conditional branches). */
static sljit_uw get_cc(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_MUL_NOT_OVERFLOW:
	case SLJIT_EQUAL_F64:
		return 0x0;

	case SLJIT_NOT_EQUAL:
	case SLJIT_MUL_OVERFLOW:
	case SLJIT_NOT_EQUAL_F64:
		return 0x1;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x3;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x2;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x8;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x9;

	case SLJIT_SIG_LESS:
		return 0xb;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xa;

	case SLJIT_SIG_GREATER:
		return 0xc;

	case SLJIT_SIG_LESS_EQUAL:
		return 0xd;

	case SLJIT_OVERFLOW:
	case SLJIT_UNORDERED_F64:
		return 0x6;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_ORDERED_F64:
		return 0x7;

	default: /* SLJIT_JUMP */
		SLJIT_ASSERT_STOP();
		return 0xe;
	}
}

/* Creates (or reuses) a label at the current code position. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* Reuse the previous label when no code was emitted since. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}

/* Emits a (possibly conditional) jump whose target is patched later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In ARM, we don't need to touch the arguments.
*/ 1930 PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); 1931 if (type < SLJIT_JUMP) { 1932 jump->flags |= IS_COND; 1933 cc = get_cc(type); 1934 jump->flags |= cc << 8; 1935 PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); 1936 } 1937 1938 jump->addr = compiler->size; 1939 if (type <= SLJIT_JUMP) 1940 PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); 1941 else { 1942 jump->flags |= IS_BL; 1943 PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); 1944 } 1945 1946 return jump; 1947 } 1948 1949 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 1950 { 1951 struct sljit_jump *jump; 1952 1953 CHECK_ERROR(); 1954 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 1955 ADJUST_LOCAL_OFFSET(src, srcw); 1956 1957 /* In ARM, we don't need to touch the arguments. */ 1958 if (!(src & SLJIT_IMM)) { 1959 if (FAST_IS_REG(src)) 1960 return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); 1961 1962 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw)); 1963 if (type >= SLJIT_FAST_CALL) 1964 return push_inst16(compiler, BLX | RN3(TMP_REG1)); 1965 } 1966 1967 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 1968 FAIL_IF(!jump); 1969 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); 1970 jump->u.target = srcw; 1971 1972 FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); 1973 jump->addr = compiler->size; 1974 return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(TMP_REG1)); 1975 } 1976 1977 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, 1978 sljit_s32 dst, sljit_sw dstw, 1979 sljit_s32 src, sljit_sw srcw, 1980 sljit_s32 type) 1981 { 1982 sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); 1983 sljit_ins cc, ins; 1984 1985 CHECK_ERROR(); 1986 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 1987 ADJUST_LOCAL_OFFSET(dst, dstw); 1988 ADJUST_LOCAL_OFFSET(src, srcw); 1989 1990 if (dst == SLJIT_UNUSED) 1991 return SLJIT_SUCCESS; 1992 1993 op = GET_OPCODE(op); 1994 cc = get_cc(type & 0xff); 1995 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; 1996 1997 if (op < SLJIT_ADD) { 1998 FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); 1999 if (reg_map[dst_r] > 7) { 2000 FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); 2001 FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); 2002 } else { 2003 FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); 2004 FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); 2005 } 2006 if (dst_r != TMP_REG2) 2007 return SLJIT_SUCCESS; 2008 return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw); 2009 } 2010 2011 ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI)); 2012 if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { 2013 /* Does not change the other bits. */ 2014 FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); 2015 FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1)); 2016 if (flags & SLJIT_SET_E) { 2017 /* The condition must always be set, even if the ORRI/EORI is not executed above. 
*/ 2018 if (reg_map[dst] <= 7) 2019 return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst)); 2020 return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst)); 2021 } 2022 return SLJIT_SUCCESS; 2023 } 2024 2025 compiler->cache_arg = 0; 2026 compiler->cache_argw = 0; 2027 if (src & SLJIT_MEM) { 2028 FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); 2029 src = TMP_REG2; 2030 srcw = 0; 2031 } else if (src & SLJIT_IMM) { 2032 FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); 2033 src = TMP_REG2; 2034 srcw = 0; 2035 } 2036 2037 if (op == SLJIT_AND || src != dst_r) { 2038 FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); 2039 FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); 2040 FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0)); 2041 } 2042 else { 2043 FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); 2044 FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); 2045 } 2046 2047 if (dst_r == TMP_REG2) 2048 FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0)); 2049 2050 if (flags & SLJIT_SET_E) { 2051 /* The condition must always be set, even if the ORR/EORI is not executed above. 
*/ 2052 if (reg_map[dst_r] <= 7) 2053 return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); 2054 return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); 2055 } 2056 return SLJIT_SUCCESS; 2057 } 2058 2059 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2060 { 2061 struct sljit_const *const_; 2062 sljit_s32 dst_r; 2063 2064 CHECK_ERROR_PTR(); 2065 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2066 ADJUST_LOCAL_OFFSET(dst, dstw); 2067 2068 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2069 PTR_FAIL_IF(!const_); 2070 set_const(const_, compiler); 2071 2072 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2073 PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); 2074 2075 if (dst & SLJIT_MEM) 2076 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); 2077 return const_; 2078 } 2079 2080 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) 2081 { 2082 sljit_u16 *inst = (sljit_u16*)addr; 2083 modify_imm32_const(inst, new_addr); 2084 SLJIT_CACHE_FLUSH(inst, inst + 4); 2085 } 2086 2087 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2088 { 2089 sljit_u16 *inst = (sljit_u16*)addr; 2090 modify_imm32_const(inst, new_constant); 2091 SLJIT_CACHE_FLUSH(inst, inst + 4); 2092 } 2093