/* $NetBSD: sljitNativeTILEGX_64.c,v 1.2 2014/06/17 19:33:20 alnsn Exp $ */

/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2013-2013 Tilera Corporation (jiwang@tilera.com). All rights reserved.
 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* TileGX architecture. */
/* Contributed by Tilera Corporation. */
#include "sljitNativeTILEGX-encoder.c"

#define SIMM_8BIT_MAX (0x7f)
#define SIMM_8BIT_MIN (-0x80)
#define SIMM_16BIT_MAX (0x7fff)
#define SIMM_16BIT_MIN (-0x8000)
#define SIMM_17BIT_MAX (0xffff)
#define SIMM_17BIT_MIN (-0x10000)
#define SIMM_32BIT_MIN (-0x80000000)
#define SIMM_32BIT_MAX (0x7fffffff)
/* These must describe a signed 48-bit range; the earlier (positive)
   SIMM_48BIT_MIN made the 48-bit branch of load_immediate unreachable. */
#define SIMM_48BIT_MIN (-0x800000000000L)
#define SIMM_48BIT_MAX (0x7fffffffffffL)
#define IMM16(imm) ((imm) & 0xffff)

#define UIMM_16BIT_MAX (0xffff)

#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
#define PIC_ADDR_REG TMP_REG2

static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
    63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};

#define SLJIT_LOCALS_REG_mapped 54
#define TMP_REG1_mapped 5
#define TMP_REG2_mapped 16
#define TMP_REG3_mapped 6
#define ADDR_TMP_mapped 7
#define SLJIT_SAVED_REG1_mapped 30
#define SLJIT_SAVED_REG2_mapped 31
#define SLJIT_SAVED_REG3_mapped 32
#define SLJIT_SAVED_EREG1_mapped 33
#define SLJIT_SAVED_EREG2_mapped 34
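/*
 * A note on the mapping above (a sketch, derived from the table and the
 * *_mapped defines): reg_map[] translates an abstract sljit register
 * index into a physical TILE-Gx register number. Index 0 maps to r63
 * (the hard-wired zero register), the scratch registers to r0-r4, the
 * saved registers to r30-r34, the locals/stack register to r54, and the
 * four temporaries to r5, r16, r6 and r7. For example:
 *
 *   reg_map[SLJIT_SAVED_REG1] == 30 == SLJIT_SAVED_REG1_mapped
 *
 * so emitters may either index reg_map[] or use the *_mapped constants
 * directly; the two must stay in sync.
 */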
/* Flags are kept in volatile registers. */
#define EQUAL_FLAG 8
/* And carry flag as well. */
#define ULESS_FLAG 9
#define UGREATER_FLAG 10
#define LESS_FLAG 11
#define GREATER_FLAG 12
#define OVERFLOW_FLAG 13

#define ZERO 63
#define RA 55
#define TMP_EREG1 14
#define TMP_EREG2 15

#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
#define DOUBLE_DATA 0x10

/* Separates integer and floating point registers. */
#define GPR_REG 0xf

#define MEM_MASK 0x1f

#define WRITE_BACK 0x00020
#define ARG_TEST 0x00040
#define ALT_KEEP_CACHE 0x00080
#define CUMULATIVE_OP 0x00100
#define LOGICAL_OP 0x00200
#define IMM_OP 0x00400
#define SRC2_IMM 0x00800

#define UNUSED_DEST 0x01000
#define REG_DEST 0x02000
#define REG1_SOURCE 0x04000
#define REG2_SOURCE 0x08000
#define SLOW_SRC1 0x10000
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void)
{
    return "TileGX" SLJIT_CPUINFO;
}

/* Length of an instruction word. */
typedef sljit_uw sljit_ins;

struct jit_instr {
    const struct tilegx_opcode* opcode;
    tilegx_pipeline pipe;
    unsigned long input_registers;
    unsigned long output_registers;
    int operand_value[4];
    int line;
};

/* Opcode Helper Macros */
#define TILEGX_X_MODE 0

#define X_MODE create_Mode(TILEGX_X_MODE)
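/*
 * How to read the encoding macros below (a sketch based on the encoder
 * interface included above): every TILE-Gx instruction word is a 64-bit
 * bundle holding up to three operations, one per issue pipe. Each
 * FOO_X1 macro pre-assembles a bundle that carries the real operation
 * in the X1 pipe and an explicit FNOP in X0 (and vice versa for the
 * _X0 macros), so a usable instruction is formed by OR-ing in the
 * operand fields, e.g.:
 *
 *   push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca)
 *                              | SRCB_X1(srcb));
 *
 * which is exactly what the *_SOLO helpers further down do.
 */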
#define FNOP_X0 \
    create_Opcode_X0(RRR_0_OPCODE_X0) | \
    create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
    create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)

#define FNOP_X1 \
    create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
    create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)

#define NOP \
    create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1

#define ANOP_X0 \
    create_Opcode_X0(RRR_0_OPCODE_X0) | \
    create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
    create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)

#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
    create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
    create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0

#define ADD_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0

#define ADDI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
    create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0

#define SUB_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0

#define NOR_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0

#define OR_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0

#define AND_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0

#define XOR_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0

#define CMOVNEZ_X0 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
    create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1

#define CMOVEQZ_X0 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
    create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1

#define ADDLI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0

#define V4INT_L_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0

#define BFEXTU_X0 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
    create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1

#define BFEXTS_X0 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
    create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1

#define SHL16INSLI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0

#define ST_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0

#define LD_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
    create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0

#define JR_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
    create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0

#define JALR_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
    create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0

#define CLZ_X0 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
    create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
    create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1

#define CMPLTUI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
    create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0

#define CMPLTU_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0

#define CMPLTS_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0

#define XORI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
    create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0

#define ORI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
    create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0

#define ANDI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
    create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0

#define SHLI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
    create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHL_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0
#define SHRSI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
    create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRS_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRUI_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
    create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRU_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
    create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0

#define BEQZ_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
    create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0

#define BNEZ_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
    create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0

#define J_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
    create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0

#define JAL_X1 \
    create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
    create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0

#define DEST_X0(x) create_Dest_X0(x)
#define SRCA_X0(x) create_SrcA_X0(x)
#define SRCB_X0(x) create_SrcB_X0(x)
#define DEST_X1(x) create_Dest_X1(x)
#define SRCA_X1(x) create_SrcA_X1(x)
#define SRCB_X1(x) create_SrcB_X1(x)
#define IMM16_X1(x) create_Imm16_X1(x)
#define IMM8_X1(x) create_Imm8_X1(x)
#define BFSTART_X0(x) create_BFStart_X0(x)
#define BFEND_X0(x) create_BFEnd_X0(x)
#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
#define JOFF_X1(x) create_JumpOff_X1(x)
#define BOFF_X1(x) create_BrOff_X1(x)

static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = {
    /* u w s */ TILEGX_OPC_ST /* st */,
    /* u w l */ TILEGX_OPC_LD /* ld */,
    /* u b s */ TILEGX_OPC_ST1 /* st1 */,
    /* u b l */ TILEGX_OPC_LD1U /* ld1u */,
    /* u h s */ TILEGX_OPC_ST2 /* st2 */,
    /* u h l */ TILEGX_OPC_LD2U /* ld2u */,
    /* u i s */ TILEGX_OPC_ST4 /* st4 */,
    /* u i l */ TILEGX_OPC_LD4U /* ld4u */,
    /* s w s */ TILEGX_OPC_ST /* st */,
    /* s w l */ TILEGX_OPC_LD /* ld */,
    /* s b s */ TILEGX_OPC_ST1 /* st1 */,
    /* s b l */ TILEGX_OPC_LD1S /* ld1s */,
    /* s h s */ TILEGX_OPC_ST2 /* st2 */,
    /* s h l */ TILEGX_OPC_LD2S /* ld2s */,
    /* s i s */ TILEGX_OPC_ST4 /* st4 */,
    /* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};

#ifdef TILEGX_JIT_DEBUG
static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
{
    sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
    FAIL_IF(!ptr);
    *ptr = ins;
    compiler->size++;
    printf("|%04d|S0|:\t\t", line);
    print_insn_tilegx(ptr);
    return SLJIT_SUCCESS;
}

static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
{
    sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
    FAIL_IF(!ptr);
    *ptr = ins;
    compiler->size++;
    return SLJIT_SUCCESS;
}

#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
#else
static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
    sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
    FAIL_IF(!ptr);
    *ptr = ins;
    compiler->size++;
    return SLJIT_SUCCESS;
}
#endif
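/*
 * A note on the buffer protocol above: push_inst() appends one fully
 * encoded 64-bit bundle to the compiler's instruction buffer and bumps
 * compiler->size, which therefore counts bundles rather than individual
 * operations. The TILEGX_JIT_DEBUG variant additionally disassembles
 * each bundle as it is emitted, tagged with the C source line that
 * produced it.
 */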
#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
    ((p0) | ((p1) << 8) | ((p2) << 16))

#define BUNDLE_FORMAT(p0, p1, p2) \
    { \
        { \
            (tilegx_pipeline)(p0), \
            (tilegx_pipeline)(p1), \
            (tilegx_pipeline)(p2) \
        }, \
        BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
    }

#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)

#define PI(encoding) \
    push_inst(compiler, encoding)

#define PB3(opcode, dst, srca, srcb) \
    push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)

#define PB2(opcode, dst, src) \
    push_2_buffer(compiler, opcode, dst, src, __LINE__)

#define JR(reg) \
    push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)

#define ADD(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)

#define SUB(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)

#define NOR(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)

#define OR(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)

#define XOR(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)

#define AND(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)

#define CLZ(dst, src) \
    push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)

#define SHLI(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)

#define SHRUI(dst, srca, imm) \
    push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)

#define XORI(dst, srca, imm) \
    push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)

#define ORI(dst, srca, imm) \
    push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)

#define CMPLTU(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)

#define CMPLTS(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)

#define CMPLTUI(dst, srca, imm) \
    push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)

#define CMOVNEZ(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)

#define CMOVEQZ(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)

#define ADDLI(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)

#define SHL16INSLI(dst, srca, srcb) \
    push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)

#define LD_ADD(dst, addr, adjust) \
    push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)

/* Note: the first operand of st_add is the (written-back) address
   register and the second is the value being stored; the parameter
   names reflect the order every call site in this file uses. */
#define ST_ADD(addr, src, adjust) \
    push_3_buffer(compiler, TILEGX_OPC_ST_ADD, addr, src, adjust, __LINE__)

#define LD(dst, addr) \
    push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)

#define BFEXTU(dst, src, start, end) \
    push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)

#define BFEXTS(dst, src, start, end) \
    push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)

#define ADD_SOLO(dest, srca, srcb) \
    push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))
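/*
 * The *_SOLO emitters below bypass the bundling buffer and write a
 * single pre-encoded bundle directly. They are used where later code
 * patching relies on a fixed instruction layout (emit_const and the
 * jump relocation logic), so the bundler must not be allowed to
 * reorder or merge them with neighbouring operations.
 */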
#define ADDI_SOLO(dest, srca, imm) \
    push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))

#define ADDLI_SOLO(dest, srca, imm) \
    push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define SHL16INSLI_SOLO(dest, srca, imm) \
    push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define JALR_SOLO(reg) \
    push_inst(compiler, JALR_X1 | SRCA_X1(reg))

#define JR_SOLO(reg) \
    push_inst(compiler, JR_X1 | SRCA_X1(reg))

struct Format {
    /* Mapping of bundle issue slot to assigned pipe. */
    tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

    /* Mask of pipes used by this bundle. */
    unsigned int pipe_mask;
};

const struct Format formats[] =
{
    /* In Y format we must always have something in Y2, since it has
     * no fnop, so this conveys that Y2 must always be used. */
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

    /* Y format has three instructions. */
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
    BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

    /* X format has only two instructions. */
    BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
    BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};

struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;

tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
{
    /* FIXME: tile: we could pregenerate this. */
    int pipe;
    for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
        ;
    return (tilegx_pipeline)(pipe);
}

void insert_nop(tilegx_mnemonic opc, int line)
{
    const struct tilegx_opcode* opcode = NULL;

    memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);

    opcode = &tilegx_opcodes[opc];
    inst_buf[0].opcode = opcode;
    inst_buf[0].pipe = get_any_valid_pipe(opcode);
    inst_buf[0].input_registers = 0;
    inst_buf[0].output_registers = 0;
    inst_buf[0].line = line;
    ++inst_buf_index;
}

const struct Format* compute_format()
{
    unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
        inst_buf[0].opcode->pipes,
        inst_buf[1].opcode->pipes,
        (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));

    const struct Format* match = NULL;
    const struct Format *b = NULL;
    unsigned int i;
    for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
        b = &formats[i];
        if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
            match = b;
            break;
        }
    }

    return match;
}
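/*
 * Example of the matching above (a sketch): if the buffer holds two
 * operations whose pipe masks only allow the X pipes, compatible_pipes
 * combines those masks with the NO_PIPELINE slot, and
 * BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE)
 * is the first entry whose pipe_mask is fully contained in it, so
 * compute_format() returns that entry. It returns NULL when no format
 * fits, which makes the caller split the bundle.
 */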
sljit_si assign_pipes()
{
    unsigned long output_registers = 0;
    unsigned int i = 0;

    if (inst_buf_index == 1) {
        tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
            ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
        insert_nop(opc, __LINE__);
    }

    const struct Format* match = compute_format();

    if (match == NULL)
        return -1;

    for (i = 0; i < inst_buf_index; i++) {

        if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
            return -1;

        if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
            return -1;

        /* Don't include Rzero in the match set, to avoid triggering
           needlessly on 'prefetch' instrs. */

        output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;

        inst_buf[i].pipe = match->pipe[i];
    }

    /* If only 2 instrs, and in Y-mode, insert a nop. */
    if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
        insert_nop(TILEGX_OPC_FNOP, __LINE__);

        /* Select the yet unassigned pipe. */
        tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
            + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
            - (inst_buf[1].pipe + inst_buf[2].pipe)));

        inst_buf[0].pipe = pipe;
    }

    return 0;
}

tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
{
    int i, val;
    const struct tilegx_opcode* opcode = inst->opcode;
    tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];

    const struct tilegx_operand* operand = NULL;
    for (i = 0; i < opcode->num_operands; i++) {
        operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
        val = inst->operand_value[i];

        bits |= operand->insert(val);
    }

    return bits;
}
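/*
 * A worked example for assign_pipes(): the checks against
 * output_registers reject both read-after-write and write-after-write
 * hazards inside one bundle, e.g. "add r5, r1, r2 ; ld r3, r5" cannot
 * be bundled because r5 is produced and consumed in the same cycle.
 * The 0xFFFFFFFFFFFFFFL mask keeps r63 (the zero register) out of the
 * tracked set, per the comment above, so operations that "write" r63
 * never force a spurious split.
 */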
static sljit_si update_buffer(struct sljit_compiler *compiler)
{
    int i;
    int orig_index = inst_buf_index;
    struct jit_instr inst0 = inst_buf[0];
    struct jit_instr inst1 = inst_buf[1];
    struct jit_instr inst2 = inst_buf[2];
    tilegx_bundle_bits bits = 0;

    /* If the bundle is valid as is, perform the encoding and return. */
    if (assign_pipes() == 0) {
        for (i = 0; i < inst_buf_index; i++) {
            bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
            printf("|%04d", inst_buf[i].line);
#endif
        }
#ifdef TILEGX_JIT_DEBUG
        if (inst_buf_index == 3)
            printf("|M0|:\t");
        else
            printf("|M0|:\t\t");
        print_insn_tilegx(&bits);
#endif

        inst_buf_index = 0;

#ifdef TILEGX_JIT_DEBUG
        return push_inst_nodebug(compiler, bits);
#else
        return push_inst(compiler, bits);
#endif
    }

    /* If the bundle is invalid, split it in two. First encode the first two
       (or possibly 1) instructions, and then the last, separately. Note that
       assign_pipes may have re-ordered the instrs (by inserting no-ops in
       lower slots) so we need to reset them. */

    inst_buf_index = orig_index - 1;
    inst_buf[0] = inst0;
    inst_buf[1] = inst1;
    inst_buf[2] = inst2;
    if (assign_pipes() == 0) {
        for (i = 0; i < inst_buf_index; i++) {
            bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
            printf("|%04d", inst_buf[i].line);
#endif
        }

#ifdef TILEGX_JIT_DEBUG
        if (inst_buf_index == 3)
            printf("|M1|:\t");
        else
            printf("|M1|:\t\t");
        print_insn_tilegx(&bits);
#endif

        if ((orig_index - 1) == 2) {
            inst_buf[0] = inst2;
            inst_buf_index = 1;
        } else if ((orig_index - 1) == 1) {
            inst_buf[0] = inst1;
            inst_buf_index = 1;
        } else
            SLJIT_ASSERT_STOP();

#ifdef TILEGX_JIT_DEBUG
        return push_inst_nodebug(compiler, bits);
#else
        return push_inst(compiler, bits);
#endif
    } else {
        /* We had 3 instrs of which the first 2 can't live in the same bundle.
           Split those two. Note that we don't try to then combine the second
           and third instr into a single bundle. First instruction: */
        inst_buf_index = 1;
        inst_buf[0] = inst0;
        inst_buf[1] = inst1;
        inst_buf[2] = inst2;
        if (assign_pipes() == 0) {
            for (i = 0; i < inst_buf_index; i++) {
                bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
                printf("|%04d", inst_buf[i].line);
#endif
            }

#ifdef TILEGX_JIT_DEBUG
            if (inst_buf_index == 3)
                printf("|M2|:\t");
            else
                printf("|M2|:\t\t");
            print_insn_tilegx(&bits);
#endif

            inst_buf[0] = inst1;
            inst_buf[1] = inst2;
            inst_buf_index = orig_index - 1;
#ifdef TILEGX_JIT_DEBUG
            return push_inst_nodebug(compiler, bits);
#else
            return push_inst(compiler, bits);
#endif
        } else
            SLJIT_ASSERT_STOP();
    }

    SLJIT_ASSERT_STOP();
}

static sljit_si flush_buffer(struct sljit_compiler *compiler)
{
    while (inst_buf_index != 0)
        FAIL_IF(update_buffer(compiler));

    return SLJIT_SUCCESS;
}

static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
{
    if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
        FAIL_IF(update_buffer(compiler));

    const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
    inst_buf[inst_buf_index].opcode = opcode;
    inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
    inst_buf[inst_buf_index].operand_value[0] = op0;
    inst_buf[inst_buf_index].operand_value[1] = op1;
    inst_buf[inst_buf_index].operand_value[2] = op2;
    inst_buf[inst_buf_index].operand_value[3] = op3;
    inst_buf[inst_buf_index].input_registers = 1L << op1;
    inst_buf[inst_buf_index].output_registers = 1L << op0;
    inst_buf[inst_buf_index].line = line;
    inst_buf_index++;

    return SLJIT_SUCCESS;
}
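/*
 * Usage sketch of the buffering pipeline above: push_2/3/4_buffer()
 * queue up to TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE operations,
 * update_buffer() tries to seal them into one bundle and falls back to
 * progressively smaller groups when the format table or a register
 * hazard says no, and flush_buffer() drains whatever is pending, e.g.
 * before a jump, where a partially filled bundle must not leak across.
 */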
static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
{
    if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
        FAIL_IF(update_buffer(compiler));

    const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
    inst_buf[inst_buf_index].opcode = opcode;
    inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
    inst_buf[inst_buf_index].operand_value[0] = op0;
    inst_buf[inst_buf_index].operand_value[1] = op1;
    inst_buf[inst_buf_index].operand_value[2] = op2;
    inst_buf[inst_buf_index].line = line;

    switch (opc) {
    case TILEGX_OPC_ST_ADD:
        inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
        inst_buf[inst_buf_index].output_registers = 1L << op0;
        break;
    case TILEGX_OPC_LD_ADD:
        inst_buf[inst_buf_index].input_registers = 1L << op1;
        inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
        break;
    case TILEGX_OPC_ADD:
    case TILEGX_OPC_AND:
    case TILEGX_OPC_SUB:
    case TILEGX_OPC_OR:
    case TILEGX_OPC_XOR:
    case TILEGX_OPC_NOR:
    case TILEGX_OPC_SHL:
    case TILEGX_OPC_SHRU:
    case TILEGX_OPC_SHRS:
    case TILEGX_OPC_CMPLTU:
    case TILEGX_OPC_CMPLTS:
    case TILEGX_OPC_CMOVEQZ:
    case TILEGX_OPC_CMOVNEZ:
        inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
        inst_buf[inst_buf_index].output_registers = 1L << op0;
        break;
    case TILEGX_OPC_ADDLI:
    case TILEGX_OPC_XORI:
    case TILEGX_OPC_ORI:
    case TILEGX_OPC_SHLI:
    case TILEGX_OPC_SHRUI:
    case TILEGX_OPC_SHRSI:
    case TILEGX_OPC_SHL16INSLI:
    case TILEGX_OPC_CMPLTUI:
    case TILEGX_OPC_CMPLTSI:
        inst_buf[inst_buf_index].input_registers = 1L << op1;
        inst_buf[inst_buf_index].output_registers = 1L << op0;
        break;
    default:
        printf("unrecognized opc: %s\n", opcode->name);
        SLJIT_ASSERT_STOP();
    }

    inst_buf_index++;

    return SLJIT_SUCCESS;
}

static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
{
    if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
        FAIL_IF(update_buffer(compiler));

    const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
    inst_buf[inst_buf_index].opcode = opcode;
    inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
    inst_buf[inst_buf_index].operand_value[0] = op0;
    inst_buf[inst_buf_index].operand_value[1] = op1;
    inst_buf[inst_buf_index].line = line;

    switch (opc) {
    case TILEGX_OPC_BEQZ:
    case TILEGX_OPC_BNEZ:
        inst_buf[inst_buf_index].input_registers = 1L << op0;
        inst_buf[inst_buf_index].output_registers = 0;
        break;
    case TILEGX_OPC_ST:
    case TILEGX_OPC_ST1:
    case TILEGX_OPC_ST2:
    case TILEGX_OPC_ST4:
        inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
        inst_buf[inst_buf_index].output_registers = 0;
        break;
    case TILEGX_OPC_CLZ:
    case TILEGX_OPC_LD:
    case TILEGX_OPC_LD1U:
    case TILEGX_OPC_LD1S:
    case TILEGX_OPC_LD2U:
    case TILEGX_OPC_LD2S:
    case TILEGX_OPC_LD4U:
    case TILEGX_OPC_LD4S:
        inst_buf[inst_buf_index].input_registers = 1L << op1;
        inst_buf[inst_buf_index].output_registers = 1L << op0;
        break;
    default:
        printf("unrecognized opc: %s\n", opcode->name);
        SLJIT_ASSERT_STOP();
    }

    inst_buf_index++;

    return SLJIT_SUCCESS;
}

static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
{
    if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
        FAIL_IF(update_buffer(compiler));

    const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
    inst_buf[inst_buf_index].opcode = opcode;
    inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
    inst_buf[inst_buf_index].input_registers = 0;
    inst_buf[inst_buf_index].output_registers = 0;
    inst_buf[inst_buf_index].line = line;
    inst_buf_index++;

    return SLJIT_SUCCESS;
}
static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
{
    if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
        FAIL_IF(update_buffer(compiler));

    const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
    inst_buf[inst_buf_index].opcode = opcode;
    inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
    inst_buf[inst_buf_index].operand_value[0] = op0;
    inst_buf[inst_buf_index].input_registers = 1L << op0;
    inst_buf[inst_buf_index].output_registers = 0;
    inst_buf[inst_buf_index].line = line;
    inst_buf_index++;

    return flush_buffer(compiler);
}

static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
    sljit_sw diff;
    sljit_uw target_addr;
    sljit_ins *inst;

    if (jump->flags & SLJIT_REWRITABLE_JUMP)
        return code_ptr;

    if (jump->flags & JUMP_ADDR)
        target_addr = jump->u.target;
    else {
        SLJIT_ASSERT(jump->flags & JUMP_LABEL);
        target_addr = (sljit_uw)(code + jump->u.label->size);
    }

    inst = (sljit_ins *)jump->addr;
    if (jump->flags & IS_COND)
        inst--;

    diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
    if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
        jump->flags |= PATCH_B;

        if (!(jump->flags & IS_COND)) {
            if (jump->flags & IS_JAL) {
                jump->flags &= ~(PATCH_B);
                jump->flags |= PATCH_J;
                inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
                printf("[runtime relocate]%04d:\t", __LINE__);
                print_insn_tilegx(inst);
#endif
            } else {
                inst[0] = BEQZ_X1 | SRCA_X1(ZERO);

#ifdef TILEGX_JIT_DEBUG
                printf("[runtime relocate]%04d:\t", __LINE__);
                print_insn_tilegx(inst);
#endif
            }

            return inst;
        }

        inst[0] = inst[0] ^ (0x7L << 55);

#ifdef TILEGX_JIT_DEBUG
        printf("[runtime relocate]%04d:\t", __LINE__);
        print_insn_tilegx(inst);
#endif
        jump->addr -= sizeof(sljit_ins);
        return inst;
    }

    if (jump->flags & IS_COND) {
        if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
            jump->flags |= PATCH_J;
            inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
            inst[1] = J_X1;
            return inst + 1;
        }

        return code_ptr;
    }

    if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
        jump->flags |= PATCH_J;

        if (jump->flags & IS_JAL) {
            inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
            printf("[runtime relocate]%04d:\t", __LINE__);
            print_insn_tilegx(inst);
#endif

        } else {
            inst[0] = J_X1;

#ifdef TILEGX_JIT_DEBUG
            printf("[runtime relocate]%04d:\t", __LINE__);
            print_insn_tilegx(inst);
#endif
        }

        return inst;
    }

    return code_ptr;
}
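/*
 * A worked example for detect_jump_type(): bundles are 8 bytes, so the
 * branch distance is (target - inst) >> 3 bundles. A jump whose target
 * lies within the signed 17-bit branch offset is patched in place
 * (PATCH_B); for conditional jumps the "^ (0x7L << 55)" flips the
 * branch-type field so the guarding branch (BEQZ <-> BNEZ) can target
 * the destination directly. Longer jumps fall back to an absolute
 * J/JAL (PATCH_J) when source and target share the same 2^30-byte
 * aligned window, and otherwise keep the full three-bundle
 * immediate-load sequence.
 */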
SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
{
    struct sljit_memory_fragment *buf;
    sljit_ins *code;
    sljit_ins *code_ptr;
    sljit_ins *buf_ptr;
    sljit_ins *buf_end;
    sljit_uw word_count;
    sljit_uw addr;

    struct sljit_label *label;
    struct sljit_jump *jump;
    struct sljit_const *const_;

    CHECK_ERROR_PTR();
    check_sljit_generate_code(compiler);
    reverse_buf(compiler);

    code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
    PTR_FAIL_WITH_EXEC_IF(code);
    buf = compiler->buf;

    code_ptr = code;
    word_count = 0;
    label = compiler->labels;
    jump = compiler->jumps;
    const_ = compiler->consts;
    do {
        buf_ptr = (sljit_ins *)buf->memory;
        buf_end = buf_ptr + (buf->used_size >> 3);
        do {
            *code_ptr = *buf_ptr++;
            SLJIT_ASSERT(!label || label->size >= word_count);
            SLJIT_ASSERT(!jump || jump->addr >= word_count);
            SLJIT_ASSERT(!const_ || const_->addr >= word_count);
            /* These structures are ordered by their address. */
            if (label && label->size == word_count) {
                /* Just recording the address. */
                label->addr = (sljit_uw) code_ptr;
                label->size = code_ptr - code;
                label = label->next;
            }

            if (jump && jump->addr == word_count) {
                if (jump->flags & IS_JAL)
                    jump->addr = (sljit_uw)(code_ptr - 4);
                else
                    jump->addr = (sljit_uw)(code_ptr - 3);

                code_ptr = detect_jump_type(jump, code_ptr, code);
                jump = jump->next;
            }

            if (const_ && const_->addr == word_count) {
                /* Just recording the address. */
                const_->addr = (sljit_uw) code_ptr;
                const_ = const_->next;
            }

            code_ptr++;
            word_count++;
        } while (buf_ptr < buf_end);

        buf = buf->next;
    } while (buf);

    if (label && label->size == word_count) {
        label->addr = (sljit_uw) code_ptr;
        label->size = code_ptr - code;
        label = label->next;
    }

    SLJIT_ASSERT(!label);
    SLJIT_ASSERT(!jump);
    SLJIT_ASSERT(!const_);
    SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

    jump = compiler->jumps;
    while (jump) {
        do {
            addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
            buf_ptr = (sljit_ins *)jump->addr;

            if (jump->flags & PATCH_B) {
                addr = (sljit_sw)(addr - (jump->addr)) >> 3;
                SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
                buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
                printf("[runtime relocate]%04d:\t", __LINE__);
                print_insn_tilegx(buf_ptr);
#endif
                break;
            }

            if (jump->flags & PATCH_J) {
                SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
                addr = (sljit_sw)(addr - (jump->addr)) >> 3;
                buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
                printf("[runtime relocate]%04d:\t", __LINE__);
                print_insn_tilegx(buf_ptr);
#endif
                break;
            }

            SLJIT_ASSERT(!(jump->flags & IS_JAL));
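            /* A sketch of the layout patched below: absolute targets are
               materialized by emit_const() as ADDLI + SHL16INSLI +
               SHL16INSLI, whose 16-bit immediate field sits at bit 43 of
               the X1 encoding. The three stores rewrite those fields with
               bits 47..32, 31..16 and 15..0 of the resolved address. */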
            /* Set the fields of immediate loads. */
            buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
            buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
            buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
        } while (0);

        jump = jump->next;
    }

    compiler->error = SLJIT_ERR_COMPILED;
    compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
    SLJIT_CACHE_FLUSH(code, code_ptr);
    return code;
}

static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
{
    if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
        return ADDLI(dst_ar, ZERO, imm);

    if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
        FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
        return SHL16INSLI(dst_ar, dst_ar, imm);
    }

    if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
        FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
        FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
        return SHL16INSLI(dst_ar, dst_ar, imm);
    }

    FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
    FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
    FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
    return SHL16INSLI(dst_ar, dst_ar, imm);
}

static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
{
    /* Should *not* be optimized as load_immediate, as pcre relocation
       mechanism will match this fixed 3-instruction pattern. */
    if (flush) {
        FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
        FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
        return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
    }

    FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
    FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
    return SHL16INSLI(dst_ar, dst_ar, imm);
}

static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
{
    /* Should *not* be optimized as load_immediate, as pcre relocation
       mechanism will match this fixed 4-instruction pattern. */
    if (flush) {
        FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
        FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
        FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
        return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
    }

    FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
    FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
    FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
    return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
}
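/*
 * Stack frame shape produced by sljit_emit_enter() below (a sketch
 * derived from the offsets used here and in sljit_emit_return()):
 *
 *   [locals_reg + local_size - 8]   return address (RA)
 *   [locals_reg + local_size - 16]  SLJIT_SAVED_REG1
 *   ...                             further saved registers, downward
 *   [locals_reg + 0]                start of the user's local area
 *
 * local_size is rounded up to a multiple of 8, and frames larger than
 * SIMM_16BIT_MAX switch to an address base built in a register.
 */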
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
    sljit_ins base;

    CHECK_ERROR();
    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);

    compiler->scratches = scratches;
    compiler->saveds = saveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
    compiler->logical_local_size = local_size;
#endif

    local_size += (saveds + 1) * sizeof(sljit_sw);
    local_size = (local_size + 7) & ~7;
    compiler->local_size = local_size;

    if (local_size <= SIMM_16BIT_MAX) {
        /* Frequent case. */
        FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
        base = SLJIT_LOCALS_REG_mapped;
    } else {
        FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
        FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
        FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
        base = TMP_REG2_mapped;
        local_size = 0;
    }

    FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));

    if (saveds >= 1)
        FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));

    if (saveds >= 2)
        FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));

    if (saveds >= 3)
        FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));

    if (saveds >= 4)
        FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));

    if (saveds >= 5)
        FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));

    if (args >= 1)
        FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));

    if (args >= 2)
        FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));

    if (args >= 3)
        FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
    CHECK_ERROR_VOID();
    check_sljit_set_context(compiler, args, scratches, saveds, local_size);

    compiler->scratches = scratches;
    compiler->saveds = saveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
    compiler->logical_local_size = local_size;
#endif

    local_size += (saveds + 1) * sizeof(sljit_sw);
    compiler->local_size = (local_size + 7) & ~7;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
{
    sljit_si local_size;
    sljit_ins base;
    int addr_initialized = 0;

    CHECK_ERROR();
    check_sljit_emit_return(compiler, op, src, srcw);

    FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

    local_size = compiler->local_size;
    if (local_size <= SIMM_16BIT_MAX)
        base = SLJIT_LOCALS_REG_mapped;
    else {
        FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
        FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
        base = TMP_REG1_mapped;
        local_size = 0;
    }

    FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
    FAIL_IF(LD(RA, ADDR_TMP_mapped));

    if (compiler->saveds >= 5) {
        FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
        addr_initialized = 1;

        FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
    }

    if (compiler->saveds >= 4) {
        if (addr_initialized == 0) {
            FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
            addr_initialized = 1;
        }

        FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
    }

    if (compiler->saveds >= 3) {
        if (addr_initialized == 0) {
            FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
            addr_initialized = 1;
        }

        FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
    }
    if (compiler->saveds >= 2) {
        if (addr_initialized == 0) {
            FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
            addr_initialized = 1;
        }

        FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
    }

    if (compiler->saveds >= 1) {
        if (addr_initialized == 0) {
            FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
            /* addr_initialized = 1; no need to initialize as it's the last one. */
        }

        FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
    }

    if (compiler->local_size <= SIMM_16BIT_MAX)
        FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
    else
        FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));

    return JR(RA);
}

/* reg_ar is an absolute register! */

/* Can perform an operation using at most two instructions. */
static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
{
    SLJIT_ASSERT(arg & SLJIT_MEM);

    if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
            && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
        /* Works for both absolute and relative addresses. */
        if (SLJIT_UNLIKELY(flags & ARG_TEST))
            return 1;

        FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));

        if (flags & LOAD_DATA)
            FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
        else
            FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

        return -1;
    }

    return 0;
}

/* See getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always use word arguments without write back. */
static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
{
    SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));

    /* Simple operation except for updates. */
    if (arg & OFFS_REG_MASK) {
        argw &= 0x3;
        next_argw &= 0x3;
        if (argw && argw == next_argw
                && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
            return 1;
        return 0;
    }

    if (arg == next_arg) {
        if (((next_argw - argw) <= SIMM_16BIT_MAX
                && (next_argw - argw) >= SIMM_16BIT_MIN))
            return 1;

        return 0;
    }

    return 0;
}
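/*
 * A sketch of the caching contract used by getput_arg() below:
 * compiler->cache_arg / cache_argw describe what TMP_REG3 currently
 * holds, either a shifted index register (SLJIT_MEM | offset register)
 * or an absolute/base offset (SLJIT_MEM alone). When two consecutive
 * memory operands share that state, e.g. a load from [base + imm]
 * followed by a store to [base + imm + 8], the second access reuses
 * TMP_REG3 with at most one ADDLI instead of rebuilding the address.
 */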
/* Emit the necessary instructions. See can_cache above. */
static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
{
    sljit_si tmp_ar, base;

    SLJIT_ASSERT(arg & SLJIT_MEM);
    if (!(next_arg & SLJIT_MEM)) {
        next_arg = 0;
        next_argw = 0;
    }

    if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
        tmp_ar = reg_ar;
    else
        tmp_ar = TMP_REG1_mapped;

    base = arg & REG_MASK;

    if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
        argw &= 0x3;

        if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
            SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
            FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
            reg_ar = TMP_REG1_mapped;
        }

        /* Using the cache. */
        if (argw == compiler->cache_argw) {
            if (!(flags & WRITE_BACK)) {
                if (arg == compiler->cache_arg) {
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
                }

                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
                    if (arg == next_arg && argw == (next_argw & 0x3)) {
                        compiler->cache_arg = arg;
                        compiler->cache_argw = argw;
                        FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
                        if (flags & LOAD_DATA)
                            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
                        else
                            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
                    }

                    FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
                }
            } else {
                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
                    FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
                }
            }
        }

        if (SLJIT_UNLIKELY(argw)) {
            compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
            compiler->cache_argw = argw;
            FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
        }

        if (!(flags & WRITE_BACK)) {
            if (arg == next_arg && argw == (next_argw & 0x3)) {
                compiler->cache_arg = arg;
                compiler->cache_argw = argw;
                FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
                tmp_ar = TMP_REG3_mapped;
            } else
                FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

            if (flags & LOAD_DATA)
                return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
            else
                return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
        }

        FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
    }
    if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
        /* Update only applies if a base register exists. */
        if (reg_ar == reg_map[base]) {
            SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
            if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
                FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
                if (flags & LOAD_DATA)
                    FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
                else
                    FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

                if (argw)
                    return ADDLI(reg_map[base], reg_map[base], argw);

                return SLJIT_SUCCESS;
            }

            FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
            reg_ar = TMP_REG1_mapped;
        }

        if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
            if (argw)
                FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
        } else {
            if (compiler->cache_arg == SLJIT_MEM
                    && argw - compiler->cache_argw <= SIMM_16BIT_MAX
                    && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
                if (argw != compiler->cache_argw) {
                    FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
                    compiler->cache_argw = argw;
                }

                FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
            } else {
                compiler->cache_arg = SLJIT_MEM;
                compiler->cache_argw = argw;
                FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
                FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
            }
        }

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
    }

    if (compiler->cache_arg == arg
            && argw - compiler->cache_argw <= SIMM_16BIT_MAX
            && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
        if (argw != compiler->cache_argw) {
            FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
            compiler->cache_argw = argw;
        }

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    if (compiler->cache_arg == SLJIT_MEM
            && argw - compiler->cache_argw <= SIMM_16BIT_MAX
            && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
        if (argw != compiler->cache_argw)
            FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
    } else {
        compiler->cache_arg = SLJIT_MEM;
        FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
    }

    compiler->cache_argw = argw;

    if (!base) {
        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    if (arg == next_arg
            && next_argw - argw <= SIMM_16BIT_MAX
            && next_argw - argw >= SIMM_16BIT_MIN) {
        compiler->cache_arg = arg;
        FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));

    if (flags & LOAD_DATA)
        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
    else
        return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
}
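/*
 * A note on the helper protocol used below: getput_arg_fast() returns
 * nonzero when it fully handled (or, under ARG_TEST, could handle) the
 * access, so emit_op_mem() just propagates compiler->error in that case
 * and only falls back to the general getput_arg() path, with a cleared
 * cache, for the complex addressing modes.
 */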
static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
{
    if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
        return compiler->error;

    compiler->cache_arg = 0;
    compiler->cache_argw = 0;
    return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
}

static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
{
    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
        return compiler->error;
    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
{
    CHECK_ERROR();
    check_sljit_emit_fast_enter(compiler, dst, dstw);
    ADJUST_LOCAL_OFFSET(dst, dstw);

    /* For UNUSED dst. Uncommon, but possible. */
    if (dst == SLJIT_UNUSED)
        return SLJIT_SUCCESS;

    if (FAST_IS_REG(dst))
        return ADD(reg_map[dst], RA, ZERO);

    /* Memory. */
    return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
{
    CHECK_ERROR();
    check_sljit_emit_fast_return(compiler, src, srcw);
    ADJUST_LOCAL_OFFSET(src, srcw);

    if (FAST_IS_REG(src))
        FAIL_IF(ADD(RA, reg_map[src], ZERO));

    else if (src & SLJIT_MEM)
        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));

    else if (src & SLJIT_IMM)
        FAIL_IF(load_immediate(compiler, RA, srcw));

    return JR(RA);
}

static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2)
{
    sljit_si overflow_ra = 0;

    switch (GET_OPCODE(op)) {
    case SLJIT_MOV:
    case SLJIT_MOV_P:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (dst != src2)
            return ADD(reg_map[dst], reg_map[src2], ZERO);
        return SLJIT_SUCCESS;

    case SLJIT_MOV_UI:
    case SLJIT_MOV_SI:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_SI)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);

            return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
        } else if (dst != src2)
            SLJIT_ASSERT_STOP();

        return SLJIT_SUCCESS;

    case SLJIT_MOV_UB:
    case SLJIT_MOV_SB:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_SB)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);

            return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
        } else if (dst != src2)
            SLJIT_ASSERT_STOP();

        return SLJIT_SUCCESS;

    case SLJIT_MOV_UH:
    case SLJIT_MOV_SH:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_SH)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);

            return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
        } else if (dst != src2)
            SLJIT_ASSERT_STOP();

        return SLJIT_SUCCESS;
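    /* A worked example for the cases above: the bit-field extracts
       implement the narrowing moves, e.g. BFEXTS(dst, src, 0, 7) copies
       bits 7..0 of src into dst and sign-extends from bit 7, while
       BFEXTU(dst, src, 0, 15) zero-extends the low halfword; the 0..31
       variants handle the 32-bit moves. */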
    case SLJIT_NOT:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (op & SLJIT_SET_E)
            FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
        if (CHECK_FLAGS(SLJIT_SET_E))
            FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));

        return SLJIT_SUCCESS;

    case SLJIT_CLZ:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (op & SLJIT_SET_E)
            FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
        if (CHECK_FLAGS(SLJIT_SET_E))
            FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));

        return SLJIT_SUCCESS;

    case SLJIT_ADD:
        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
                if (src2 < 0)
                    FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));

            if (op & SLJIT_SET_C) {
                if (src2 >= 0)
                    FAIL_IF(ORI(ULESS_FLAG, reg_map[src1], src2));
                else {
                    FAIL_IF(ADDLI(ULESS_FLAG, ZERO, src2));
                    FAIL_IF(OR(ULESS_FLAG, reg_map[src1], ULESS_FLAG));
                }
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));

            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));

                if (src2 < 0)
                    FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
            }
        } else {
            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
                FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else if (src2 != dst)
                    overflow_ra = reg_map[src2];
                else {
                    /* Rare occasion. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADD(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_C)
                FAIL_IF(OR(ULESS_FLAG, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
                FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
            }
        }

        /* a + b >= a | b (otherwise, the carry should be set to 1). */
        if (op & SLJIT_SET_C)
            FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[dst], ULESS_FLAG));

        if (op & SLJIT_SET_O)
            return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);

        return SLJIT_SUCCESS;
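    /* A worked example of the carry computation above: whenever no
       carry is produced, a + b >= a | b (since a + b = (a | b) + (a & b)),
       so the sum wraps below a | b exactly when a carry occurs. With
       8-bit values a = 0xF0 and b = 0x20: a + b wraps to 0x10, which is
       below (a | b) = 0xF0, so the final CMPLTU against the saved a | b
       sets ULESS_FLAG to 1, the carry. */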
		/* Set carry flag. */
		return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);

	case SLJIT_SUB:
		if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_O) {
				FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));

				if (src2 < 0)
					FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));

				if (src1 != dst)
					overflow_ra = reg_map[src1];
				else {
					/* Rare occasion. */
					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));

					overflow_ra = TMP_EREG2;
				}
			}

			if (op & SLJIT_SET_E)
				FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));

			if (op & SLJIT_SET_C) {
				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
			}

			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));

		} else {

			if (op & SLJIT_SET_O) {
				FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
				FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

				if (src1 != dst)
					overflow_ra = reg_map[src1];
				else {
					/* Rare occasion. */
					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
					overflow_ra = TMP_EREG2;
				}
			}

			if (op & SLJIT_SET_E)
				FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

			if (op & (SLJIT_SET_U | SLJIT_SET_C))
				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));

			if (op & SLJIT_SET_U)
				FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));

			if (op & SLJIT_SET_S) {
				FAIL_IF(CMPLTS(LESS_FLAG, reg_map[src1], reg_map[src2]));
				FAIL_IF(CMPLTS(GREATER_FLAG, reg_map[src2], reg_map[src1]));
			}

			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
				FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
		}

		if (op & SLJIT_SET_O) {
			FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
			FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
			return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
		}

		return SLJIT_SUCCESS;

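	/* SUBC computes dst = src1 - src2 - borrow, with the incoming
	   borrow held in ULESS_FLAG and the outgoing borrow written back
	   to it when SLJIT_SET_C is requested. */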
	case SLJIT_SUBC:
		if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_C) {
				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
			}

			/* dst may be the same as src1 or src2. */
			FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));

		} else {
			if (op & SLJIT_SET_C)
				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));

			/* dst may be the same as src1 or src2. */
			FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
		}

		if (op & SLJIT_SET_C)
			FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));

		FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));

		if (op & SLJIT_SET_C)
			FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));

		return SLJIT_SUCCESS;

#define EMIT_LOGICAL(op_imm, op_norm) \
	if (flags & SRC2_IMM) { \
		FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
		if (op & SLJIT_SET_E) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
				ADDR_TMP_mapped, __LINE__)); \
		if (CHECK_FLAGS(SLJIT_SET_E)) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, reg_map[dst], reg_map[src1], \
				ADDR_TMP_mapped, __LINE__)); \
	} else { \
		if (op & SLJIT_SET_E) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
				reg_map[src2], __LINE__)); \
		if (CHECK_FLAGS(SLJIT_SET_E)) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, reg_map[dst], reg_map[src1], \
				reg_map[src2], __LINE__)); \
	}

	case SLJIT_AND:
		EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
		return SLJIT_SUCCESS;

	case SLJIT_OR:
		EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
		return SLJIT_SUCCESS;

	case SLJIT_XOR:
		EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
		return SLJIT_SUCCESS;

#define EMIT_SHIFT(op_imm, op_norm) \
	if (flags & SRC2_IMM) { \
		if (op & SLJIT_SET_E) \
			FAIL_IF(push_3_buffer( \
				compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
				src2 & 0x3F, __LINE__)); \
		if (CHECK_FLAGS(SLJIT_SET_E)) \
			FAIL_IF(push_3_buffer( \
				compiler, op_imm, reg_map[dst], reg_map[src1], \
				src2 & 0x3F, __LINE__)); \
	} else { \
		if (op & SLJIT_SET_E) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
				reg_map[src2], __LINE__)); \
		if (CHECK_FLAGS(SLJIT_SET_E)) \
			FAIL_IF(push_3_buffer( \
				compiler, op_norm, reg_map[dst], reg_map[src1], \
				reg_map[src2], __LINE__)); \
	}

	case SLJIT_SHL:
		EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
		return SLJIT_SUCCESS;

	case SLJIT_LSHR:
		EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
		return SLJIT_SUCCESS;

	case SLJIT_ASHR:
		EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
		return SLJIT_SUCCESS;
	}

	SLJIT_ASSERT_STOP();
	return SLJIT_SUCCESS;
}

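/* emit_op resolves the generic reg/mem/imm operands: suitable
   immediates are folded into the instruction (SRC2_IMM), other values
   are materialized into temporaries (using the slow getput_arg path
   when the fast form cannot encode the address), then emit_single_op
   performs the operation and the result is stored back if dst is a
   memory operand. */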
static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg.
	   arg2 goes to TMP_REG2, imm or src reg.
	   TMP_REG3 can be used for caching.
	   result goes to TMP_REG2, so the store-back can still use TMP_REG1 and TMP_REG3. */
	sljit_si dst_r = TMP_REG2;
	sljit_si src1_r;
	sljit_sw src2_r = 0;
	sljit_si sugg_src2_r = TMP_REG2;

	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		if (GET_FLAGS(op))
			flags |= UNUSED_DEST;
	} else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
			sugg_src2_r = dst_r;
	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
		flags |= SLOW_DEST;

	if (flags & IMM_OP) {
		if ((src2 & SLJIT_IMM) && src2w) {
			if ((!(flags & LOGICAL_OP)
					&& (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
					|| ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src2w;
			}
		}

		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
			if ((!(flags & LOGICAL_OP)
					&& (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
					|| ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src1w;

				/* And swap arguments. */
				src1 = src2;
				src1w = src2w;
				src2 = SLJIT_IMM;
				/* src2w = src2_r unneeded. */
			}
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	} else if (src1 & SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
			src1_r = TMP_REG1;
		} else
			src1_r = 0;
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
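	/* For plain moves without a register destination, dst_r is simply
	   redirected to the source register below, letting emit_single_op
	   skip the copy. */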
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
			dst_r = src2_r;
	} else if (src2 & SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w) {
				FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
				src2_r = sugg_src2_r;
			} else {
				src2_r = 0;
				if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
					dst_r = 0;
			}
		}
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
		src2_r = sugg_src2_r;
	}

	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
		}
	} else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
			return compiler->error;
		}

		return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}

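/* Materializes a condition as a 0/1 value in dst. The comparison
   results already live in dedicated flag registers; the switch below
   selects the right one, and for negated condition types the low bit
   is flipped with the XORI near the end. */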
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type)
{
	sljit_si sugg_dst_ar, dst_ar;
	sljit_si flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_NOT_EQUAL:
		FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
		dst_ar = sugg_dst_ar;
		break;
	case SLJIT_C_LESS:
	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_LESS:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		dst_ar = ULESS_FLAG;
		break;
	case SLJIT_C_GREATER:
	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_GREATER:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		dst_ar = UGREATER_FLAG;
		break;
	case SLJIT_C_SIG_LESS:
	case SLJIT_C_SIG_GREATER_EQUAL:
		dst_ar = LESS_FLAG;
		break;
	case SLJIT_C_SIG_GREATER:
	case SLJIT_C_SIG_LESS_EQUAL:
		dst_ar = GREATER_FLAG;
		break;
	case SLJIT_C_OVERFLOW:
	case SLJIT_C_NOT_OVERFLOW:
		dst_ar = OVERFLOW_FLAG;
		break;
	case SLJIT_C_MUL_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
		dst_ar = sugg_dst_ar;
		type ^= 0x1; /* Flip type bit for the XORI below. */
		break;
	case SLJIT_C_FLOAT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		dst_ar = EQUAL_FLAG;
		break;

	default:
		SLJIT_ASSERT_STOP();
		dst_ar = sugg_dst_ar;
		break;
	}

	if (type & 0x1) {
		FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
		dst_ar = sugg_dst_ar;
	}

	if (op >= SLJIT_ADD) {
		if (TMP_REG2_mapped != dst_ar)
			FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
		return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);

	if (sugg_dst_ar != dst_ar)
		return ADD(sugg_dst_ar, dst_ar, ZERO);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_NOP:
		return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);

	case SLJIT_BREAKPOINT:
		return PI(BPT);

	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		SLJIT_ASSERT_STOP();
	}

	return SLJIT_SUCCESS;
}

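/* Each move variant forwards its operand size and signedness to
   emit_op; immediate sources are pre-narrowed by a cast so the emitted
   value matches the bit pattern of the requested width. */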
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_UI:
		return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_SI:
		return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_UB:
		return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);

	case SLJIT_MOV_SB:
		return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);

	case SLJIT_MOV_UH:
		return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);

	case SLJIT_MOV_SH:
		return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);

	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
		return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOVU_UI:
		return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOVU_SI:
		return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOVU_UB:
		return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);

	case SLJIT_MOVU_SB:
		return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);

	case SLJIT_MOVU_UH:
		return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);

	case SLJIT_MOVU_SH:
		return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);

	case SLJIT_NOT:
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
		return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);

	case SLJIT_CLZ:
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
		return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
	case SLJIT_SUBC:
		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_LSHR:
	case SLJIT_ASHR:
		if (src2 & SLJIT_IMM)
			src2w &= 0x3f;
		if (op & SLJIT_INT_OP)
			src2w &= 0x1f;

		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

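/* The label, jump and const emitters flush the pending instruction
   buffer first, so that the offsets recorded below refer to finalized
   instruction bundles. */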
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	flush_buffer(compiler);

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_si src_r = TMP_REG2;
	struct sljit_jump *jump = NULL;

	flush_buffer(compiler);

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
		if (reg_map[src] != 0)
			src_r = src;
		else
			FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
	}

	if (type >= SLJIT_CALL0) {
		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
		if (src & (SLJIT_IMM | SLJIT_MEM)) {
			if (src & SLJIT_IMM)
				FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
			else {
				SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
				FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
			}

			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));

			FAIL_IF(ADDI_SOLO(54, 54, -16));

			FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));

			return ADDI_SOLO(54, 54, 16);
		}

		/* Register input. */
		if (type >= SLJIT_CALL1)
			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));

		FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));

		FAIL_IF(ADDI_SOLO(54, 54, -16));

		FAIL_IF(JALR_SOLO(reg_map[src_r]));

		return ADDI_SOLO(54, 54, 16);
	}

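	/* Jump (or fast call) to an immediate target: record it on the
	   jump list and emit a patchable address load; the placeholder is
	   filled in when the target address becomes known. */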
	if (src & SLJIT_IMM) {
		jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
		jump->u.target = srcw;
		FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));

		if (type >= SLJIT_FAST_CALL) {
			FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
			jump->addr = compiler->size;
			FAIL_IF(JR_SOLO(reg_map[src_r]));
		} else {
			jump->addr = compiler->size;
			FAIL_IF(JR_SOLO(reg_map[src_r]));
		}

		return SLJIT_SUCCESS;

	} else if (src & SLJIT_MEM)
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));

	FAIL_IF(JR_SOLO(reg_map[src_r]));

	if (jump)
		jump->addr = compiler->size;

	return SLJIT_SUCCESS;
}

#define BR_Z(src) \
	inst = BEQZ_X1 | SRCA_X1(src); \
	flags = IS_COND;

#define BR_NZ(src) \
	inst = BNEZ_X1 | SRCA_X1(src); \
	flags = IS_COND;

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	struct sljit_jump *jump;
	sljit_ins inst;
	sljit_si flags = 0;

	flush_buffer(compiler);

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		BR_NZ(EQUAL_FLAG);
		break;
	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		BR_Z(EQUAL_FLAG);
		break;
	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		BR_Z(ULESS_FLAG);
		break;
	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		BR_NZ(ULESS_FLAG);
		break;
	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		BR_Z(UGREATER_FLAG);
		break;
	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		BR_NZ(UGREATER_FLAG);
		break;
	case SLJIT_C_SIG_LESS:
		BR_Z(LESS_FLAG);
		break;
	case SLJIT_C_SIG_GREATER_EQUAL:
		BR_NZ(LESS_FLAG);
		break;
	case SLJIT_C_SIG_GREATER:
		BR_Z(GREATER_FLAG);
		break;
	case SLJIT_C_SIG_LESS_EQUAL:
		BR_NZ(GREATER_FLAG);
		break;
	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		BR_Z(OVERFLOW_FLAG);
		break;
	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		BR_NZ(OVERFLOW_FLAG);
		break;
	default:
		/* Not a conditional branch. */
		inst = 0;
		break;
	}

	jump->flags |= flags;

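	/* A conditional jump is emitted as a short branch on the inverted
	   condition that skips the unconditional far-jump sequence below;
	   the BOFF_X1(5)/BOFF_X1(6) offsets step over that sequence (calls
	   need one extra bundle to set up the argument). */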
	if (inst) {
		inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
		PTR_FAIL_IF(PI(inst));
	}

	PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
	if (type <= SLJIT_JUMP) {
		jump->addr = compiler->size;
		PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
	} else {
		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
		/* Cannot be optimized out if type is >= CALL0. */
		jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
		PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
		jump->addr = compiler->size;
		PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
	}

	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
	return 0;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
{
	SLJIT_ASSERT_STOP();
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
	SLJIT_ASSERT_STOP();
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_si reg;

	flush_buffer(compiler);

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
	sljit_ins *inst = (sljit_ins *)addr;

	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
	SLJIT_CACHE_FLUSH(inst, inst + 3);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	sljit_ins *inst = (sljit_ins *)addr;

	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
	inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}