xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativeARM_T2_32.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: sljitNativeARM_T2_32.c,v 1.2 2014/06/17 19:33:20 alnsn Exp $	*/
2 
3 /*
4  *    Stack-less Just-In-Time compiler
5  *
6  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without modification, are
9  * permitted provided that the following conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above copyright notice, this list of
12  *      conditions and the following disclaimer.
13  *
14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
15  *      of conditions and the following disclaimer in the documentation and/or other materials
16  *      provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
/* Returns a human readable name of the target this backend emits code for
   (CPU information is appended by the SLJIT_CPUINFO macro). */
SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "ARM-Thumb2" SLJIT_CPUINFO;
}
33 
34 /* Length of an instruction word. */
35 typedef sljit_ui sljit_ins;
36 
37 /* Last register + 1. */
38 #define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
39 #define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
40 #define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
41 #define TMP_PC		(SLJIT_NO_REGISTERS + 4)
42 
43 #define TMP_FREG1	(0)
44 #define TMP_FREG2	(SLJIT_FLOAT_REG6 + 1)
45 
46 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
47 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
48 	0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
49 };
50 
/* Moves the 'bits' wide bit field of 'src' starting at bit 'from' to bit
   position 'to'. Every macro argument is parenthesized so that expression
   arguments (e.g. "12 + 16" as used by the MOVT emitters) expand safely,
   independently of operator precedence. */
#define COPY_BITS(src, from, to, bits) \
	(((from) >= (to) ? ((src) >> ((from) - (to))) : ((src) << ((to) - (from)))) & (((1 << (bits)) - 1) << (to)))
53 
54 /* Thumb16 encodings. */
55 #define RD3(rd) (reg_map[rd])
56 #define RN3(rn) (reg_map[rn] << 3)
57 #define RM3(rm) (reg_map[rm] << 6)
58 #define RDN3(rdn) (reg_map[rdn] << 8)
59 #define IMM3(imm) (imm << 6)
60 #define IMM8(imm) (imm)
61 
62 /* Thumb16 helpers. */
63 #define SET_REGS44(rd, rn) \
64 	((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
65 #define IS_2_LO_REGS(reg1, reg2) \
66 	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
67 #define IS_3_LO_REGS(reg1, reg2, reg3) \
68 	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
69 
70 /* Thumb32 encodings. */
71 #define RD4(rd) (reg_map[rd] << 8)
72 #define RN4(rn) (reg_map[rn] << 16)
73 #define RM4(rm) (reg_map[rm])
74 #define RT4(rt) (reg_map[rt] << 12)
75 #define DD4(dd) ((dd) << 12)
76 #define DN4(dn) ((dn) << 16)
77 #define DM4(dm) (dm)
78 #define IMM5(imm) \
79 	(COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
80 #define IMM12(imm) \
81 	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))
82 
83 /* --------------------------------------------------------------------- */
84 /*  Instrucion forms                                                     */
85 /* --------------------------------------------------------------------- */
86 
87 /* dot '.' changed to _
88    I immediate form (possibly followed by number of immediate bits). */
89 #define ADCI		0xf1400000
90 #define ADCS		0x4140
91 #define ADC_W		0xeb400000
92 #define ADD		0x4400
93 #define ADDS		0x1800
94 #define ADDSI3		0x1c00
95 #define ADDSI8		0x3000
#define ADD_W		0xeb000000
#define ADDWI		0xf2000000
#define ADD_SP		0xb000
#define ADD_WI		0xf1000000
101 #define ANDI		0xf0000000
102 #define ANDS		0x4000
103 #define AND_W		0xea000000
104 #define ASRS		0x4100
105 #define ASRSI		0x1000
106 #define ASR_W		0xfa40f000
107 #define ASR_WI		0xea4f0020
108 #define BICI		0xf0200000
109 #define BKPT		0xbe00
110 #define BLX		0x4780
111 #define BX		0x4700
112 #define CLZ		0xfab0f080
113 #define CMPI		0x2800
114 #define CMP_W		0xebb00f00
115 #define EORI		0xf0800000
116 #define EORS		0x4040
117 #define EOR_W		0xea800000
118 #define IT		0xbf00
119 #define LSLS		0x4080
120 #define LSLSI		0x0000
121 #define LSL_W		0xfa00f000
122 #define LSL_WI		0xea4f0000
123 #define LSRS		0x40c0
124 #define LSRSI		0x0800
125 #define LSR_W		0xfa20f000
126 #define LSR_WI		0xea4f0010
127 #define MOV		0x4600
128 #define MOVS		0x0000
129 #define MOVSI		0x2000
130 #define MOVT		0xf2c00000
131 #define MOVW		0xf2400000
132 #define MOV_W		0xea4f0000
133 #define MOV_WI		0xf04f0000
134 #define MUL		0xfb00f000
135 #define MVNS		0x43c0
136 #define MVN_W		0xea6f0000
137 #define MVN_WI		0xf06f0000
138 #define NOP		0xbf00
139 #define ORNI		0xf0600000
140 #define ORRI		0xf0400000
141 #define ORRS		0x4300
142 #define ORR_W		0xea400000
143 #define POP		0xbd00
144 #define POP_W		0xe8bd0000
145 #define PUSH		0xb500
146 #define PUSH_W		0xe92d0000
147 #define RSB_WI		0xf1c00000
148 #define RSBSI		0x4240
149 #define SBCI		0xf1600000
150 #define SBCS		0x4180
151 #define SBC_W		0xeb600000
152 #define SMULL		0xfb800000
153 #define STR_SP		0x9000
154 #define SUBS		0x1a00
155 #define SUBSI3		0x1e00
156 #define SUBSI8		0x3800
157 #define SUB_W		0xeba00000
158 #define SUBWI		0xf2a00000
159 #define SUB_SP		0xb080
160 #define SUB_WI		0xf1a00000
161 #define SXTB		0xb240
162 #define SXTB_W		0xfa4ff080
163 #define SXTH		0xb200
164 #define SXTH_W		0xfa0ff080
165 #define TST		0x4200
166 #define UMULL		0xfba00000
167 #define UXTB		0xb2c0
168 #define UXTB_W		0xfa5ff080
169 #define UXTH		0xb280
170 #define UXTH_W		0xfa1ff080
171 #define VABS_F32	0xeeb00ac0
172 #define VADD_F32	0xee300a00
173 #define VCMP_F32	0xeeb40a40
174 #define VDIV_F32	0xee800a00
175 #define VMOV_F32	0xeeb00a40
176 #define VMRS		0xeef1fa10
177 #define VMUL_F32	0xee200a00
178 #define VNEG_F32	0xeeb10a40
179 #define VSTR_F32	0xed000a00
180 #define VSUB_F32	0xee300a40
181 
182 static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
183 {
184 	sljit_uh *ptr;
185 	SLJIT_ASSERT(!(inst & 0xffff0000));
186 
187 	ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh));
188 	FAIL_IF(!ptr);
189 	*ptr = inst;
190 	compiler->size++;
191 	return SLJIT_SUCCESS;
192 }
193 
194 static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
195 {
196 	sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins));
197 	FAIL_IF(!ptr);
198 	*ptr++ = inst >> 16;
199 	*ptr = inst;
200 	compiler->size += 2;
201 	return SLJIT_SUCCESS;
202 }
203 
/* Loads the 32 bit constant 'imm' into 'dst' with a fixed MOVW/MOVT pair:
   MOVW sets the low 16 bits (clearing the high half), MOVT then fills the
   high 16 bits. The COPY_BITS calls scatter each half word into the
   i:imm4:imm3:imm8 fields of the encoding. */
static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
{
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
		COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst) |
		COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
211 
/* Patches an already emitted MOVW/MOVT pair (see emit_imm32_const) in place
   so that it loads 'new_imm' instead. 'inst' points to the four half words
   of the pair; the destination register field is preserved. */
static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm)
{
	/* Destination register field, shared by both instructions. */
	sljit_si dst = inst[1] & 0x0f00;
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
	inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
	inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
	inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
}
221 
/* Decides whether the generic constant-load jump sequence emitted for 'jump'
   can be replaced by a shorter direct PC relative branch. Records the chosen
   form in jump->flags and returns the number of half words saved (the caller
   rewinds code_ptr by this amount); returns 0 when the full sequence must
   be kept. */
static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code)
{
	sljit_sw diff;

	/* Rewritable jumps must keep the full-range MOVW/MOVT form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			return 0;
		/* Distance in half words, measured from PC (two half words ahead). */
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
	}

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Encoding T1 of 'B': conditional, 8 bit offset. */
		if (diff <= 127 && diff >= -128) {
			jump->flags |= PATCH_TYPE1;
			return 5;
		}
		/* Encoding T3 of 'B': conditional, 20 bit offset. */
		if (diff <= 524287 && diff >= -524288) {
			jump->flags |= PATCH_TYPE2;
			return 4;
		}
		/* +1 comes from the prefix IT instruction. */
		diff--;
		/* IT + encoding T4 of 'B' (unconditional, 24 bit offset). */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE3;
			return 3;
		}
	}
	else if (jump->flags & IS_BL) {
		/* Encoding T1 of 'BL': 24 bit offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_BL;
			return 3;
		}
	}
	else {
		/* Encoding T2 of 'B': unconditional, 11 bit offset. */
		if (diff <= 1023 && diff >= -1024) {
			jump->flags |= PATCH_TYPE4;
			return 4;
		}
		/* Encoding T4 of 'B': unconditional, 24 bit offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE5;
			return 3;
		}
	}

	return 0;
}
276 
/* Writes the final instruction(s) for 'jump' once the target address is
   known. The patch type chosen by detect_jump_type is read back from bits
   4-7 of jump->flags; type 0 means no shortening happened and the full
   MOVW/MOVT constant load is patched instead. */
static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
{
	sljit_si type = (jump->flags >> 4) & 0xf;
	sljit_sw diff;
	sljit_uh *jump_inst;
	sljit_si s, j1, j2;

	if (SLJIT_UNLIKELY(type == 0)) {
		modify_imm32_const((sljit_uh*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
		return;
	}

	/* Distance in half words, measured from PC (instruction address + 4). */
	if (jump->flags & JUMP_ADDR) {
		SLJIT_ASSERT(jump->u.target & 0x1);
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
	}
	else
		diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
	jump_inst = (sljit_uh*)jump->addr;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
		jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
		jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
		jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
		return;
	case 3:
		/* Conditional jump materialized as IT + unconditional branch. */
		SLJIT_ASSERT(jump->flags & IS_COND);
		*jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
		diff--;
		type = 5;
		break;
	case 4:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
		jump_inst[0] = 0xe000 | (diff & 0x7ff);
		return;
	}

	SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);

	/* Really complex instruction form for branches. */
	s = (diff >> 23) & 0x1;
	j1 = (~(diff >> 21) ^ s) & 0x1;
	j2 = (~(diff >> 22) ^ s) & 0x1;
	jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
	jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);

	/* The others have a common form. */
	if (type == 5) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else if (type == 6) /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
	else
		SLJIT_ASSERT_STOP();
}
339 
/* Flattens the compiler's instruction buffers into one executable memory
   block, resolves label addresses, shortens jumps where detect_jump_type
   allows it, then writes the final branch instructions. Returns the code
   entry address with the Thumb mode bit set. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_uh *code;
	sljit_uh *code_ptr;
	sljit_uh *buf_ptr;
	sljit_uh *buf_end;
	/* Number of half words copied so far; labels/jumps/consts record their
	   position in this unit while code is being emitted. */
	sljit_uw half_count;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	do {
		buf_ptr = (sljit_uh*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			/* These structures are ordered by their address. */
			SLJIT_ASSERT(!label || label->size >= half_count);
			SLJIT_ASSERT(!jump || jump->addr >= half_count);
			SLJIT_ASSERT(!const_ || const_->addr >= half_count);
			if (label && label->size == half_count) {
				/* Label addresses carry the Thumb mode bit. */
				label->addr = ((sljit_uw)code_ptr) | 0x1;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == half_count) {
					/* Point back to the start of the emitted jump sequence
					   (8 bytes of MOVW/MOVT, plus 2 for the conditional
					   prefix), then drop any half words the shorter branch
					   form makes unnecessary. */
					jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
					code_ptr -= detect_jump_type(jump, code_ptr, code);
					jump = jump->next;
			}
			if (const_ && const_->addr == half_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			half_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may sit exactly at the end of the generated code. */
	if (label && label->size == half_count) {
		label->addr = ((sljit_uw)code_ptr) | 0x1;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: all addresses are now final, write the branches. */
	jump = compiler->jumps;
	while (jump) {
		set_jump_instruction(jump);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
420 
421 /* --------------------------------------------------------------------- */
422 /*  Core code generator functions.                                       */
423 /* --------------------------------------------------------------------- */
424 
#define INVALID_IMM	0x80000000
/* Tries to encode 'imm' in the Thumb2 modified immediate form used by the
   32 bit data processing instructions. Returns the encoded i:imm3:a:bcdefgh
   field bits (ready to be OR-ed into the opcode), or INVALID_IMM when no
   such encoding exists. */
static sljit_uw get_imm(sljit_uw imm)
{
	/* Thumb immediate form. */
	sljit_si counter;

	if (imm <= 0xff)
		return imm;

	if ((imm & 0xffff) == (imm >> 16)) {
		/* Some special cases. */
		if (!(imm & 0xff00))
			return (1 << 12) | (imm & 0xff);
		if (!(imm & 0xff))
			return (2 << 12) | ((imm >> 8) & 0xff);
		if ((imm & 0xff00) == ((imm & 0xff) << 8))
			return (3 << 12) | (imm & 0xff);
	}

	/* Remaining possibility: an 8 bit value rotated into place.
	   Normalize imm so that its highest set bit is bit 31 while counting
	   the rotation in 'counter'. */
	/* Assembly optimization: count leading zeroes? */
	counter = 8;
	if (!(imm & 0xffff0000)) {
		counter += 16;
		imm <<= 16;
	}
	if (!(imm & 0xff000000)) {
		counter += 8;
		imm <<= 8;
	}
	if (!(imm & 0xf0000000)) {
		counter += 4;
		imm <<= 4;
	}
	if (!(imm & 0xc0000000)) {
		counter += 2;
		imm <<= 2;
	}
	if (!(imm & 0x80000000)) {
		counter += 1;
		imm <<= 1;
	}
	/* Since imm >= 128, this must be true. */
	SLJIT_ASSERT(counter <= 31);

	if (imm & 0x00ffffff)
		return INVALID_IMM; /* Cannot be encoded. */

	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}
474 
/* Loads 'imm' into register 'dst' using the shortest available sequence:
   a single MOV.W/MVN.W when the value (or its complement) has a modified
   immediate encoding, a single MOVW for 16 bit values, or a full
   MOVW/MOVT pair otherwise. Flags are not affected. */
static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
{
	sljit_uw tmp;

	if (imm >= 0x10000) {
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* set low 16 bits, set hi 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
		COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* set hi 16 bit if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst) |
			COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
498 
499 #define ARG1_IMM	0x0010000
500 #define ARG2_IMM	0x0020000
501 #define KEEP_FLAGS	0x0040000
502 /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
503 #define SET_FLAGS	0x0100000
504 #define UNUSED_RETURN	0x0200000
505 #define SLOW_DEST	0x0400000
506 #define SLOW_SRC1	0x0800000
507 #define SLOW_SRC2	0x1000000
508 
509 static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2)
510 {
511 	/* dst must be register, TMP_REG1
512 	   arg1 must be register, TMP_REG1, imm
513 	   arg2 must be register, TMP_REG2, imm */
514 	sljit_si reg;
515 	sljit_uw imm, nimm;
516 
517 	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
518 		/* Both are immediates. */
519 		flags &= ~ARG1_IMM;
520 		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
521 		arg1 = TMP_REG1;
522 	}
523 
524 	if (flags & (ARG1_IMM | ARG2_IMM)) {
525 		reg = (flags & ARG2_IMM) ? arg1 : arg2;
526 		imm = (flags & ARG2_IMM) ? arg2 : arg1;
527 
528 		switch (flags & 0xffff) {
529 		case SLJIT_CLZ:
530 		case SLJIT_MUL:
531 			/* No form with immediate operand. */
532 			break;
533 		case SLJIT_MOV:
534 			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
535 			return load_immediate(compiler, dst, imm);
536 		case SLJIT_NOT:
537 			if (!(flags & SET_FLAGS))
538 				return load_immediate(compiler, dst, ~imm);
539 			/* Since the flags should be set, we just fallback to the register mode.
540 			   Although some clever things could be done here, "NOT IMM" does not worth the efforts. */
541 			break;
542 		case SLJIT_ADD:
543 			nimm = -imm;
544 			if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
545 				if (imm <= 0x7)
546 					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
547 				if (nimm <= 0x7)
548 					return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
549 				if (reg == dst) {
550 					if (imm <= 0xff)
551 						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
552 					if (nimm <= 0xff)
553 						return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst));
554 				}
555 			}
556 			if (!(flags & SET_FLAGS)) {
557 				if (imm <= 0xfff)
558 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
559 				if (nimm <= 0xfff)
560 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
561 			}
562 			imm = get_imm(imm);
563 			if (imm != INVALID_IMM)
564 				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
565 			break;
566 		case SLJIT_ADDC:
567 			imm = get_imm(imm);
568 			if (imm != INVALID_IMM)
569 				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
570 			break;
571 		case SLJIT_SUB:
572 			if (flags & ARG1_IMM) {
573 				if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
574 					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
575 				imm = get_imm(imm);
576 				if (imm != INVALID_IMM)
577 					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
578 				break;
579 			}
580 			nimm = -imm;
581 			if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
582 				if (imm <= 0x7)
583 					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
584 				if (nimm <= 0x7)
585 					return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
586 				if (reg == dst) {
587 					if (imm <= 0xff)
588 						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
589 					if (nimm <= 0xff)
590 						return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
591 				}
592 				if (imm <= 0xff && (flags & UNUSED_RETURN))
593 					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
594 			}
595 			if (!(flags & SET_FLAGS)) {
596 				if (imm <= 0xfff)
597 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
598 				if (nimm <= 0xfff)
599 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
600 			}
601 			imm = get_imm(imm);
602 			if (imm != INVALID_IMM)
603 				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
604 			break;
605 		case SLJIT_SUBC:
606 			if (flags & ARG1_IMM)
607 				break;
608 			imm = get_imm(imm);
609 			if (imm != INVALID_IMM)
610 				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
611 			break;
612 		case SLJIT_AND:
613 			nimm = get_imm(imm);
614 			if (nimm != INVALID_IMM)
615 				return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
616 			imm = get_imm(imm);
617 			if (imm != INVALID_IMM)
618 				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
619 			break;
620 		case SLJIT_OR:
621 			nimm = get_imm(imm);
622 			if (nimm != INVALID_IMM)
623 				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
624 			imm = get_imm(imm);
625 			if (imm != INVALID_IMM)
626 				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
627 			break;
628 		case SLJIT_XOR:
629 			imm = get_imm(imm);
630 			if (imm != INVALID_IMM)
631 				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
632 			break;
633 		case SLJIT_SHL:
634 		case SLJIT_LSHR:
635 		case SLJIT_ASHR:
636 			if (flags & ARG1_IMM)
637 				break;
638 			imm &= 0x1f;
639 			if (imm == 0) {
640 				if (!(flags & SET_FLAGS))
641 					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
642 				if (IS_2_LO_REGS(dst, reg))
643 					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
644 				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
645 			}
646 			switch (flags & 0xffff) {
647 			case SLJIT_SHL:
648 				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
649 					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
650 				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
651 			case SLJIT_LSHR:
652 				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
653 					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
654 				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
655 			default: /* SLJIT_ASHR */
656 				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
657 					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
658 				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
659 			}
660 		default:
661 			SLJIT_ASSERT_STOP();
662 			break;
663 		}
664 
665 		if (flags & ARG2_IMM) {
666 			FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
667 			arg2 = TMP_REG2;
668 		}
669 		else {
670 			FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
671 			arg1 = TMP_REG1;
672 		}
673 	}
674 
675 	/* Both arguments are registers. */
676 	switch (flags & 0xffff) {
677 	case SLJIT_MOV:
678 	case SLJIT_MOV_UI:
679 	case SLJIT_MOV_SI:
680 	case SLJIT_MOV_P:
681 	case SLJIT_MOVU:
682 	case SLJIT_MOVU_UI:
683 	case SLJIT_MOVU_SI:
684 	case SLJIT_MOVU_P:
685 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
686 		if (dst == arg2)
687 			return SLJIT_SUCCESS;
688 		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
689 	case SLJIT_MOV_UB:
690 	case SLJIT_MOVU_UB:
691 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
692 		if (IS_2_LO_REGS(dst, arg2))
693 			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
694 		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
695 	case SLJIT_MOV_SB:
696 	case SLJIT_MOVU_SB:
697 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
698 		if (IS_2_LO_REGS(dst, arg2))
699 			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
700 		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
701 	case SLJIT_MOV_UH:
702 	case SLJIT_MOVU_UH:
703 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
704 		if (IS_2_LO_REGS(dst, arg2))
705 			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
706 		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
707 	case SLJIT_MOV_SH:
708 	case SLJIT_MOVU_SH:
709 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
710 		if (IS_2_LO_REGS(dst, arg2))
711 			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
712 		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
713 	case SLJIT_NOT:
714 		SLJIT_ASSERT(arg1 == TMP_REG1);
715 		if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
716 			return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
717 		return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
718 	case SLJIT_CLZ:
719 		SLJIT_ASSERT(arg1 == TMP_REG1);
720 		FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
721 		if (flags & SET_FLAGS) {
722 			if (reg_map[dst] <= 7)
723 				return push_inst16(compiler, CMPI | RDN3(dst));
724 			return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
725 		}
726 		return SLJIT_SUCCESS;
727 	case SLJIT_ADD:
728 		if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
729 			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
730 		if (dst == arg1 && !(flags & SET_FLAGS))
731 			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
732 		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
733 	case SLJIT_ADDC:
734 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
735 			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
736 		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
737 	case SLJIT_SUB:
738 		if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
739 			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
740 		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
741 	case SLJIT_SUBC:
742 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
743 			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
744 		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
745 	case SLJIT_MUL:
746 		if (!(flags & SET_FLAGS))
747 			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
748 		SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
749 		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
750 		/* cmp TMP_REG2, dst asr #31. */
751 		return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
752 	case SLJIT_AND:
753 		if (!(flags & KEEP_FLAGS)) {
754 			if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
755 				return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
756 			if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
757 				return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
758 		}
759 		return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
760 	case SLJIT_OR:
761 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
762 			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
763 		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
764 	case SLJIT_XOR:
765 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
766 			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
767 		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
768 	case SLJIT_SHL:
769 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
770 			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
771 		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
772 	case SLJIT_LSHR:
773 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
774 			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
775 		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
776 	case SLJIT_ASHR:
777 		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
778 			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
779 		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
780 	}
781 
782 	SLJIT_ASSERT_STOP();
783 	return SLJIT_SUCCESS;
784 }
785 
786 #define STORE		0x01
787 #define SIGNED		0x02
788 
789 #define WORD_SIZE	0x00
790 #define BYTE_SIZE	0x04
791 #define HALF_SIZE	0x08
792 
793 #define UPDATE		0x10
794 #define ARG_TEST	0x20
795 
796 #define IS_WORD_SIZE(flags)		(!(flags & (BYTE_SIZE | HALF_SIZE)))
797 #define OFFSET_CHECK(imm, shift)	(!(argw & ~(imm << shift)))
798 
799 /*
800   1st letter:
801   w = word
802   b = byte
803   h = half
804 
805   2nd letter:
806   s = signed
807   u = unsigned
808 
809   3rd letter:
810   l = load
811   s = store
812 */
813 
/* Thumb16 register-offset load/store opcodes, indexed by the low bits of
   the access flags (STORE | SIGNED | size). */
static SLJIT_CONST sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
830 
/* Thumb16 immediate-offset (imm5) load/store opcodes, same indexing as
   sljit_mem16. Zero entries mark forms that have no 16 bit encoding. */
static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
847 
#define MEM_IMM8	0xc00
#define MEM_IMM12	0x800000
/* Thumb32 load/store opcodes, same indexing as sljit_mem16. */
static SLJIT_CONST sljit_ins sljit_mem32[12] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,
};
866 
867 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
868 static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
869 {
870 	if (value >= 0) {
871 		if (value <= 0xfff)
872 			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
873 		value = get_imm(value);
874 		if (value != INVALID_IMM)
875 			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
876 	}
877 	else {
878 		value = -value;
879 		if (value <= 0xfff)
880 			return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
881 		value = get_imm(value);
882 		if (value != INVALID_IMM)
883 			return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
884 	}
885 	return SLJIT_ERR_UNSUPPORTED;
886 }
887 
/* Can perform an operation using at most 1 instruction.
   Returns 1 when ARG_TEST is set and a one-instruction form exists,
   -1 after actually emitting the instruction, and 0 when the access
   cannot be done in a single instruction. */
static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
{
	sljit_si other_r, shift;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (SLJIT_UNLIKELY(flags & UPDATE)) {
		/* Pre/post-update form: needs a base register, no index register,
		   and an offset encodable in 8 bits plus sign. */
		if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
			if (SLJIT_UNLIKELY(flags & ARG_TEST))
				return 1;

			flags &= ~UPDATE;
			arg &= 0xf;
			/* 0x200 selects the add-offset variant; negative offsets are
			   encoded as their magnitude without it. */
			if (argw >= 0)
				argw |= 0x200;
			else {
				argw = -argw;
			}

			SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
			return -1;
		}
		return 0;
	}

	/* Register-indexed addressing (base + index << shift). */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		if (SLJIT_UNLIKELY(flags & ARG_TEST))
			return 1;

		argw &= 0x3;
		other_r = OFFS_REG(arg);
		arg &= 0xf;

		/* Prefer the narrow encoding when no shift is needed and all
		   three registers are low registers. */
		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
		else
			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
		return -1;
	}

	/* Immediate offset: must fit the 12-bit positive / 8-bit negative range. */
	if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
		return 0;

	if (SLJIT_UNLIKELY(flags & ARG_TEST))
		return 1;

	arg &= 0xf;
	/* Try the narrow 5-bit scaled immediate form first (low regs only). */
	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
		shift = 3;
		if (IS_WORD_SIZE(flags)) {
			if (OFFSET_CHECK(0x1f, 2))
				shift = 2;
		}
		else if (flags & BYTE_SIZE)
		{
			if (OFFSET_CHECK(0x1f, 0))
				shift = 0;
		}
		else {
			SLJIT_ASSERT(flags & HALF_SIZE);
			if (OFFSET_CHECK(0x1f, 1))
				shift = 1;
		}

		/* shift == 3 means the offset did not fit the scaled field. */
		if (shift != 3) {
			FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
			return -1;
		}
	}

	/* SP based immediate. */
	if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
		FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
		return -1;
	}

	/* Wide encoding: 12-bit positive or 8-bit negative immediate. */
	if (argw >= 0)
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
	else
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
	return -1;
}
972 
973 /* see getput_arg below.
974    Note: can_cache is called only for binary operators. Those
975    operators always uses word arguments without write back. */
976 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
977 {
978 	sljit_sw diff;
979 	if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
980 		return 0;
981 
982 	if (!(arg & REG_MASK)) {
983 		diff = argw - next_argw;
984 		if (diff <= 0xfff && diff >= -0xfff)
985 			return 1;
986 		return 0;
987 	}
988 
989 	if (argw == next_argw)
990 		return 1;
991 
992 	diff = argw - next_argw;
993 	if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
994 		return 1;
995 
996 	return 0;
997 }
998 
/* Emit the necessary instructions. See can_cache above.
   Performs the load/store described by (flags, reg, arg, argw), possibly
   using several instructions, and opportunistically leaves an address in
   TMP_REG3 (recorded in compiler->cache_arg/cache_argw) for the next
   access described by (next_arg, next_argw). */
static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
	sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
{
	sljit_si tmp_r, other_r;
	sljit_sw diff;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	if (!(next_arg & SLJIT_MEM)) {
		next_arg = 0;
		next_argw = 0;
	}

	tmp_r = (flags & STORE) ? TMP_REG3 : reg;

	if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
		/* Update only applies if a base register exists. */
		/* There is no caching here. */
		other_r = OFFS_REG(arg);
		arg &= 0xf;
		flags &= ~UPDATE;

		if (!other_r) {
			/* Small immediate: do the access, then bump the base. */
			if (!(argw & ~0xfff)) {
				FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
				return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
			}

			/* Reuse or adjust a previously cached absolute offset. */
			if (compiler->cache_arg == SLJIT_MEM) {
				if (argw == compiler->cache_argw) {
					other_r = TMP_REG3;
					argw = 0;
				}
				else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
					FAIL_IF(compiler->error);
					compiler->cache_argw = argw;
					other_r = TMP_REG3;
					argw = 0;
				}
			}

			/* Fall back: load the offset into TMP_REG3 and cache it. */
			if (argw) {
				FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
				compiler->cache_arg = SLJIT_MEM;
				compiler->cache_argw = argw;
				other_r = TMP_REG3;
				argw = 0;
			}
		}

		/* Register-indexed access, then advance the base register. */
		argw &= 0x3;
		if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
			return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
		}
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
		return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
	}
	flags &= ~UPDATE;

	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));

	/* TMP_REG3 already holds (arg base + cache_argw); use it when the
	   difference fits an immediate or a one-instruction adjustment. */
	if (compiler->cache_arg == arg) {
		diff = argw - compiler->cache_argw;
		if (!(diff & ~0xfff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
		if (!((compiler->cache_argw - argw) & ~0xff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
		}
	}

	/* From here next_arg is reused as a flag: non-zero means the next
	   access uses the same base with a different offset, so caching pays. */
	next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
	arg &= 0xf;
	if (arg && compiler->cache_arg == SLJIT_MEM) {
		if (compiler->cache_argw == argw)
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		}
	}

	compiler->cache_argw = argw;
	if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
		/* TMP_REG3 = base + argw in one instruction; cache base+offset. */
		FAIL_IF(compiler->error);
		compiler->cache_arg = SLJIT_MEM | arg;
		arg = 0;
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		compiler->cache_arg = SLJIT_MEM;

		diff = argw - next_argw;
		if (next_arg && diff <= 0xfff && diff >= -0xfff) {
			/* Fold the base into TMP_REG3 so the next access can use an
			   immediate offset relative to it. */
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
			compiler->cache_arg = SLJIT_MEM | arg;
			arg = 0;
		}
	}

	if (arg)
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
	return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
}
1107 
/* Emit a single memory access with no follow-up access to optimize for.
   Tries the one-instruction fast path first; otherwise resets the address
   cache and takes the general path. */
static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
{
	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
}
1116 
/* Like emit_op_mem, but passes a hint about the next access (arg2/arg2w)
   so getput_arg may cache an address. Does not reset the cache; the caller
   must have initialized cache_arg/cache_argw. */
static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
{
	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
		return compiler->error;
	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}
1123 
1124 /* --------------------------------------------------------------------- */
1125 /*  Entry, exit                                                          */
1126 /* --------------------------------------------------------------------- */
1127 
1128 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
1129 {
1130 	sljit_si size;
1131 	sljit_ins push;
1132 
1133 	CHECK_ERROR();
1134 	check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
1135 
1136 	compiler->scratches = scratches;
1137 	compiler->saveds = saveds;
1138 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
1139 	compiler->logical_local_size = local_size;
1140 #endif
1141 
1142 	push = (1 << 4);
1143 	if (saveds >= 5)
1144 		push |= 1 << 11;
1145 	if (saveds >= 4)
1146 		push |= 1 << 10;
1147 	if (saveds >= 3)
1148 		push |= 1 << 8;
1149 	if (saveds >= 2)
1150 		push |= 1 << 7;
1151 	if (saveds >= 1)
1152 		push |= 1 << 6;
1153         if (scratches >= 5)
1154 		push |= 1 << 5;
1155 	FAIL_IF(saveds >= 3
1156 		? push_inst32(compiler, PUSH_W | (1 << 14) | push)
1157 		: push_inst16(compiler, PUSH | push));
1158 
1159 	/* Stack must be aligned to 8 bytes: */
1160 	size = (3 + saveds) * sizeof(sljit_uw);
1161 	local_size += size;
1162 	local_size = (local_size + 7) & ~7;
1163 	local_size -= size;
1164 	compiler->local_size = local_size;
1165 	if (local_size > 0) {
1166 		if (local_size <= (127 << 2))
1167 			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
1168 		else
1169 			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size));
1170 	}
1171 
1172 	if (args >= 1)
1173 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1)));
1174 	if (args >= 2)
1175 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2)));
1176 	if (args >= 3)
1177 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3)));
1178 
1179 	return SLJIT_SUCCESS;
1180 }
1181 
1182 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
1183 {
1184 	sljit_si size;
1185 
1186 	CHECK_ERROR_VOID();
1187 	check_sljit_set_context(compiler, args, scratches, saveds, local_size);
1188 
1189 	compiler->scratches = scratches;
1190 	compiler->saveds = saveds;
1191 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
1192 	compiler->logical_local_size = local_size;
1193 #endif
1194 
1195 	size = (3 + saveds) * sizeof(sljit_uw);
1196 	local_size += size;
1197 	local_size = (local_size + 7) & ~7;
1198 	local_size -= size;
1199 	compiler->local_size = local_size;
1200 }
1201 
1202 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
1203 {
1204 	sljit_ins pop;
1205 
1206 	CHECK_ERROR();
1207 	check_sljit_emit_return(compiler, op, src, srcw);
1208 
1209 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1210 
1211 	if (compiler->local_size > 0) {
1212 		if (compiler->local_size <= (127 << 2))
1213 			FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
1214 		else
1215 			FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size));
1216 	}
1217 
1218 	pop = (1 << 4);
1219 	if (compiler->saveds >= 5)
1220 		pop |= 1 << 11;
1221 	if (compiler->saveds >= 4)
1222 		pop |= 1 << 10;
1223 	if (compiler->saveds >= 3)
1224 		pop |= 1 << 8;
1225 	if (compiler->saveds >= 2)
1226 		pop |= 1 << 7;
1227 	if (compiler->saveds >= 1)
1228 		pop |= 1 << 6;
1229         if (compiler->scratches >= 5)
1230 		pop |= 1 << 5;
1231 	return compiler->saveds >= 3
1232 		? push_inst32(compiler, POP_W | (1 << 15) | pop)
1233 		: push_inst16(compiler, POP | pop);
1234 }
1235 
1236 /* --------------------------------------------------------------------- */
1237 /*  Operators                                                            */
1238 /* --------------------------------------------------------------------- */
1239 
#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__)
/* ARM EABI run-time division helpers. The prototypes are only used to
   take the helpers' addresses (see sljit_emit_op0); the second parameter
   previously read "int unsigned", normalized here to "unsigned int". */
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif
1254 
/* Zero-operand (and fixed-register) operations: breakpoint, nop,
   multiply-long and the software division helpers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_UMUL:
	case SLJIT_SMUL:
		/* 64-bit multiply: result low/high go to scratch regs 1 and 2. */
		return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
			| (reg_map[SLJIT_SCRATCH_REG2] << 8)
			| (reg_map[SLJIT_SCRATCH_REG1] << 12)
			| (reg_map[SLJIT_SCRATCH_REG1] << 16)
			| reg_map[SLJIT_SCRATCH_REG2]);
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		/* The helper call clobbers r2 and ip; spill whichever of them
		   belong to the active scratch set, restore them afterwards. */
		if (compiler->scratches >= 4) {
			FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
			FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
		} else if (compiler->scratches >= 3)
			FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */));
#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			(op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif
		if (compiler->scratches >= 4) {
			FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
			return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
		} else if (compiler->scratches >= 3)
			return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */);
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
1296 
/* Single-operand operations: the MOV/MOVU family (with size/sign
   conversion and optional base-register update) and the remaining unary
   operators. NEG is rewritten as "0 - src" via sljit_emit_op2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r, flags;
	sljit_si op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
		/* Select access size/sign flags; immediates are pre-truncated
		   here so a plain MOV can materialize them below. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_UB:
			flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_ub)srcw;
			break;
		case SLJIT_MOV_SB:
			flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_sb)srcw;
			break;
		case SLJIT_MOV_UH:
			flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_uh)srcw;
			break;
		case SLJIT_MOV_SH:
			flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_sh)srcw;
			break;
		case SLJIT_MOVU:
		case SLJIT_MOVU_UI:
		case SLJIT_MOVU_SI:
		case SLJIT_MOVU_P:
			flags = WORD_SIZE | UPDATE;
			break;
		case SLJIT_MOVU_UB:
			flags = BYTE_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_ub)srcw;
			break;
		case SLJIT_MOVU_SB:
			flags = BYTE_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_sb)srcw;
			break;
		case SLJIT_MOVU_UH:
			flags = HALF_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_uh)srcw;
			break;
		case SLJIT_MOVU_SH:
			flags = HALF_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_sh)srcw;
			break;
		default:
			SLJIT_ASSERT_STOP();
			flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			/* srcw already truncated above, so SLJIT_MOV suffices. */
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
		else if (src & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
				FAIL_IF(compiler->error);
			else
				FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
		} else {
			/* Register-to-register conversion; for register dst the
			   store below is skipped, so just track the source. */
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
			dst_r = src;
		}

		if (dst & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
				return compiler->error;
			else
				return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
		}
		return SLJIT_SUCCESS;
	}

	if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
	}

	flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
	if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
			FAIL_IF(compiler->error);
		else
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
		src = TMP_REG2;
	}

	if (src & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		srcw = src;

	/* NOTE(review): return value intentionally(?) unchecked; any failure
	   is recorded in compiler->error - confirm downstream handling. */
	emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);

	if (dst & SLJIT_MEM) {
		if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
			return compiler->error;
		else
			return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}
1428 
/* Two-operand operations. Memory operands are loaded into TMP_REG1/TMP_REG2
   (with address caching between the two loads and the store when the fast
   one-instruction forms do not apply). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r, flags;

	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);

	/* Probe (ARG_TEST) whether the destination store needs the slow path. */
	if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
	}
	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
	}

	/* Order the two slow loads so the cached address helps the later
	   access (and, if possible, the destination store). */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));

	if (src1 & SLJIT_MEM)
		src1 = TMP_REG1;
	if (src2 & SLJIT_MEM)
		src2 = TMP_REG2;

	/* emit_op_imm takes register numbers or immediates in src1w/src2w. */
	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else
		src1w = src1;
	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		src2w = src2;

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	/* NOTE(review): return value intentionally(?) unchecked; any failure
	   is recorded in compiler->error - confirm downstream handling. */
	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);

	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}
1507 
/* Return the hardware register number backing an sljit register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
	return reg_map[reg];
}
1513 
/* Float registers are identity-mapped on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}
1519 
/* Emit a raw machine instruction supplied by the caller: 2 bytes for a
   narrow Thumb instruction, 4 bytes for a wide one. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size == 2 || size == 4);

	if (size == 2)
		return push_inst16(compiler, *(sljit_uh*)instruction);
	return push_inst32(compiler, *(sljit_ins*)instruction);
}
1531 
1532 /* --------------------------------------------------------------------- */
1533 /*  Floating point operators                                             */
1534 /* --------------------------------------------------------------------- */
1535 
/* Report FPU availability; overridable at build time via
   SLJIT_IS_FPU_AVAILABLE, otherwise assumed present. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
1545 
1546 #define FPU_LOAD (1 << 20)
1547 
/* Emit a VFP load or store (VLDR/VSTR). FPU_LOAD in flags selects load;
   SLJIT_SINGLE_OP selects single precision. 0x800000 in the encoding
   selects the add-offset form; without it the scaled offset is subtracted. */
static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
{
	sljit_sw tmp;
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VLDR/VSTR have no register-index form: materialize
		   base + (index << shift) in TMP_REG2 first. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG2;
		argw = 0;
	}

	/* Word-aligned offset within the 8-bit scaled immediate range. */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
	}

	/* Slow cases */
	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
	/* Reuse a cached address in TMP_REG3 when the delta fits. */
	if (compiler->cache_arg == arg) {
		tmp = argw - compiler->cache_argw;
		if (!(tmp & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
		if (!(-tmp & ~0x3fc))
			return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
		}
	}

	/* Fold the offset (or its out-of-range part) into TMP_REG1. */
	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
		}
		imm = get_imm(argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
	}

	/* Last resort: full address in TMP_REG3, cached for the next access. */
	compiler->cache_arg = arg;
	compiler->cache_argw = argw;

	FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
}
1611 
/* Single-operand floating point operations (move, negate, absolute value)
   plus the comparison, which transfers the FP status flags to the CPU. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* The single-precision bit is inverted so it lines up with the
	   instruction encodings used below. */
	op ^= SLJIT_SINGLE_OP;

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		/* NOTE(review): emit_fop_mem return values are unchecked here;
		   failures land in compiler->error - confirm downstream handling. */
		if (dst & SLJIT_MEM) {
			emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw);
			dst = TMP_FREG1;
		}
		if (src & SLJIT_MEM) {
			emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw);
			src = TMP_FREG2;
		}
		FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src)));
		/* Copy the FP status flags into the CPU flags. */
		return push_inst32(compiler, VMRS);
	}

	dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
	if (src & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw);
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOVD:
		if (src != dst_r)
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_NEGD:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_ABSD:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
}
1662 
/* Two-operand floating point arithmetic (add, sub, mul, div). Memory
   operands are loaded into TMP_FREG1/TMP_FREG2 first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* Invert the single-precision bit to match the encodings below. */
	op ^= SLJIT_SINGLE_OP;

	dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_SUBD:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_MULD:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_DIVD:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
}
1706 
1707 #undef FPU_LOAD
1708 
1709 /* --------------------------------------------------------------------- */
1710 /*  Other instructions                                                   */
1711 /* --------------------------------------------------------------------- */
1712 
/* Store the fast-call return address (kept in TMP_REG3) into dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
{
	CHECK_ERROR();
	check_sljit_emit_fast_enter(compiler, dst, dstw);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (dst <= REG_MASK)
		return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));

	/* Memory. */
	if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
		return compiler->error;
	/* TMP_REG3 is used for caching. */
	FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
}
1735 
/* Load the return address from src into TMP_REG3 and branch to it (BLX). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	check_sljit_emit_fast_return(compiler, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (src <= REG_MASK)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
	else if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
			FAIL_IF(compiler->error);
		else {
			/* TMP_REG3 is the cache register: load through TMP_REG2. */
			compiler->cache_arg = 0;
			compiler->cache_argw = 0;
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
		}
	}
	else if (src & SLJIT_IMM)
		FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
	return push_inst16(compiler, BLX | RN3(TMP_REG3));
}
1758 
1759 /* --------------------------------------------------------------------- */
1760 /*  Conditional instructions                                             */
1761 /* --------------------------------------------------------------------- */
1762 
/* Map an sljit condition type to the 4-bit ARM condition code field. */
static sljit_uw get_cc(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_MUL_NOT_OVERFLOW:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x0; /* EQ */

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_MUL_OVERFLOW:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x1; /* NE */

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x3; /* CC/LO: unsigned lower */

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x2; /* CS/HS: unsigned higher or same */

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x8; /* HI */

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x9; /* LS */

	case SLJIT_C_SIG_LESS:
		return 0xb; /* LT */

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0xa; /* GE */

	case SLJIT_C_SIG_GREATER:
		return 0xc; /* GT */

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0xd; /* LE */

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_FLOAT_UNORDERED:
		return 0x6; /* VS */

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_FLOAT_ORDERED:
		return 0x7; /* VC */

	default: /* SLJIT_JUMP */
		return 0xe; /* AL */
	}
}
1816 
1817 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1818 {
1819 	struct sljit_label *label;
1820 
1821 	CHECK_ERROR_PTR();
1822 	check_sljit_emit_label(compiler);
1823 
1824 	if (compiler->last_label && compiler->last_label->size == compiler->size)
1825 		return compiler->last_label;
1826 
1827 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1828 	PTR_FAIL_IF(!label);
1829 	set_label(label, compiler);
1830 	return label;
1831 }
1832 
/* Emit a (possibly conditional) jump or call whose target is not yet
   known: a patchable 32 bit constant load into TMP_REG1 followed by an
   indirect branch.  The target is filled in later via the jump record. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In ARM, we don't need to touch the arguments. */
	/* Placeholder constant (0); patched with the real target address later. */
	PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(type);
		/* Keep the condition code in the flags for the patcher. */
		jump->flags |= cc << 8;
		/* One-instruction IT block makes the following branch conditional. */
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	/* Record where the branch instruction itself is emitted. */
	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		/* Calls use BLX so the return address is saved (in LR). */
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	return jump;
}
1865 
/* Emit an indirect jump or call to a register, memory, or immediate
   target known at emit time (immediates still go through a jump
   record so rewritable addresses can be patched). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* In ARM, we don't need to touch the arguments. */
	if (!(src & SLJIT_IMM)) {
		/* Target already in a register: branch (or call) through it. */
		if (FAST_IS_REG(src))
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));

		/* Memory target.  Plain jumps load straight into PC (TMP_PC),
		   which performs the branch; calls must load TMP_REG1 first so
		   BLX can record the return address. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* Immediate target: create a jump record carrying the absolute
	   address so generation/patching can resolve it. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = srcw;

	/* Placeholder constant; patched with the final address later. */
	FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
1893 
/* Materialize the condition "type" as a 0/1 value in dst.  For MOV-type
   opcodes dst simply receives the flag value; for AND/OR/XOR the flag
   value is combined with the current contents of dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_si dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	cc = get_cc(type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op < SLJIT_ADD) {
		/* ITE block: the first MOV below executes when the condition
		   holds (loading 1), the second when it does not (loading 0). */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			/* High registers need the 32 bit immediate MOV encoding. */
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (dst_r != TMP_REG2)
			return SLJIT_SUCCESS;
		/* dst is a memory operand: spill the result from TMP_REG2. */
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
	}

	ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
	if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
		/* Does not change the other bits. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
		if (flags & SLJIT_SET_E) {
			/* The condition must always be set, even if the ORRI/EORI is not executed above. */
			if (reg_map[dst] <= 7)
				return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
			return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
		}
		return SLJIT_SUCCESS;
	}

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* Bring src into a register so the predicated ALU forms can use it. */
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
		src = TMP_REG2;
		srcw = 0;
	} else if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		src = TMP_REG2;
		srcw = 0;
	}

	if (op == SLJIT_AND || src != dst_r) {
		/* ITE block: combine src with 1 when the condition holds,
		   with 0 otherwise, so dst_r is always written. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 would be a no-op, so only the true case is needed. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
	}

	if (dst_r == TMP_REG2)
		FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));

	if (flags & SLJIT_SET_E) {
		/* The condition must always be set, even if the ORRI/EORI is not executed above. */
		if (reg_map[dst_r] <= 7)
			return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
		return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
	}
	return SLJIT_SUCCESS;
}
1975 
1976 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
1977 {
1978 	struct sljit_const *const_;
1979 	sljit_si dst_r;
1980 
1981 	CHECK_ERROR_PTR();
1982 	check_sljit_emit_const(compiler, dst, dstw, init_value);
1983 	ADJUST_LOCAL_OFFSET(dst, dstw);
1984 
1985 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
1986 	PTR_FAIL_IF(!const_);
1987 	set_const(const_, compiler);
1988 
1989 	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1990 	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));
1991 
1992 	if (dst & SLJIT_MEM)
1993 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
1994 	return const_;
1995 }
1996 
1997 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
1998 {
1999 	sljit_uh *inst = (sljit_uh*)addr;
2000 	modify_imm32_const(inst, new_addr);
2001 	SLJIT_CACHE_FLUSH(inst, inst + 4);
2002 }
2003 
2004 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2005 {
2006 	sljit_uh *inst = (sljit_uh*)addr;
2007 	modify_imm32_const(inst, new_constant);
2008 	SLJIT_CACHE_FLUSH(inst, inst + 4);
2009 }
2010