xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativePPC_common.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: sljitNativePPC_common.c,v 1.7 2016/05/30 09:34:39 alnsn Exp $	*/
2 
3 /*
4  *    Stack-less Just-In-Time compiler
5  *
6  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without modification, are
9  * permitted provided that the following conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above copyright notice, this list of
12  *      conditions and the following disclaimer.
13  *
14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
15  *      of conditions and the following disclaimer in the documentation and/or other materials
16  *      provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
/* Returns a human readable name of the target this back-end generates code
   for. SLJIT_CPUINFO is a string-literal macro (defined elsewhere in sljit)
   that is concatenated onto "PowerPC"; presumably it encodes word size /
   build details — confirm in sljitConfigInternal.h. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "PowerPC" SLJIT_CPUINFO;
}
33 
34 /* Length of an instruction word.
35    Both for ppc-32 and ppc-64. */
36 typedef sljit_u32 sljit_ins;
37 
38 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
39 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
40 #define SLJIT_PPC_STACK_FRAME_V2 1
41 #endif
42 
43 #ifdef _AIX
44 #include <sys/cache.h>
45 #endif
46 
47 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
48 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
49 #endif
50 
51 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
52 
/* Flushes the data cache and invalidates the instruction cache over the
   half-open range [from, to) so that freshly generated machine code becomes
   visible to instruction fetch. On AIX the system helper is used; otherwise
   inline assembly is selected by architecture macros. */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
	/* Cache flush for POWER architecture. */
	while (from < to) {
		__asm__ volatile (
			"clf 0, %0\n"
			"dcs\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "ics" );
#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#	error "Cache flush is not implemented for PowerPC/POWER common mode."
#	else
	/* Cache flush for PowerPC architecture: flush each data cache block,
	   then invalidate the corresponding instruction cache block. */
	while (from < to) {
		__asm__ volatile (
			"dcbf 0, %0\n"
			"sync\n"
			"icbi 0, %0\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "isync" );
#	endif
#	ifdef __xlc__
#	warning "This file may fail to compile if -qfuncsect is used"
#	endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}
93 
94 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
95 
96 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
97 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
98 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
99 #define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)
100 
101 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
102 #define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
103 #else
104 #define TMP_CALL_REG	TMP_REG2
105 #endif
106 
107 #define TMP_FREG1	(0)
108 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
109 
/* Mapping of sljit virtual registers to physical PowerPC GPR numbers.
   Index 0 is unused (maps to r0); the trailing entries cover TMP_REG1..3,
   TMP_ZERO and TMP_CALL_REG defined above. NOTE(review): the exact
   virtual-to-physical assignment follows the sljit register numbering
   declared in the common headers — verify against sljitLir.h before
   changing. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};
113 
114 /* --------------------------------------------------------------------- */
115 /*  Instrucion forms                                                     */
116 /* --------------------------------------------------------------------- */
117 #define D(d)		(reg_map[d] << 21)
118 #define S(s)		(reg_map[s] << 21)
119 #define A(a)		(reg_map[a] << 16)
120 #define B(b)		(reg_map[b] << 11)
121 #define C(c)		(reg_map[c] << 6)
122 #define FD(fd)		((fd) << 21)
123 #define FS(fs)		((fs) << 21)
124 #define FA(fa)		((fa) << 16)
125 #define FB(fb)		((fb) << 11)
126 #define FC(fc)		((fc) << 6)
127 #define IMM(imm)	((imm) & 0xffff)
128 #define CRD(d)		((d) << 21)
129 
130 /* Instruction bit sections.
131    OE and Rc flag (see ALT_SET_FLAGS). */
132 #define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
133 /* Rc flag (see ALT_SET_FLAGS). */
134 #define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
135 #define HI(opcode)	((opcode) << 26)
136 #define LO(opcode)	((opcode) << 1)
137 
138 #define ADD		(HI(31) | LO(266))
139 #define ADDC		(HI(31) | LO(10))
140 #define ADDE		(HI(31) | LO(138))
141 #define ADDI		(HI(14))
142 #define ADDIC		(HI(13))
143 #define ADDIS		(HI(15))
144 #define ADDME		(HI(31) | LO(234))
145 #define AND		(HI(31) | LO(28))
146 #define ANDI		(HI(28))
147 #define ANDIS		(HI(29))
148 #define Bx		(HI(18))
149 #define BCx		(HI(16))
150 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
151 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
152 #define CNTLZD		(HI(31) | LO(58))
153 #define CNTLZW		(HI(31) | LO(26))
154 #define CMP		(HI(31) | LO(0))
155 #define CMPI		(HI(11))
156 #define CMPL		(HI(31) | LO(32))
157 #define CMPLI		(HI(10))
158 #define CROR		(HI(19) | LO(449))
159 #define DIVD		(HI(31) | LO(489))
160 #define DIVDU		(HI(31) | LO(457))
161 #define DIVW		(HI(31) | LO(491))
162 #define DIVWU		(HI(31) | LO(459))
163 #define EXTSB		(HI(31) | LO(954))
164 #define EXTSH		(HI(31) | LO(922))
165 #define EXTSW		(HI(31) | LO(986))
166 #define FABS		(HI(63) | LO(264))
167 #define FADD		(HI(63) | LO(21))
168 #define FADDS		(HI(59) | LO(21))
169 #define FCFID		(HI(63) | LO(846))
170 #define FCMPU		(HI(63) | LO(0))
171 #define FCTIDZ		(HI(63) | LO(815))
172 #define FCTIWZ		(HI(63) | LO(15))
173 #define FDIV		(HI(63) | LO(18))
174 #define FDIVS		(HI(59) | LO(18))
175 #define FMR		(HI(63) | LO(72))
176 #define FMUL		(HI(63) | LO(25))
177 #define FMULS		(HI(59) | LO(25))
178 #define FNEG		(HI(63) | LO(40))
179 #define FRSP		(HI(63) | LO(12))
180 #define FSUB		(HI(63) | LO(20))
181 #define FSUBS		(HI(59) | LO(20))
182 #define LD		(HI(58) | 0)
183 #define LWZ		(HI(32))
184 #define MFCR		(HI(31) | LO(19))
185 #define MFLR		(HI(31) | LO(339) | 0x80000)
186 #define MFXER		(HI(31) | LO(339) | 0x10000)
187 #define MTCTR		(HI(31) | LO(467) | 0x90000)
188 #define MTLR		(HI(31) | LO(467) | 0x80000)
189 #define MTXER		(HI(31) | LO(467) | 0x10000)
190 #define MULHD		(HI(31) | LO(73))
191 #define MULHDU		(HI(31) | LO(9))
192 #define MULHW		(HI(31) | LO(75))
193 #define MULHWU		(HI(31) | LO(11))
194 #define MULLD		(HI(31) | LO(233))
195 #define MULLI		(HI(7))
196 #define MULLW		(HI(31) | LO(235))
197 #define NEG		(HI(31) | LO(104))
198 #define NOP		(HI(24))
199 #define NOR		(HI(31) | LO(124))
200 #define OR		(HI(31) | LO(444))
201 #define ORI		(HI(24))
202 #define ORIS		(HI(25))
203 #define RLDICL		(HI(30))
204 #define RLWINM		(HI(21))
205 #define SLD		(HI(31) | LO(27))
206 #define SLW		(HI(31) | LO(24))
207 #define SRAD		(HI(31) | LO(794))
208 #define SRADI		(HI(31) | LO(413 << 1))
209 #define SRAW		(HI(31) | LO(792))
210 #define SRAWI		(HI(31) | LO(824))
211 #define SRD		(HI(31) | LO(539))
212 #define SRW		(HI(31) | LO(536))
213 #define STD		(HI(62) | 0)
214 #define STDU		(HI(62) | 1)
215 #define STDUX		(HI(31) | LO(181))
216 #define STFIWX		(HI(31) | LO(983))
217 #define STW		(HI(36))
218 #define STWU		(HI(37))
219 #define STWUX		(HI(31) | LO(183))
220 #define SUBF		(HI(31) | LO(40))
221 #define SUBFC		(HI(31) | LO(8))
222 #define SUBFE		(HI(31) | LO(136))
223 #define SUBFIC		(HI(8))
224 #define XOR		(HI(31) | LO(316))
225 #define XORI		(HI(26))
226 #define XORIS		(HI(27))
227 
228 #define SIMM_MAX	(0x7fff)
229 #define SIMM_MIN	(-0x8000)
230 #define UIMM_MAX	(0xffff)
231 
232 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
/* Builds a function descriptor for JIT-ed code on ABIs that call through
   descriptors (SLJIT_INDIRECT_CALL). A descriptor is read here as three
   machine words taken from an existing function pointer `func`:
   [0] entry address, [1] the r2 value (TOC), [2] the r11 value
   (environment). `addr` overrides the entry address when non-zero; when
   `func_ptr` is non-NULL it is set to point at the new descriptor. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
{
	sljit_sw* ptrs;
	if (func_ptr)
		*func_ptr = (void*)context;
	ptrs = (sljit_sw*)func;
	context->addr = addr ? addr : ptrs[0];
	context->r2 = ptrs[1];
	context->r11 = ptrs[2];
}
243 #endif
244 
245 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
246 {
247 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
248 	FAIL_IF(!ptr);
249 	*ptr = ins;
250 	compiler->size++;
251 	return SLJIT_SUCCESS;
252 }
253 
/* Decides whether a recorded jump can use a shorter encoding now that the
   distance to its target is known. On success the chosen patch kind is
   recorded in jump->flags (PATCH_B / PATCH_ABS_B / PATCH_ABS32 /
   PATCH_ABS48, optionally REMOVE_COND) and non-zero is returned; returning
   0 keeps the full absolute immediate-load sequence. `code_ptr` is the
   current output position, `code` the start of the output buffer. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_sw extra_jump_flags;

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* Rewritable jumps (and calls, in this configuration) must keep the
	   long form so they can be repatched later. */
	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
		return 0;
#else
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;
#endif

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* Label address is derived from its instruction index within the
		   output buffer. */
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Calls need the target address in a register, so only the absolute
	   immediate-load forms below are considered. */
	if (jump->flags & IS_CALL)
		goto keep_address;
#endif

	/* Word-aligned signed distance from the current output position. */
	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;

	extra_jump_flags = 0;
	if (jump->flags & IS_COND) {
		/* Conditional branch (bc) has a 16 bit displacement field. */
		if (diff <= 0x7fff && diff >= -0x8000) {
			jump->flags |= PATCH_B;
			return 1;
		}
		if (target_addr <= 0xffff) {
			jump->flags |= PATCH_B | PATCH_ABS_B;
			return 1;
		}
		/* Out of bc range: the condition will be inverted and followed by
		   an unconditional branch, which adds one instruction. */
		extra_jump_flags = REMOVE_COND;

		diff -= sizeof(sljit_ins);
	}

	/* Unconditional branch (b) has a 26 bit displacement field. */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		jump->flags |= PATCH_B | extra_jump_flags;
		return 1;
	}
	if (target_addr <= 0x03ffffff) {
		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
		return 1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
keep_address:
#endif
	/* 64-bit only: shorter absolute immediate-load sequences when the
	   target fits in 32 or 48 bits. */
	if (target_addr <= 0x7fffffff) {
		jump->flags |= PATCH_ABS32;
		return 1;
	}
	if (target_addr <= 0x7fffffffffffl) {
		jump->flags |= PATCH_ABS48;
		return 1;
	}
#endif

	return 0;
}
322 
/* Assembles all buffered instructions into a freshly allocated executable
   region, resolving labels, shortening jumps where detect_jump_type allows
   it, and patching branch displacements / immediate loads. Returns the
   entry point of the generated code (a function descriptor when
   SLJIT_INDIRECT_CALL is set), or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Reserve room after the code for the function descriptor; on 64 bit
	   an extra word may be needed to keep it 8-byte aligned. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#endif
#endif
	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	/* Copy every buffered word, fixing up labels/jumps/consts as their
	   recorded instruction index (word_count) is reached. */
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* jump->addr becomes the address of the first instruction
				   of the (3 word / 6 word) immediate-load sequence. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				if (detect_jump_type(jump, code_ptr, code)) {
					/* A shorter form was chosen: drop the now-unneeded
					   words of the immediate-load sequence. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					code_ptr[-3] = code_ptr[0];
					code_ptr -= 3;
#else
					if (jump->flags & PATCH_ABS32) {
						code_ptr -= 3;
						code_ptr[-1] = code_ptr[2];
						code_ptr[0] = code_ptr[3];
					}
					else if (jump->flags & PATCH_ABS48) {
						code_ptr--;
						code_ptr[-1] = code_ptr[0];
						code_ptr[0] = code_ptr[1];
						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
						code_ptr[-3] ^= 0x8422;
						/* oris -> ori */
						code_ptr[-2] ^= 0x4000000;
					}
					else {
						code_ptr[-6] = code_ptr[0];
						code_ptr -= 6;
					}
#endif
					if (jump->flags & REMOVE_COND) {
						/* Replace the conditional branch by an inverted one
						   that skips the following unconditional branch. */
						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
						code_ptr++;
						jump->addr += sizeof(sljit_ins);
						code_ptr[0] = Bx;
						jump->flags -= IS_COND;
					}
				}
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may sit exactly at the end of the code. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
#endif

	/* Second pass: now that every label address is final, patch the
	   branch displacements and immediate-load fields. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins*)jump->addr;
			if (jump->flags & PATCH_B) {
				if (jump->flags & IS_COND) {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
					}
					else {
						SLJIT_ASSERT(addr <= 0xffff);
						/* 0x2 sets the AA (absolute address) bit. */
						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
					}
				}
				else {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
					}
					else {
						SLJIT_ASSERT(addr <= 0x03ffffff);
						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
					}
				}
				break;
			}
			/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
			if (jump->flags & PATCH_ABS32) {
				SLJIT_ASSERT(addr <= 0x7fffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			if (jump->flags & PATCH_ABS48) {
				SLJIT_ASSERT(addr <= 0x7fffffffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
				/* buf_ptr[2] is the shift instruction, left untouched. */
				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Place the function descriptor after the code and return it as the
	   callable entry point. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (((sljit_sw)code_ptr) & 0x4)
		code_ptr++;
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#else
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#endif
#else
	return code;
#endif
}
514 
515 /* --------------------------------------------------------------------- */
516 /*  Entry, exit                                                          */
517 /* --------------------------------------------------------------------- */
518 
519 /* inp_flags: */
520 
521 /* Creates an index in data_transfer_insts array. */
522 #define LOAD_DATA	0x01
523 #define INDEXED		0x02
524 #define WRITE_BACK	0x04
525 #define WORD_DATA	0x00
526 #define BYTE_DATA	0x08
527 #define HALF_DATA	0x10
528 #define INT_DATA	0x18
529 #define SIGNED_DATA	0x20
530 /* Separates integer and floating point registers */
531 #define GPR_REG		0x3f
532 #define DOUBLE_DATA	0x40
533 
534 #define MEM_MASK	0x7f
535 
536 /* Other inp_flags. */
537 
538 #define ARG_TEST	0x000100
539 /* Integer opertion and set flags -> requires exts on 64 bit systems. */
540 #define ALT_SIGN_EXT	0x000200
541 /* This flag affects the RC() and OERC() macros. */
542 #define ALT_SET_FLAGS	0x000400
543 #define ALT_KEEP_CACHE	0x000800
544 #define ALT_FORM1	0x010000
545 #define ALT_FORM2	0x020000
546 #define ALT_FORM3	0x040000
547 #define ALT_FORM4	0x080000
548 #define ALT_FORM5	0x100000
549 #define ALT_FORM6	0x200000
550 
551 /* Source and destination is register. */
552 #define REG_DEST	0x000001
553 #define REG1_SOURCE	0x000002
554 #define REG2_SOURCE	0x000004
555 /* getput_arg_fast returned true. */
556 #define FAST_DEST	0x000008
557 /* Multiple instructions are required. */
558 #define SLOW_DEST	0x000010
559 /*
560 ALT_SIGN_EXT		0x000200
561 ALT_SET_FLAGS		0x000400
562 ALT_FORM1		0x010000
563 ...
564 ALT_FORM6		0x200000 */
565 
566 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
567 #include "sljitNativePPC_32.c"
568 #else
569 #include "sljitNativePPC_64.c"
570 #endif
571 
572 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
573 #define STACK_STORE	STW
574 #define STACK_LOAD	LWZ
575 #else
576 #define STACK_STORE	STD
577 #define STACK_LOAD	LD
578 #endif
579 
/* Emits the function prologue: saves LR and the callee-saved registers
   above the stack pointer, zeroes TMP_ZERO, moves the incoming argument
   registers into the saved registers S0..S2 as requested by `args`, and
   finally allocates the (16-byte aligned) stack frame with a store-with-
   update so the back chain is written in the same instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	/* Keep the return address in r0 until it can be stored below. */
	FAIL_IF(push_inst(compiler, MFLR | D(0)));
	offs = -(sljit_s32)(sizeof(sljit_sw));
	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));

	/* Store the used saved registers at negative offsets from SP. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	/* Scratches above SLJIT_FIRST_SAVED_REG are callee-saved too. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));

	/* Save LR (in r0) in the caller's frame, at the ABI-defined slot. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* li TMP_ZERO, 0 — TMP_ZERO holds constant zero from here on. */
	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
	/* Copy incoming arguments (r3..r5) into the saved registers. */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
	/* Keep the stack pointer 16-byte aligned. */
	local_size = (local_size + 15) & ~0xf;
	compiler->local_size = local_size;

	/* Allocate the frame; the update form also stores the back chain. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#else
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#endif

	return SLJIT_SUCCESS;
}
643 
644 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
645 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
646 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
647 {
648 	CHECK_ERROR();
649 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
650 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
651 
652 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
653 	compiler->local_size = (local_size + 15) & ~0xf;
654 	return SLJIT_SUCCESS;
655 }
656 
/* Emits the function epilogue: moves the return value into place,
   deallocates the stack frame, restores LR and the callee-saved
   registers saved by sljit_emit_enter (in reverse layout order), and
   returns with blr. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* Pop the frame: SP += local_size. */
	if (compiler->local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}

	/* Reload the return address into r0 from its ABI slot. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);

	/* Restore the callee-saved scratch registers. */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	/* Restore the saved registers. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	/* TMP_ZERO occupies the slot closest to SP. */
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));

	FAIL_IF(push_inst(compiler, MTLR | S(0)));
	FAIL_IF(push_inst(compiler, BLR));

	return SLJIT_SUCCESS;
}
701 
702 #undef STACK_STORE
703 #undef STACK_LOAD
704 
705 /* --------------------------------------------------------------------- */
706 /*  Operators                                                            */
707 /* --------------------------------------------------------------------- */
708 
709 /* i/x - immediate/indexed form
710    n/w - no write-back / write-back (1 bit)
711    s/l - store/load (1 bit)
712    u/s - signed/unsigned (1 bit)
713    w/b/h/i - word/byte/half/int allowed (2 bit)
714    It contans 32 items, but not all are different. */
715 
716 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
717 #define INT_ALIGNED	0x10000
718 /* 64-bit only: there is no lwau instruction. */
719 #define UPDATE_REQ	0x20000
720 
721 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
722 #define ARCH_32_64(a, b)	a
723 #define INST_CODE_AND_DST(inst, flags, reg) \
724 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
725 #else
726 #define ARCH_32_64(a, b)	b
727 #define INST_CODE_AND_DST(inst, flags, reg) \
728 	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
729 #endif
730 
/* Opcode table for memory loads/stores, indexed by the inp_flags bit
   layout described above: bit0 load, bit1 indexed, bit2 write-back,
   bits3-4 size, bit5 signed, bit6 floating point. Some 64-bit entries
   carry the INT_ALIGNED / UPDATE_REQ marker bits, which are stripped by
   INST_CODE_AND_DST before emission. */
static const sljit_ins data_transfer_insts[64 + 8] = {

/* -------- Unsigned -------- */

/* Word. */

/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* u b n i s */ HI(38) /* stb */,
/* u b n i l */ HI(34) /* lbz */,
/* u b n x s */ HI(31) | LO(215) /* stbx */,
/* u b n x l */ HI(31) | LO(87) /* lbzx */,

/* u b w i s */ HI(39) /* stbu */,
/* u b w i l */ HI(35) /* lbzu */,
/* u b w x s */ HI(31) | LO(247) /* stbux */,
/* u b w x l */ HI(31) | LO(119) /* lbzux */,

/* Half. */

/* u h n i s */ HI(44) /* sth */,
/* u h n i l */ HI(40) /* lhz */,
/* u h n x s */ HI(31) | LO(407) /* sthx */,
/* u h n x l */ HI(31) | LO(279) /* lhzx */,

/* u h w i s */ HI(45) /* sthu */,
/* u h w i l */ HI(41) /* lhzu */,
/* u h w x s */ HI(31) | LO(439) /* sthux */,
/* u h w x l */ HI(31) | LO(311) /* lhzux */,

/* Int. */

/* u i n i s */ HI(36) /* stw */,
/* u i n i l */ HI(32) /* lwz */,
/* u i n x s */ HI(31) | LO(151) /* stwx */,
/* u i n x l */ HI(31) | LO(23) /* lwzx */,

/* u i w i s */ HI(37) /* stwu */,
/* u i w i l */ HI(33) /* lwzu */,
/* u i w x s */ HI(31) | LO(183) /* stwux */,
/* u i w x l */ HI(31) | LO(55) /* lwzux */,

/* -------- Signed -------- */

/* Word. */

/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* s b n i s */ HI(38) /* stb */,
/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
/* s b n x s */ HI(31) | LO(215) /* stbx */,
/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,

/* s b w i s */ HI(39) /* stbu */,
/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
/* s b w x s */ HI(31) | LO(247) /* stbux */,
/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,

/* Half. */

/* s h n i s */ HI(44) /* sth */,
/* s h n i l */ HI(42) /* lha */,
/* s h n x s */ HI(31) | LO(407) /* sthx */,
/* s h n x l */ HI(31) | LO(343) /* lhax */,

/* s h w i s */ HI(45) /* sthu */,
/* s h w i l */ HI(43) /* lhau */,
/* s h w x s */ HI(31) | LO(439) /* sthux */,
/* s h w x l */ HI(31) | LO(375) /* lhaux */,

/* Int. */

/* s i n i s */ HI(36) /* stw */,
/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
/* s i n x s */ HI(31) | LO(151) /* stwx */,
/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),

/* s i w i s */ HI(37) /* stwu */,
/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
/* s i w x s */ HI(31) | LO(183) /* stwux */,
/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),

/* -------- Double -------- */

/* d   n i s */ HI(54) /* stfd */,
/* d   n i l */ HI(50) /* lfd */,
/* d   n x s */ HI(31) | LO(727) /* stfdx */,
/* d   n x l */ HI(31) | LO(599) /* lfdx */,

/* s   n i s */ HI(52) /* stfs */,
/* s   n i l */ HI(48) /* lfs */,
/* s   n x s */ HI(31) | LO(663) /* stfsx */,
/* s   n x l */ HI(31) | LO(535) /* lfsx */,

};
846 
847 #undef ARCH_32_64
848 
849 /* Simple cases, (no caching is required). */
/* Simple cases, (no caching is required). Tries to emit the memory access
   described by (arg, argw) with a single instruction. Returns -1 when the
   instruction was emitted, 1 when ARG_TEST is set and a single instruction
   would suffice, and 0 when the slow path (getput_arg) is needed. */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_ins inst;

	/* Should work when (arg & REG_MASK) == 0. */
	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (arg & OFFS_REG_MASK) {
		/* [reg+reg] form; a shifted index cannot be encoded directly. */
		if (argw & 0x3)
			return 0;
		if (inp_flags & ARG_TEST)
			return 1;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
		return -1;
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

	/* ds-form loads/stores (INT_ALIGNED) need a 4-byte aligned offset,
	   and there is no update form for some instructions (UPDATE_REQ). */
	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;
#endif

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* Only the 16 bit signed displacement range matters on 32 bit. */
	if (argw > SIMM_MAX || argw < SIMM_MIN)
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
	return -1;
}
896 
897 /* See getput_arg below.
898    Note: can_cache is called only for binary operators. Those operator always
899    uses word arguments without write back. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));

	/* Indexed form: the shifted index register (kept in TMP_REG3) can be
	   reused only if both accesses use the same offset register with the
	   same shift amount. */
	if (arg & OFFS_REG_MASK)
		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));

	if (next_arg & OFFS_REG_MASK)
		return 0;

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* high_short is the upper 16 bit half rounded to compensate for the
	   sign extension of the low 16 bit displacement; a matching pair can
	   share one ADDIS result. */
	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
	return high_short == next_high_short;
#else
	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
		if (high_short == next_high_short)
			return 1;
	}

	/* 64 bit only: a full immediate loaded into TMP_REG3 can also be
	   reused when the next offset is within a 16 bit signed delta. */
	diff = argw - next_argw;
	if (!(arg & REG_MASK))
		return diff <= SIMM_MAX && diff >= SIMM_MIN;

	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
		return 1;

	return 0;
#endif
}
937 
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Re-aligns the immediate cached in TMP_REG3 when a DS-form (INT_ALIGNED)
   access needs a displacement with the low two bits clear: the low bits
   are folded into TMP_REG3 and stripped from imm. Wrapped in
   do { } while (0) so the macro expands to a single statement and cannot
   interact badly with a surrounding if/else (the FAIL_IF return inside
   still propagates errors as before). */
#define ADJUST_CACHED_IMM(imm) \
	do { \
		if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
			/* Adjust cached value. Fortunately this is really a rare case */ \
			compiler->cache_argw += imm & 0x3; \
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
			imm &= ~0x3; \
		} \
	} while (0)
#endif
947 
948 /* Emit the necessary instructions. See can_cache above. */
/* Slow-path emitter for a memory access that getput_arg_fast rejected.
   May use TMP_REG3 to cache an address component (recorded in
   compiler->cache_arg / cache_argw) for reuse by the following access
   described by (next_arg, next_argw); see can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r;
	sljit_ins inst;
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* For GPR loads the destination register itself can serve as the
	   scratch, since its old value dies anyway. */
	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
	/* Special case for "mov reg, [reg, ... ]". */
	if ((arg & REG_MASK) == tmp_r)
		tmp_r = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;
		/* Otherwise getput_arg_fast would capture it. */
		SLJIT_ASSERT(argw);

		/* Reuse (or establish) the shifted index cached in TMP_REG3. */
		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
			tmp_r = TMP_REG3;
		else {
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				tmp_r = TMP_REG3;
			}
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
		}
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
	}

	/* Write-back makes no sense without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
#endif

		/* ADDIS + D-form access: build the upper half of the offset,
		   then use the (sign extended) low 16 bits as displacement. */
		arg &= REG_MASK;
		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* The getput_arg_fast should handle this otherwise. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

		if (inp_flags & WRITE_BACK) {
			if (arg == reg) {
				/* Preserve the stored value before clobbering the base. */
				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
				reg = tmp_r;
			}
			tmp_r = arg;
			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
		}
		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
				if (high_short == next_high_short) {
					/* Cache the ADDIS result for the next access. */
					compiler->cache_arg = SLJIT_MEM | arg;
					compiler->cache_argw = high_short;
					tmp_r = TMP_REG3;
				}
			}
			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
		}
		else
			tmp_r = TMP_REG3;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	}

	/* Everything else is PPC-64 only. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: try to reuse the immediate cached in TMP_REG3. */
		diff = argw - compiler->cache_argw;
		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			ADJUST_CACHED_IMM(diff);
			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
		}

		diff = argw - next_argw;
		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
	}

	/* Cached base+offset sum in TMP_REG3, reachable with a small delta. */
	diff = argw - compiler->cache_argw;
	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
		ADJUST_CACHED_IMM(diff);
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
	}

	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		if (compiler->cache_argw != argw) {
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	diff = argw - next_argw;
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* Cache the full base+offset sum for the next access. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
	}

	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}
	else
		FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Get the indexed version instead of the normal one. */
	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
#endif
}
1115 
1116 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1117 {
1118 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1119 		return compiler->error;
1120 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1121 }
1122 
/* Central operand dispatcher: resolves dst/src1/src2 addressing modes
   into registers (loading from memory or immediates as needed), calls
   emit_single_op, and stores the result back when dst is in memory. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r;
	sljit_s32 sugg_src2_r = TMP_REG2;
	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);

	if (!(input_flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Result unused: moves from a non-memory source have no effect. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			/* dst_r == 0 marks the destination as not yet resolved. */
			flags |= SLOW_DEST;
			dst_r = 0;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_r = TMP_REG1;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
		FAIL_IF(compiler->error);
		src1_r = TMP_REG1;
	}
	else
		src1_r = 0;

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
		src2_r = sugg_src2_r;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
		FAIL_IF(compiler->error);
		src2_r = sugg_src2_r;
	}
	else
		src2_r = 0;

	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
	   All arguments are complex addressing modes, and it is a binary operator. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Order the two loads so the caching in getput_arg pays off. */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result if the destination is in memory. */
	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
1251 
/* Emits a zero-operand operation (nop/breakpoint and the implicit
   R0/R1 multiply/divide helpers). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_s32 int_op = op & SLJIT_I32_OP;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* Copy R0 to TMP_REG1 (or rA,rS,rS == mr) so the low product can
		   overwrite R0 before the high product is computed. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#else
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Save the dividend; the remainder is dividend - quotient * divisor. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#else
		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#endif
		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#else
		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#endif
	}

	return SLJIT_SUCCESS;
}
1298 
/* Helper for sljit_emit_op1: emits a typed move. For immediate sources
   the value is pre-truncated with type_cast and emitted as a plain
   SLJIT_MOV, so no runtime conversion instruction is needed.
   Relies on the local variables of sljit_emit_op1 (compiler, flags,
   dst, dstw, src, srcw). */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1301 
/* Emits a single-operand operation (moves with optional type
   conversion / write-back, NOT, NEG, CLZ). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	op = GET_OPCODE(op);
	/* An immediate zero can always be replaced by the dedicated zero register. */
	if ((src & SLJIT_IMM) && srcw == 0)
		src = TMP_ZERO;

	/* Clear XER before operations that set the overflow flag. */
	if (op_flags & SLJIT_SET_O)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	if (op_flags & SLJIT_I32_OP) {
		if (op < SLJIT_NOT) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
			/* Pick the cheaper 32 bit load / immediate form where the
			   extension behavior is equivalent. */
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		else {
			/* Most operations expect sign extended arguments. */
			flags |= INT_DATA | SIGNED_DATA;
			if (src & SLJIT_IMM)
				srcw = (sljit_s32)srcw;
		}
#endif
	}

	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOV_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));

	case SLJIT_MOV_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
#endif

	case SLJIT_MOV_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));

	case SLJIT_MOV_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));

	case SLJIT_MOV_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));

	case SLJIT_MOV_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));

	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOVU_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));

	case SLJIT_MOVU_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
#endif

	case SLJIT_MOVU_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));

	case SLJIT_MOVU_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));

	case SLJIT_MOVU_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));

	case SLJIT_MOVU_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));

	case SLJIT_NOT:
		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
#else
		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
#endif
	}

	return SLJIT_SUCCESS;
}
1421 
1422 #undef EMIT_MOV
1423 
/* Immediate classification helpers for sljit_emit_op2. Each tests
   whether (src, srcw) is an immediate that fits a particular PPC
   immediate instruction form.
   Note: the mask constants below are cast to sljit_sw BEFORE the
   complement. Without the cast, a literal such as 0xffff0000 has type
   unsigned int, so ~0xffff0000 is computed in 32 bits (0x0000ffff) and
   never covers the upper half of a 64 bit srcw — TEST_UH_IMM would
   accept values with high bits set, and ~0xffffffff would collapse to
   0, making TEST_UI_IMM accept ANY immediate on 64 bit targets and
   silently truncate it to 32 bits. */

/* Fits a 16 bit signed immediate (addi, etc.). */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* Fits a 16 bit unsigned immediate (ori, andi., etc.).
   ~0xffff is an int, so it sign extends correctly on 64 bit. */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Fits a 16 bit signed immediate shifted left by 16 (addis). */
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Fits a 16 bit unsigned immediate shifted left by 16 (oris). */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Can be synthesized by an addi + addis pair. */
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Fits an unsigned 32 bit immediate (ori + oris pair). */
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
1456 
/* Emits a two-operand (binary) operation. Immediate operands are first
   matched against the PPC immediate instruction forms (selected through
   the ALT_FORMx flags and compiler->imm) before falling back to the
   generic register form in emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Immediate zeros are replaced by the dedicated zero register. */
	if ((src1 & SLJIT_IMM) && src1w == 0)
		src1 = TMP_ZERO;
	if ((src2 & SLJIT_IMM) && src2w == 0)
		src2 = TMP_ZERO;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (op & SLJIT_I32_OP) {
		/* Most operations expect sign extended arguments. */
		flags |= INT_DATA | SIGNED_DATA;
		if (src1 & SLJIT_IMM)
			src1w = (sljit_s32)(src1w);
		if (src2 & SLJIT_IMM)
			src2w = (sljit_s32)(src2w);
		if (GET_FLAGS(op))
			flags |= ALT_SIGN_EXT;
	}
#endif
	/* Clear XER before operations that set the overflow flag. */
	if (op & SLJIT_SET_O)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
	if (src2 == TMP_REG2)
		flags |= ALT_KEEP_CACHE;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
			/* Prefer addi, then addis, then the addi+addis pair. */
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src2, src2w)) {
				compiler->imm = (src2w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src1, src1w)) {
				compiler->imm = (src1w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_ADD_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_ADDC:
		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
			/* Subtracting an immediate is adding its negation. */
			if (TEST_SL_IMM(src2, -src2w)) {
				compiler->imm = (-src2w) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src2, -src2w)) {
				compiler->imm = ((-src2w) >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, -src2w)) {
				compiler->imm = -src2w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
		}
		/* Pure comparison: only the flag bits are needed. */
		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
			if (!(op & SLJIT_SET_U)) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
				if (TEST_SL_IMM(src2, src2w)) {
					compiler->imm = src2w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
				}
				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
					compiler->imm = src1w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
				}
			}
			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
				if (TEST_UL_IMM(src2, src2w)) {
					compiler->imm = src2w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
				}
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
			}
			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
				compiler->imm = src2w;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
		}
		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
			if (TEST_SL_IMM(src2, -src2w)) {
				compiler->imm = (-src2w) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
		}
		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUBC:
		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op & SLJIT_I32_OP)
			flags |= ALT_FORM2;
#endif
		if (!GET_FLAGS(op)) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		/* Commutative unsigned operations. */
		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
			if (TEST_UL_IMM(src2, src2w)) {
				compiler->imm = src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UL_IMM(src1, src1w)) {
				compiler->imm = src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			if (TEST_UH_IMM(src2, src2w)) {
				compiler->imm = (src2w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UH_IMM(src1, src1w)) {
				compiler->imm = (src1w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
			if (TEST_UI_IMM(src2, src2w)) {
				compiler->imm = src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UI_IMM(src1, src1w)) {
				compiler->imm = src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_ASHR:
		if (op & SLJIT_KEEP_FLAGS)
			flags |= ALT_FORM3;
		/* Fall through. */
	case SLJIT_SHL:
	case SLJIT_LSHR:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op & SLJIT_I32_OP)
			flags |= ALT_FORM2;
#endif
		if (src2 & SLJIT_IMM) {
			compiler->imm = src2w;
			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1664 
/* Returns the hardware register number backing an abstract sljit
   general-purpose register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
1670 
/* Returns the hardware FP register number for an abstract sljit float
   register; the mapping is the identity on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
1676 
/* Emit a raw, caller-encoded instruction word.  Every PPC instruction is
   one 32 bit word, so only the first sljit_ins of 'instruction' is used
   (size is presumably validated by check_sljit_emit_op_custom). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
1685 
1686 /* --------------------------------------------------------------------- */
1687 /*  Floating point operators                                             */
1688 /* --------------------------------------------------------------------- */
1689 
/* Report FPU availability: the build-time SLJIT_IS_FPU_AVAILABLE macro
   wins when defined, otherwise the FPU is assumed present on PPC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
1699 
/* FLOAT_DATA: memory transfer type for a float operation.  SLJIT_F32_OP
   (0x100) shifted right by 6 yields the single-precision bit (0x4); the
   compile-time assert in sljit_emit_fop1 guarantees the bit is free. */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
/* Pick the single or double precision variant of an opcode. */
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)

/* Offset of the 8-byte on-stack scratch slot used to move values between
   integer and float registers (all transfers below go through memory). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))

/* Byte-order dependent offsets of the low and high 32 bit halves of the
   scratch doubleword. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
#endif

#endif /* SLJIT_CONFIG_PPC_64 */
1717 
/* Convert a floating point value to a signed integer.  fctiwz/fctidz
   leave the integer image in an FP register, so the result must travel
   through memory (the FLOAT_TMP_MEM_OFFSET scratch slot) to reach a GPR
   or a memory destination. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	op = GET_OPCODE(op);
	/* Truncate toward zero: fctiwz for 32 bit, fctidz for 64 bit results. */
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (op == SLJIT_CONV_SW_FROM_F64) {
		if (FAST_IS_REG(dst)) {
			/* Spill the 64 bit integer image, then reload it into the GPR. */
			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
		}
		/* Memory destination: store the 64 bit image directly. */
		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
	}

#else
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;
#endif

	/* 32 bit result path (both configs).  stfiwx stores the low word of
	   the FP register; it is indexed-form only, so the offset must be
	   materialized in a register. */
	if (FAST_IS_REG(dst)) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* Build the (base, index) register pair required by stfiwx; dstw is
	   repurposed below to hold the index register number. */
	if (dst & OFFS_REG_MASK) {
		dstw &= 0x3;
		if (dstw) {
			/* Shift the index register left by dstw (scaled addressing). */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
			dstw = TMP_REG1;
		}
		else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Register base with zero offset: use it as the index, base 0. */
			dstw = dst & REG_MASK;
			dst = 0;
		}
		else {
			/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
1785 
/* Convert a signed integer to a floating point value.  PPC64 moves the
   integer through the stack scratch slot and uses fcfid; PPC32 has no
   int-to-float instruction, so the classic magic-number double trick is
   used instead (see the comment in the PPC32 branch). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_IMM) {
		/* For a 32 bit conversion only the low word of the immediate matters. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
		/* Sign extend the 32 bit source to 64 bits before the conversion. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
		else
			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		src = TMP_REG1;
	}

	/* Move the integer into an FP register through the scratch slot. */
	if (FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
	}
	else
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));

	/* fcfid: convert the 64 bit integer image to double precision. */
	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#else

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	/* When set, the sign bit of the input must be flipped at runtime;
	   immediates have it flipped at load time instead. */
	sljit_s32 invert_sign = 1;

	if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
		src = TMP_REG1;
		invert_sign = 0;
	}
	else if (!FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
		src = TMP_REG1;
	}

	/* First, a special double floating point value is constructed: (2^52 + (input xor (2^31)))
	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
	   point value, we need to subtract 2^52 + 2^31 from the constructed value. */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
	if (invert_sign)
		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
	/* TMP_FREG1 = 2^52 + (input xor 2^31). */
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	/* TMP_FREG2 = 2^52 + 2^31 (same high word, low word 0x80000000). */
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));

	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#endif
}
1863 
/* Compare two floating point values with fcmpu, setting CR field 4
   (float condition codes are read from CR bits 4+0..4+3 elsewhere). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		/* src2 is passed as the next-access hint for the address cache. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
		src2 = TMP_FREG2;
	}

	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
}
1880 
/* Single-operand floating point operation: move, negate, abs, and the
   f32<->f64 conversions.  SLJIT_F32_OP selects single precision memory
   transfers through FLOAT_DATA(). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* FLOAT_DATA depends on SLJIT_F32_OP >> 6 landing on an unused bit. */
	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* For the precision conversion the source has the opposite precision
	   of the destination: toggle the bit so the load below uses the
	   source precision (toggled back inside the switch). */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_CONV_F64_FROM_F32:
		op ^= SLJIT_F32_OP;
		if (op & SLJIT_F32_OP) {
			/* f64 -> f32: round to single precision. */
			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
			break;
		}
		/* f32 -> f64 needs no instruction (lfs already widened it). */
		/* Fall through. */
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
			else
				/* Memory destination: store directly from the source reg. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
1932 
/* Two-operand floating point operation (add/sub/mul/div).  Sources that
   cannot be loaded with the fast addressing path are marked with
   ALT_FORM1 (src1) / ALT_FORM2 (src2) and loaded afterwards, so the
   address-cache hints can be chosen once both operands are known. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	/* Try the fast (single instruction) load; defer slow loads. */
	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= ALT_FORM1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= ALT_FORM2;
	}

	/* Slow loads: pick the load order that maximizes cache reuse. */
	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & ALT_FORM1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & ALT_FORM2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & ALT_FORM1)
		src1 = TMP_FREG1;
	if (flags & ALT_FORM2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	}

	/* Memory destination: write back the temporary result register. */
	if (dst_r == TMP_FREG2)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
2010 
2011 #undef FLOAT_DATA
2012 #undef SELECT_FOP
2013 
2014 /* --------------------------------------------------------------------- */
2015 /*  Other instructions                                                   */
2016 /* --------------------------------------------------------------------- */
2017 
/* Save the return address (link register) into dst at the entry of a
   fast-call target; counterpart of sljit_emit_fast_return. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	/* mflr: copy the link register into the destination GPR. */
	if (FAST_IS_REG(dst))
		return push_inst(compiler, MFLR | D(dst));

	/* Memory. */
	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
}
2035 
/* Load the return address from src into the link register and return
   with blr; counterpart of sljit_emit_fast_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, MTLR | S(src)));
	else {
		/* Bring the address into TMP_REG2 first (memory or immediate). */
		if (src & SLJIT_MEM)
			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
		else if (src & SLJIT_IMM)
			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
	}
	return push_inst(compiler, BLR);
}
2053 
2054 /* --------------------------------------------------------------------- */
2055 /*  Conditional instructions                                             */
2056 /* --------------------------------------------------------------------- */
2057 
2058 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2059 {
2060 	struct sljit_label *label;
2061 
2062 	CHECK_ERROR_PTR();
2063 	CHECK_PTR(check_sljit_emit_label(compiler));
2064 
2065 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2066 		return compiler->last_label;
2067 
2068 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2069 	PTR_FAIL_IF(!label);
2070 	set_label(label, compiler);
2071 	return label;
2072 }
2073 
2074 static sljit_ins get_bo_bi_flags(sljit_s32 type)
2075 {
2076 	switch (type) {
2077 	case SLJIT_EQUAL:
2078 		return (12 << 21) | (2 << 16);
2079 
2080 	case SLJIT_NOT_EQUAL:
2081 		return (4 << 21) | (2 << 16);
2082 
2083 	case SLJIT_LESS:
2084 	case SLJIT_LESS_F64:
2085 		return (12 << 21) | ((4 + 0) << 16);
2086 
2087 	case SLJIT_GREATER_EQUAL:
2088 	case SLJIT_GREATER_EQUAL_F64:
2089 		return (4 << 21) | ((4 + 0) << 16);
2090 
2091 	case SLJIT_GREATER:
2092 	case SLJIT_GREATER_F64:
2093 		return (12 << 21) | ((4 + 1) << 16);
2094 
2095 	case SLJIT_LESS_EQUAL:
2096 	case SLJIT_LESS_EQUAL_F64:
2097 		return (4 << 21) | ((4 + 1) << 16);
2098 
2099 	case SLJIT_SIG_LESS:
2100 		return (12 << 21) | (0 << 16);
2101 
2102 	case SLJIT_SIG_GREATER_EQUAL:
2103 		return (4 << 21) | (0 << 16);
2104 
2105 	case SLJIT_SIG_GREATER:
2106 		return (12 << 21) | (1 << 16);
2107 
2108 	case SLJIT_SIG_LESS_EQUAL:
2109 		return (4 << 21) | (1 << 16);
2110 
2111 	case SLJIT_OVERFLOW:
2112 	case SLJIT_MUL_OVERFLOW:
2113 		return (12 << 21) | (3 << 16);
2114 
2115 	case SLJIT_NOT_OVERFLOW:
2116 	case SLJIT_MUL_NOT_OVERFLOW:
2117 		return (4 << 21) | (3 << 16);
2118 
2119 	case SLJIT_EQUAL_F64:
2120 		return (12 << 21) | ((4 + 2) << 16);
2121 
2122 	case SLJIT_NOT_EQUAL_F64:
2123 		return (4 << 21) | ((4 + 2) << 16);
2124 
2125 	case SLJIT_UNORDERED_F64:
2126 		return (12 << 21) | ((4 + 3) << 16);
2127 
2128 	case SLJIT_ORDERED_F64:
2129 		return (4 << 21) | ((4 + 3) << 16);
2130 
2131 	default:
2132 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2133 		return (20 << 21);
2134 	}
2135 }
2136 
/* Emit a (possibly conditional) jump to a not-yet-known target.  The
   target is loaded into TMP_CALL_REG by a placeholder emit_const
   sequence (presumably patched when the code is generated) and the
   branch goes through the count register. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins bo_bi_flags;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	bo_bi_flags = get_bo_bi_flags(type & 0xff);
	if (!bo_bi_flags)
		return NULL;

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Strip SLJIT_REWRITABLE_JUMP and other modifier bits. */
	type &= 0xff;

	/* In PPC, we don't need to touch the arguments. */
	if (type < SLJIT_JUMP)
		jump->flags |= IS_COND;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
	if (type >= SLJIT_CALL0)
		jump->flags |= IS_CALL;
#endif

	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
	jump->addr = compiler->size;
	/* bcctr; the low bit is LK (save return address) for calls. */
	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
	return jump;
}
2168 
/* Emit an indirect jump or call through a register, memory operand or
   immediate address; the branch always goes through the count register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump = NULL;
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0) {
			/* mr TMP_CALL_REG, src (or rA,rS,rS): the ABI expects the
			   entry address in TMP_CALL_REG. */
			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
			src_r = TMP_CALL_REG;
		}
		else
			src_r = src;
#else
		src_r = src;
#endif
	} else if (src & SLJIT_IMM) {
		/* Known target: record a jump so the address can be patched. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0)
			jump->flags |= IS_CALL;
#endif
		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
		src_r = TMP_CALL_REG;
	}
	else {
		/* Memory operand: load the target address first. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
		src_r = TMP_CALL_REG;
	}

	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
	if (jump)
		jump->addr = compiler->size;
	/* Unconditional bcctr (BO=20); low bit is LK for calls. */
	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
2211 
/* Get a bit from CR, all other bits are zeroed. */
/* mfcr copies all of CR into dst; rlwinm rotates left by (bit + 1) so
   the selected bit lands in the least significant position, masked to a
   single bit (MB = ME = 31).  Note: two statements, only used inside
   switch cases below where that is safe. */
#define GET_CR_BIT(bit, dst) \
	FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));

/* Flip the extracted 0/1 flag value (logical negation). */
#define INVERT_BIT(dst) \
	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
2219 
/* Materialize the condition selected by 'type' as a 0/1 value, then
   either move it to dst (op is a move) or combine it with src via
   sljit_emit_op2 (op is an arithmetic/logical operation). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 reg, input_flags;
	sljit_s32 flags = GET_ALL_FLAGS(op);
	sljit_sw original_dstw = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	/* Build the flag value directly in dst when it is only moved there. */
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* For arithmetic ops, preload the src operand before the CR read. */
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		ADJUST_LOCAL_OFFSET(src, srcw);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
#else
		input_flags = WORD_DATA;
#endif
		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	/* Extract the CR bit matching the condition; the *_F64 conditions
	   read CR field 4 (bits 4+0..4+3) set by sljit_emit_fop1_cmp. */
	switch (type & 0xff) {
	case SLJIT_EQUAL:
		GET_CR_BIT(2, reg);
		break;

	case SLJIT_NOT_EQUAL:
		GET_CR_BIT(2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		GET_CR_BIT(4 + 0, reg);
		break;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		GET_CR_BIT(4 + 0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		GET_CR_BIT(4 + 1, reg);
		break;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		GET_CR_BIT(4 + 1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_LESS:
		GET_CR_BIT(0, reg);
		break;

	case SLJIT_SIG_GREATER_EQUAL:
		GET_CR_BIT(0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_GREATER:
		GET_CR_BIT(1, reg);
		break;

	case SLJIT_SIG_LESS_EQUAL:
		GET_CR_BIT(1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		GET_CR_BIT(3, reg);
		break;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		GET_CR_BIT(3, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		break;

	case SLJIT_NOT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_UNORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		break;

	case SLJIT_ORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		INVERT_BIT(reg);
		break;

	default:
		SLJIT_ASSERT_STOP();
		break;
	}

	if (op < SLJIT_ADD) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op == SLJIT_MOV)
			input_flags = WORD_DATA;
		else {
			op = SLJIT_MOV_U32;
			input_flags = INT_DATA;
		}
#else
		op = SLJIT_MOV;
		input_flags = WORD_DATA;
#endif
		/* reg != TMP_REG2 means the flag was built directly in dst. */
		if (reg != TMP_REG2)
			return SLJIT_SUCCESS;
		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	/* Combine the 0/1 flag value (in TMP_REG2) with src. */
	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
}
2360 
/* Emit a load of a rewritable constant into dst.  emit_const emits an
   immediate load sequence that is presumably fixed-length, so the value
   can be patched in place later (NOTE(review): confirm against
   sljit_set_const / emit_const elsewhere in this file). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;

	PTR_FAIL_IF(emit_const(compiler, reg, init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
	return const_;
}
2382