xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativePPC_common.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: sljitNativePPC_common.c,v 1.4 2014/06/17 19:33:20 alnsn Exp $	*/
2 
3 /*
4  *    Stack-less Just-In-Time compiler
5  *
6  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without modification, are
9  * permitted provided that the following conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above copyright notice, this list of
12  *      conditions and the following disclaimer.
13  *
14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
15  *      of conditions and the following disclaimer in the documentation and/or other materials
16  *      provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
30 {
31 	return "PowerPC" SLJIT_CPUINFO;
32 }
33 
34 /* Length of an instruction word.
35    Both for ppc-32 and ppc-64. */
36 typedef sljit_ui sljit_ins;
37 
/* Defined for 32 bit AIX and for every 64 bit target. The prologue/epilogue
   code in this file tests it to choose the link register save slot
   (2 words instead of 1 word above the stack pointer) and the fixed part
   of the frame size. */
38 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
39 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
40 #define SLJIT_PPC_STACK_FRAME_V2 1
41 #endif
42 
43 #ifdef _AIX
44 #include <sys/cache.h>
45 #endif
46 
/* On little endian targets TMP_CALL_REG gets a register slot of its own
   (see below) instead of aliasing TMP_REG2.
   NOTE(review): presumably the ABI used there expects the entry address of
   the callee in that register - confirm. */
47 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
48 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
49 #endif
50 
/* Temporary registers: reg_map indexes that follow the public sljit
   register indexes. */
51 #define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
52 #define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
53 #define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
54 #define TMP_ZERO	(SLJIT_NO_REGISTERS + 4)
55 
56 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
57 #define TMP_CALL_REG	(SLJIT_NO_REGISTERS + 5)
58 #else
59 #define TMP_CALL_REG	TMP_REG2
60 #endif
61 
/* Floating point temporaries. The FD/FA/FB/FC macros below encode these
   values as-is; there is no mapping table for float registers. */
62 #define TMP_FREG1	(0)
63 #define TMP_FREG2	(SLJIT_FLOAT_REG6 + 1)
64 
/* Maps an sljit register index (including the TMP_* indexes above) to the
   register number that the D/S/A/B/C macros place into an instruction word.
   Index 0 maps to register 0. */
65 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
66 	0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12
67 };
68 
69 /* --------------------------------------------------------------------- */
70 /*  Instruction forms                                                    */
71 /* --------------------------------------------------------------------- */
/* Register fields. D/S/A/B/C translate an sljit register index through
   reg_map before shifting it into place; FD/FA/FB/FC/CRD shift the raw
   value. IMM keeps the low 16 bits of its argument. */
72 #define D(d)		(reg_map[d] << 21)
73 #define S(s)		(reg_map[s] << 21)
74 #define A(a)		(reg_map[a] << 16)
75 #define B(b)		(reg_map[b] << 11)
76 #define C(c)		(reg_map[c] << 6)
77 #define FD(fd)		((fd) << 21)
78 #define FA(fa)		((fa) << 16)
79 #define FB(fb)		((fb) << 11)
80 #define FC(fc)		((fc) << 6)
81 #define IMM(imm)	((imm) & 0xffff)
82 #define CRD(d)		((d) << 21)
83 
84 /* Instruction bit sections.
85    OE and Rc flag (see ALT_SET_FLAGS). */
/* ALT_SET_FLAGS is 0x400 (defined further down in this file), so OERC()
   yields bit 10 plus bit 0 and RC() yields bit 0 when the flag is set. */
86 #define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
87 /* Rc flag (see ALT_SET_FLAGS). */
88 #define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
/* HI() places the primary opcode at bit 26; LO() shifts the extended opcode
   left by one, which leaves bit 0 free for RC(). */
89 #define HI(opcode)	((opcode) << 26)
90 #define LO(opcode)	((opcode) << 1)
91 
/* Instruction opcodes; operand fields are or-ed in by the users. */
92 #define ADD		(HI(31) | LO(266))
93 #define ADDC		(HI(31) | LO(10))
94 #define ADDE		(HI(31) | LO(138))
95 #define ADDI		(HI(14))
96 #define ADDIC		(HI(13))
97 #define ADDIS		(HI(15))
98 #define ADDME		(HI(31) | LO(234))
99 #define AND		(HI(31) | LO(28))
100 #define ANDI		(HI(28))
101 #define ANDIS		(HI(29))
102 #define Bx		(HI(18))
103 #define BCx		(HI(16))
104 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
105 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
106 #define CNTLZD		(HI(31) | LO(58))
107 #define CNTLZW		(HI(31) | LO(26))
108 #define CMP		(HI(31) | LO(0))
109 #define CMPI		(HI(11))
110 #define CMPL		(HI(31) | LO(32))
111 #define CMPLI		(HI(10))
112 #define CROR		(HI(19) | LO(449))
113 #define DIVD		(HI(31) | LO(489))
114 #define DIVDU		(HI(31) | LO(457))
115 #define DIVW		(HI(31) | LO(491))
116 #define DIVWU		(HI(31) | LO(459))
117 #define EXTSB		(HI(31) | LO(954))
118 #define EXTSH		(HI(31) | LO(922))
119 #define EXTSW		(HI(31) | LO(986))
120 #define FABS		(HI(63) | LO(264))
121 #define FADD		(HI(63) | LO(21))
122 #define FADDS		(HI(59) | LO(21))
123 #define FCMPU		(HI(63) | LO(0))
124 #define FDIV		(HI(63) | LO(18))
125 #define FDIVS		(HI(59) | LO(18))
126 #define FMR		(HI(63) | LO(72))
127 #define FMUL		(HI(63) | LO(25))
128 #define FMULS		(HI(59) | LO(25))
129 #define FNEG		(HI(63) | LO(40))
130 #define FSUB		(HI(63) | LO(20))
131 #define FSUBS		(HI(59) | LO(20))
132 #define LD		(HI(58) | 0)
133 #define LWZ		(HI(32))
134 #define MFCR		(HI(31) | LO(19))
135 #define MFLR		(HI(31) | LO(339) | 0x80000)
136 #define MFXER		(HI(31) | LO(339) | 0x10000)
137 #define MTCTR		(HI(31) | LO(467) | 0x90000)
138 #define MTLR		(HI(31) | LO(467) | 0x80000)
139 #define MTXER		(HI(31) | LO(467) | 0x10000)
140 #define MULHD		(HI(31) | LO(73))
141 #define MULHDU		(HI(31) | LO(9))
142 #define MULHW		(HI(31) | LO(75))
143 #define MULHWU		(HI(31) | LO(11))
144 #define MULLD		(HI(31) | LO(233))
145 #define MULLI		(HI(7))
146 #define MULLW		(HI(31) | LO(235))
147 #define NEG		(HI(31) | LO(104))
148 #define NOP		(HI(24))
149 #define NOR		(HI(31) | LO(124))
150 #define OR		(HI(31) | LO(444))
151 #define ORI		(HI(24))
152 #define ORIS		(HI(25))
153 #define RLDICL		(HI(30))
154 #define RLWINM		(HI(21))
155 #define SLD		(HI(31) | LO(27))
156 #define SLW		(HI(31) | LO(24))
157 #define SRAD		(HI(31) | LO(794))
158 #define SRADI		(HI(31) | LO(413 << 1))
159 #define SRAW		(HI(31) | LO(792))
160 #define SRAWI		(HI(31) | LO(824))
161 #define SRD		(HI(31) | LO(539))
162 #define SRW		(HI(31) | LO(536))
163 #define STD		(HI(62) | 0)
164 #define STDU		(HI(62) | 1)
165 #define STDUX		(HI(31) | LO(181))
166 #define STW		(HI(36))
167 #define STWU		(HI(37))
168 #define STWUX		(HI(31) | LO(183))
169 #define SUBF		(HI(31) | LO(40))
170 #define SUBFC		(HI(31) | LO(8))
171 #define SUBFE		(HI(31) | LO(136))
172 #define SUBFIC		(HI(8))
173 #define XOR		(HI(31) | LO(316))
174 #define XORI		(HI(26))
175 #define XORIS		(HI(27))
176 
/* Limits of the signed and unsigned 16 bit immediate fields. */
177 #define SIMM_MAX	(0x7fff)
178 #define SIMM_MIN	(-0x8000)
179 #define UIMM_MAX	(0xffff)
180 
181 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
182 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
183 {
184 	sljit_sw* ptrs;
185 	if (func_ptr)
186 		*func_ptr = (void*)context;
187 	ptrs = (sljit_sw*)func;
188 	context->addr = addr ? addr : ptrs[0];
189 	context->r2 = ptrs[1];
190 	context->r11 = ptrs[2];
191 }
192 #endif
193 
194 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
195 {
196 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
197 	FAIL_IF(!ptr);
198 	*ptr = ins;
199 	compiler->size++;
200 	return SLJIT_SUCCESS;
201 }
202 
203 static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
204 {
205 	sljit_sw diff;
206 	sljit_uw target_addr;
207 	sljit_sw extra_jump_flags;
208 
209 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
210 	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
211 		return 0;
212 #else
213 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
214 		return 0;
215 #endif
216 
217 	if (jump->flags & JUMP_ADDR)
218 		target_addr = jump->u.target;
219 	else {
220 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
221 		target_addr = (sljit_uw)(code + jump->u.label->size);
222 	}
223 
224 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
225 	if (jump->flags & IS_CALL)
226 		goto keep_address;
227 #endif
228 
229 	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
230 
231 	extra_jump_flags = 0;
232 	if (jump->flags & IS_COND) {
233 		if (diff <= 0x7fff && diff >= -0x8000) {
234 			jump->flags |= PATCH_B;
235 			return 1;
236 		}
237 		if (target_addr <= 0xffff) {
238 			jump->flags |= PATCH_B | PATCH_ABS_B;
239 			return 1;
240 		}
241 		extra_jump_flags = REMOVE_COND;
242 
243 		diff -= sizeof(sljit_ins);
244 	}
245 
246 	if (diff <= 0x01ffffff && diff >= -0x02000000) {
247 		jump->flags |= PATCH_B | extra_jump_flags;
248 		return 1;
249 	}
250 	if (target_addr <= 0x03ffffff) {
251 		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
252 		return 1;
253 	}
254 
255 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
256 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
257 keep_address:
258 #endif
259 	if (target_addr <= 0x7fffffff) {
260 		jump->flags |= PATCH_ABS32;
261 		return 1;
262 	}
263 	if (target_addr <= 0x7fffffffffffl) {
264 		jump->flags |= PATCH_ABS48;
265 		return 1;
266 	}
267 #endif
268 
269 	return 0;
270 }
271 
/* Copies the instruction words collected in the compiler buffers into a
   newly allocated executable area. While copying it records the final
   addresses of labels and constants and shortens the jump sequences accepted
   by detect_jump_type(). A second pass patches the jump targets, then the
   instruction cache is flushed.
   Returns the start of the generated code, or, when SLJIT_INDIRECT_CALL is
   enabled, the address of the function context stored right after the code.
   Early exits are performed by the CHECK_ERROR_PTR and PTR_FAIL_WITH_EXEC_IF
   macros (defined elsewhere). */
272 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
273 {
274 	struct sljit_memory_fragment *buf;
275 	sljit_ins *code;
276 	sljit_ins *code_ptr;
277 	sljit_ins *buf_ptr;
278 	sljit_ins *buf_end;
279 	sljit_uw word_count;
280 	sljit_uw addr;
281 
282 	struct sljit_label *label;
283 	struct sljit_jump *jump;
284 	struct sljit_const *const_;
285 
286 	CHECK_ERROR_PTR();
287 	check_sljit_generate_code(compiler);
288 	reverse_buf(compiler);
289 
	/* Reserve room after the code for the function context (64 bit: plus
	   one extra word when the instruction count is odd). */
290 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
291 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
292 	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
293 #else
294 	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
295 #endif
296 #endif
297 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
298 	PTR_FAIL_WITH_EXEC_IF(code);
299 	buf = compiler->buf;
300 
	/* First pass. word_count indexes the words as they were emitted;
	   code_ptr is the output position and falls behind word_count whenever
	   a jump sequence is shortened. */
301 	code_ptr = code;
302 	word_count = 0;
303 	label = compiler->labels;
304 	jump = compiler->jumps;
305 	const_ = compiler->consts;
306 	do {
307 		buf_ptr = (sljit_ins*)buf->memory;
308 		buf_end = buf_ptr + (buf->used_size >> 2);
309 		do {
310 			*code_ptr = *buf_ptr++;
311 			SLJIT_ASSERT(!label || label->size >= word_count);
312 			SLJIT_ASSERT(!jump || jump->addr >= word_count);
313 			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
314 			/* These structures are ordered by their address. */
315 			if (label && label->size == word_count) {
316 				/* Just recording the address. */
317 				label->addr = (sljit_uw)code_ptr;
				/* label->size now holds the word offset in the output;
				   detect_jump_type() adds it to code. */
318 				label->size = code_ptr - code;
319 				label = label->next;
320 			}
321 			if (jump && jump->addr == word_count) {
				/* The current word is the last one of the jump sequence;
				   the sequence starts 3 (32 bit) or 6 (64 bit) words
				   before it. */
322 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
323 				jump->addr = (sljit_uw)(code_ptr - 3);
324 #else
325 				jump->addr = (sljit_uw)(code_ptr - 6);
326 #endif
327 				if (detect_jump_type(jump, code_ptr, code)) {
328 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					/* Move the last word to the start of the sequence
					   and drop the words in between. */
329 					code_ptr[-3] = code_ptr[0];
330 					code_ptr -= 3;
331 #else
332 					if (jump->flags & PATCH_ABS32) {
						/* Keep the first two words, then move the last
						   two words right after them. */
333 						code_ptr -= 3;
334 						code_ptr[-1] = code_ptr[2];
335 						code_ptr[0] = code_ptr[3];
336 					}
337 					else if (jump->flags & PATCH_ABS48) {
						/* One word is dropped; the shift and the
						   following immediate instruction are rewritten
						   in place. */
338 						code_ptr--;
339 						code_ptr[-1] = code_ptr[0];
340 						code_ptr[0] = code_ptr[1];
341 						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
342 						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
343 						code_ptr[-3] ^= 0x8422;
344 						/* oris -> ori */
345 						code_ptr[-2] ^= 0x4000000;
346 					}
347 					else {
348 						code_ptr[-6] = code_ptr[0];
349 						code_ptr -= 6;
350 					}
351 #endif
352 					if (jump->flags & REMOVE_COND) {
						/* Emit a BCx with displacement 8 (2 << 2) and the
						   8 << 21 bit flipped, followed by a Bx that is
						   patched in the second pass.
						   NOTE(review): flipping that bit presumably
						   inverts the branch condition - confirm. */
353 						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
354 						code_ptr++;
355 						jump->addr += sizeof(sljit_ins);
356 						code_ptr[0] = Bx;
357 						jump->flags -= IS_COND;
358 					}
359 				}
360 				jump = jump->next;
361 			}
362 			if (const_ && const_->addr == word_count) {
363 				const_->addr = (sljit_uw)code_ptr;
364 				const_ = const_->next;
365 			}
366 			code_ptr ++;
367 			word_count ++;
368 		} while (buf_ptr < buf_end);
369 
370 		buf = buf->next;
371 	} while (buf);
372 
	/* A label may point right after the last instruction. */
373 	if (label && label->size == word_count) {
374 		label->addr = (sljit_uw)code_ptr;
375 		label->size = code_ptr - code;
376 		label = label->next;
377 	}
378 
379 	SLJIT_ASSERT(!label);
380 	SLJIT_ASSERT(!jump);
381 	SLJIT_ASSERT(!const_);
382 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
383 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
384 #else
385 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
386 #endif
387 
	/* Second pass: write the target of every jump into its instruction(s).
	   The do { } while (0) only provides a target for the break statements. */
388 	jump = compiler->jumps;
389 	while (jump) {
390 		do {
391 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
392 			buf_ptr = (sljit_ins*)jump->addr;
393 			if (jump->flags & PATCH_B) {
				/* Single branch instruction: relative displacement, or
				   absolute address with the 0x2 bit set. */
394 				if (jump->flags & IS_COND) {
395 					if (!(jump->flags & PATCH_ABS_B)) {
396 						addr = addr - jump->addr;
397 						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
398 						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
399 					}
400 					else {
401 						SLJIT_ASSERT(addr <= 0xffff);
402 						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
403 					}
404 				}
405 				else {
406 					if (!(jump->flags & PATCH_ABS_B)) {
407 						addr = addr - jump->addr;
408 						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
409 						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
410 					}
411 					else {
412 						SLJIT_ASSERT(addr <= 0x03ffffff);
413 						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
414 					}
415 				}
416 				break;
417 			}
418 			/* Set the fields of immediate loads. */
419 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
420 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
421 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
422 #else
423 			if (jump->flags & PATCH_ABS32) {
424 				SLJIT_ASSERT(addr <= 0x7fffffff);
425 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
426 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
427 				break;
428 			}
429 			if (jump->flags & PATCH_ABS48) {
430 				SLJIT_ASSERT(addr <= 0x7fffffffffff);
431 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
432 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
433 				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
434 				break;
435 			}
			/* Full 64 bit load: word 2 carries no immediate part of the
			   address and is left untouched. */
436 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
437 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
438 			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
439 			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
440 #endif
441 		} while (0);
442 		jump = jump->next;
443 	}
444 
445 	compiler->error = SLJIT_ERR_COMPILED;
446 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
447 	SLJIT_CACHE_FLUSH(code, code_ptr);
448 
	/* With indirect calls the function context is placed after the code
	   (64 bit: skipping one word when code_ptr has the 0x4 address bit
	   set) and its address is returned instead of the code address. */
449 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
450 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
451 	if (((sljit_sw)code_ptr) & 0x4)
452 		code_ptr++;
453 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
454 	return code_ptr;
455 #else
456 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
457 	return code_ptr;
458 #endif
459 #else
460 	return code;
461 #endif
462 }
463 
464 /* --------------------------------------------------------------------- */
465 /*  Entry, exit                                                          */
466 /* --------------------------------------------------------------------- */
467 
468 /* inp_flags: */
469 
470 /* Creates an index in data_transfer_insts array. */
/* Only the bits covered by MEM_MASK take part in that index. */
471 #define LOAD_DATA	0x01
472 #define INDEXED		0x02
473 #define WRITE_BACK	0x04
474 #define WORD_DATA	0x00
475 #define BYTE_DATA	0x08
476 #define HALF_DATA	0x10
477 #define INT_DATA	0x18
478 #define SIGNED_DATA	0x20
479 /* Separates integer and floating point registers */
/* INST_CODE_AND_DST treats (flags & MEM_MASK) <= GPR_REG as an integer
   register destination and anything above as a float register. */
480 #define GPR_REG		0x3f
481 #define DOUBLE_DATA	0x40
482 
483 #define MEM_MASK	0x7f
484 
485 /* Other inp_flags. */
486 
/* Makes getput_arg_fast only report whether the fast path applies,
   without emitting an instruction. */
487 #define ARG_TEST	0x000100
488 /* Integer operation and set flags -> requires exts on 64 bit systems. */
489 #define ALT_SIGN_EXT	0x000200
490 /* This flag affects the RC() and OERC() macros. */
491 #define ALT_SET_FLAGS	0x000400
492 #define ALT_KEEP_CACHE	0x000800
493 #define ALT_FORM1	0x010000
494 #define ALT_FORM2	0x020000
495 #define ALT_FORM3	0x040000
496 #define ALT_FORM4	0x080000
497 #define ALT_FORM5	0x100000
498 #define ALT_FORM6	0x200000
499 
500 /* Source and destination is register. */
501 #define REG_DEST	0x000001
502 #define REG1_SOURCE	0x000002
503 #define REG2_SOURCE	0x000004
504 /* getput_arg_fast returned true. */
505 #define FAST_DEST	0x000008
506 /* Multiple instructions are required. */
507 #define SLOW_DEST	0x000010
508 /*
509 ALT_SIGN_EXT		0x000200
510 ALT_SET_FLAGS		0x000400
511 ALT_FORM1		0x010000
512 ...
513 ALT_FORM6		0x200000 */
514 
/* Word size specific part of the backend. */
515 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
516 #include "sljitNativePPC_32.c"
517 #else
518 #include "sljitNativePPC_64.c"
519 #endif
520 
/* Machine word sized store / load opcodes used by the entry and return
   sequences that follow; both are undefined again after sljit_emit_return. */
521 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
522 #define STACK_STORE	STW
523 #define STACK_LOAD	LWZ
524 #else
525 #define STACK_STORE	STD
526 #define STACK_LOAD	LD
527 #endif
528 
529 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
530 {
531 	CHECK_ERROR();
532 	check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
533 
534 	compiler->scratches = scratches;
535 	compiler->saveds = saveds;
536 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
537 	compiler->logical_local_size = local_size;
538 #endif
539 
540 	FAIL_IF(push_inst(compiler, MFLR | D(0)));
541 	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
542 	if (saveds >= 1)
543 		FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
544 	if (saveds >= 2)
545 		FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
546 	if (saveds >= 3)
547 		FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
548 	if (saveds >= 4)
549 		FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
550 	if (saveds >= 5)
551 		FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
552 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
553 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) ));
554 #else
555 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) ));
556 #endif
557 
558 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
559 	if (args >= 1)
560 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1)));
561 	if (args >= 2)
562 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2)));
563 	if (args >= 3)
564 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));
565 
566 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
567 	compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
568 #else
569 	compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
570 #endif
571 	compiler->local_size = (compiler->local_size + 15) & ~0xf;
572 
573 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
574 	if (compiler->local_size <= SIMM_MAX)
575 		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
576 	else {
577 		FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
578 		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
579 	}
580 #else
581 	if (compiler->local_size <= SIMM_MAX)
582 		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
583 	else {
584 		FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
585 		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
586 	}
587 #endif
588 
589 	return SLJIT_SUCCESS;
590 }
591 
592 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
593 {
594 	CHECK_ERROR_VOID();
595 	check_sljit_set_context(compiler, args, scratches, saveds, local_size);
596 
597 	compiler->scratches = scratches;
598 	compiler->saveds = saveds;
599 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
600 	compiler->logical_local_size = local_size;
601 #endif
602 
603 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
604 	compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
605 #else
606 	compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
607 #endif
608 	compiler->local_size = (compiler->local_size + 15) & ~0xf;
609 }
610 
611 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
612 {
613 	CHECK_ERROR();
614 	check_sljit_emit_return(compiler, op, src, srcw);
615 
616 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
617 
618 	if (compiler->local_size <= SIMM_MAX)
619 		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size)));
620 	else {
621 		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
622 		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
623 	}
624 
625 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
626 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw))));
627 #else
628 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw))));
629 #endif
630 	if (compiler->saveds >= 5)
631 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
632 	if (compiler->saveds >= 4)
633 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
634 	if (compiler->saveds >= 3)
635 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
636 	if (compiler->saveds >= 2)
637 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
638 	if (compiler->saveds >= 1)
639 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
640 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
641 
642 	FAIL_IF(push_inst(compiler, MTLR | S(0)));
643 	FAIL_IF(push_inst(compiler, BLR));
644 
645 	return SLJIT_SUCCESS;
646 }
647 
648 #undef STACK_STORE
649 #undef STACK_LOAD
650 
651 /* --------------------------------------------------------------------- */
652 /*  Operators                                                            */
653 /* --------------------------------------------------------------------- */
654 
655 /* i/x - immediate/indexed form
656    n/w - no write-back / write-back (1 bit)
657    s/l - store/load (1 bit)
658    u/s - signed/unsigned (1 bit)
659    w/b/h/i - word/byte/half/int allowed (2 bit)
660    It contains 32 items, but not all are different. */
661 
/* INT_ALIGNED and UPDATE_REQ are marker bits stored inside the entries of
   data_transfer_insts; the 64 bit INST_CODE_AND_DST masks them out before
   the instruction word is built. */
662 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
663 #define INT_ALIGNED	0x10000
664 /* 64-bit only: there is no lwau instruction. */
665 #define UPDATE_REQ	0x20000
666 
/* ARCH_32_64(a, b) selects a on 32 bit and b on 64 bit builds.
   INST_CODE_AND_DST combines an opcode with its data register field:
   D(reg) when (flags & MEM_MASK) <= GPR_REG, FD(reg) otherwise. */
667 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
668 #define ARCH_32_64(a, b)	a
669 #define INST_CODE_AND_DST(inst, flags, reg) \
670 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
671 #else
672 #define ARCH_32_64(a, b)	b
673 #define INST_CODE_AND_DST(inst, flags, reg) \
674 	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
675 #endif
676 
/* Load / store opcode table, indexed by (inp_flags & MEM_MASK).
   The first 64 entries are the integer forms selected by the LOAD_DATA,
   INDEXED, WRITE_BACK, size and SIGNED_DATA bits; the 8 floating point
   forms follow from index DOUBLE_DATA (0x40).
   On 64 bit builds some entries also carry the INT_ALIGNED / UPDATE_REQ
   marker bits. */
677 static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
678 
679 /* -------- Unsigned -------- */
680 
681 /* Word. */
682 
683 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
684 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
685 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
686 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
687 
688 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
689 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
690 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
691 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
692 
693 /* Byte. */
694 
695 /* u b n i s */ HI(38) /* stb */,
696 /* u b n i l */ HI(34) /* lbz */,
697 /* u b n x s */ HI(31) | LO(215) /* stbx */,
698 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
699 
700 /* u b w i s */ HI(39) /* stbu */,
701 /* u b w i l */ HI(35) /* lbzu */,
702 /* u b w x s */ HI(31) | LO(247) /* stbux */,
703 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
704 
705 /* Half. */
706 
707 /* u h n i s */ HI(44) /* sth */,
708 /* u h n i l */ HI(40) /* lhz */,
709 /* u h n x s */ HI(31) | LO(407) /* sthx */,
710 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
711 
712 /* u h w i s */ HI(45) /* sthu */,
713 /* u h w i l */ HI(41) /* lhzu */,
714 /* u h w x s */ HI(31) | LO(439) /* sthux */,
715 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
716 
717 /* Int. */
718 
719 /* u i n i s */ HI(36) /* stw */,
720 /* u i n i l */ HI(32) /* lwz */,
721 /* u i n x s */ HI(31) | LO(151) /* stwx */,
722 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
723 
724 /* u i w i s */ HI(37) /* stwu */,
725 /* u i w i l */ HI(33) /* lwzu */,
726 /* u i w x s */ HI(31) | LO(183) /* stwux */,
727 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
728 
729 /* -------- Signed -------- */
730 
731 /* Word. */
732 
733 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
734 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
735 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
736 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
737 
738 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
739 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
740 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
741 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
742 
743 /* Byte. */
744 
745 /* s b n i s */ HI(38) /* stb */,
746 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
747 /* s b n x s */ HI(31) | LO(215) /* stbx */,
748 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
749 
750 /* s b w i s */ HI(39) /* stbu */,
751 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
752 /* s b w x s */ HI(31) | LO(247) /* stbux */,
753 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
754 
755 /* Half. */
756 
757 /* s h n i s */ HI(44) /* sth */,
758 /* s h n i l */ HI(42) /* lha */,
759 /* s h n x s */ HI(31) | LO(407) /* sthx */,
760 /* s h n x l */ HI(31) | LO(343) /* lhax */,
761 
762 /* s h w i s */ HI(45) /* sthu */,
763 /* s h w i l */ HI(43) /* lhau */,
764 /* s h w x s */ HI(31) | LO(439) /* sthux */,
765 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
766 
767 /* Int. */
768 
769 /* s i n i s */ HI(36) /* stw */,
770 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
771 /* s i n x s */ HI(31) | LO(151) /* stwx */,
772 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
773 
774 /* s i w i s */ HI(37) /* stwu */,
775 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
776 /* s i w x s */ HI(31) | LO(183) /* stwux */,
777 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
778 
779 /* -------- Double -------- */
780 
781 /* d   n i s */ HI(54) /* stfd */,
782 /* d   n i l */ HI(50) /* lfd */,
783 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
784 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
785 
786 /* s   n i s */ HI(52) /* stfs */,
787 /* s   n i l */ HI(48) /* lfs */,
788 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
789 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
790 
791 };
792 
793 #undef ARCH_32_64
794 
795 /* Simple cases, (no caching is required). */
796 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
797 {
798 	sljit_ins inst;
799 
800 	/* Should work when (arg & REG_MASK) == 0. */
801 	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
802 	SLJIT_ASSERT(arg & SLJIT_MEM);
803 
804 	if (arg & OFFS_REG_MASK) {
805 		if (argw & 0x3)
806 			return 0;
807 		if (inp_flags & ARG_TEST)
808 			return 1;
809 
810 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
811 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
812 		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
813 		return -1;
814 	}
815 
816 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
817 		inp_flags &= ~WRITE_BACK;
818 
819 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
820 	inst = data_transfer_insts[inp_flags & MEM_MASK];
821 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
822 
823 	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
824 		return 0;
825 	if (inp_flags & ARG_TEST)
826 		return 1;
827 #endif
828 
829 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
830 	if (argw > SIMM_MAX || argw < SIMM_MIN)
831 		return 0;
832 	if (inp_flags & ARG_TEST)
833 		return 1;
834 
835 	inst = data_transfer_insts[inp_flags & MEM_MASK];
836 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
837 #endif
838 
839 	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
840 	return -1;
841 }
842 
843 /* See getput_arg below.
844    Note: can_cache is called only for binary operators. Those operator always
845    uses word arguments without write back. */
846 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
847 {
848 	sljit_sw high_short, next_high_short;
849 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
850 	sljit_sw diff;
851 #endif
852 
853 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
854 
855 	if (arg & OFFS_REG_MASK)
856 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
857 
858 	if (next_arg & OFFS_REG_MASK)
859 		return 0;
860 
861 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
862 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
863 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
864 	return high_short == next_high_short;
865 #else
866 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
867 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
868 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
869 		if (high_short == next_high_short)
870 			return 1;
871 	}
872 
873 	diff = argw - next_argw;
874 	if (!(arg & REG_MASK))
875 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
876 
877 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
878 		return 1;
879 
880 	return 0;
881 #endif
882 }
883 
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* When the selected instruction (inst) is flagged INT_ALIGNED and the
   displacement imm has its low two bits set, move those bits into the cached
   address: TMP_REG3 and compiler->cache_argw are both advanced by (imm & 0x3)
   and imm is rounded down to a multiple of four.

   Requirements: `compiler` and `inst` must be in scope and imm must be a
   modifiable lvalue. FAIL_IF may leave the enclosing function.
   The body is wrapped in do { } while (0) so the macro behaves as a single
   statement (safe inside an unbraced if / else). */
#define ADJUST_CACHED_IMM(imm) \
	do { \
		if ((inst & INT_ALIGNED) && ((imm) & 0x3)) { \
			/* Adjust cached value. Fortunately this is really a rare case */ \
			compiler->cache_argw += (imm) & 0x3; \
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | ((imm) & 0x3))); \
			(imm) &= ~0x3; \
		} \
	} while (0)
#endif
893 
894 /* Emit the necessary instructions. See can_cache above.
   Slow path for memory operands: transfers between reg and the memory operand
   (arg, argw); inp_flags selects the data_transfer_insts entry and carries
   LOAD_DATA / WRITE_BACK. (next_arg, next_argw) describe the operand accessed
   afterwards; when an address part can be shared it is left in TMP_REG3 and
   recorded in compiler->cache_arg / compiler->cache_argw.
   Returns the result of the final push_inst; FAIL_IF leaves early when an
   earlier emit step fails. */
895 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
896 {
897 	sljit_si tmp_r;
898 	sljit_ins inst;
899 	sljit_sw high_short, next_high_short;
900 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
901 	sljit_sw diff;
902 #endif
903 
904 	SLJIT_ASSERT(arg & SLJIT_MEM);
905 
	/* Loads whose MEM_MASK index is <= GPR_REG may use reg itself as the
	   address scratch register; everything else uses TMP_REG1. */
906 	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
907 	/* Special case for "mov reg, [reg, ... ]". */
908 	if ((arg & REG_MASK) == tmp_r)
909 		tmp_r = TMP_REG1;
910 
911 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* For an offset register operand only the low two bits of argw
		   (the shift amount) are used. */
912 		argw &= 0x3;
913 		/* Otherwise getput_arg_fast would capture it. */
914 		SLJIT_ASSERT(argw);
915 
916 		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
917 			tmp_r = TMP_REG3;
918 		else {
			/* Keep the shifted offset in TMP_REG3 when the next operand
			   uses the same offset register and shift. */
919 			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
920 				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
921 				compiler->cache_argw = argw;
922 				tmp_r = TMP_REG3;
923 			}
924 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
925 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
926 #else
927 			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
928 #endif
929 		}
930 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
931 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
932 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
933 	}
934 
	/* Without a base register there is nothing to write back to. */
935 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
936 		inp_flags &= ~WRITE_BACK;
937 
938 	inst = data_transfer_insts[inp_flags & MEM_MASK];
939 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
940 
941 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
942 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
943 			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
944 #endif
945 
946 		arg &= REG_MASK;
		/* Upper part of the offset, rounded so that the remaining low
		   16 bits can be used as a signed displacement. */
947 		high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
948 		/* The getput_arg_fast should handle this otherwise. */
949 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
950 		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
951 #else
952 		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
953 #endif
954 
955 		if (inp_flags & WRITE_BACK) {
			/* The base register itself is advanced by high_short. If reg is
			   the base, its value is copied to tmp_r first and tmp_r is
			   transferred instead. */
956 			if (arg == reg) {
957 				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
958 				reg = tmp_r;
959 			}
960 			tmp_r = arg;
961 			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
962 		}
963 		else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) {
			/* Cache miss: compute base + high_short, into TMP_REG3 when the
			   next operand has the same upper part. */
964 			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
965 				next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
966 				if (high_short == next_high_short) {
967 					compiler->cache_arg = SLJIT_IMM | arg;
968 					compiler->cache_argw = next_high_short;
969 					tmp_r = TMP_REG3;
970 				}
971 			}
972 			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
973 		}
974 		else
975 			tmp_r = TMP_REG3;
976 
977 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
978 
979 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
980 	}
981 
982 	/* Everything else is PPC-64 only. */
983 	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Operand without a base register: try the cached value in
		   TMP_REG3 plus a 16 bit displacement first. */
984 		diff = argw - compiler->cache_argw;
985 		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
986 			ADJUST_CACHED_IMM(diff);
987 			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
988 		}
989 
990 		diff = argw - next_argw;
991 		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
992 			SLJIT_ASSERT(inp_flags & LOAD_DATA);
993 
994 			compiler->cache_arg = SLJIT_IMM;
995 			compiler->cache_argw = argw;
996 			tmp_r = TMP_REG3;
997 		}
998 
999 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1000 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
1001 	}
1002 
	/* Cached value matches this operand (cache_arg == arg): reach the
	   target with a displacement from TMP_REG3. */
1003 	diff = argw - compiler->cache_argw;
1004 	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1005 		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
1006 		ADJUST_CACHED_IMM(diff);
1007 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1008 	}
1009 
	/* TMP_REG3 caches an immediate near argw: adjust it to argw if needed
	   and use it as the index register. */
1010 	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1011 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1012 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1013 		if (compiler->cache_argw != argw) {
1014 			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
1015 			compiler->cache_argw = argw;
1016 		}
1017 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1018 	}
1019 
	/* Nothing usable is cached; decide what to cache for the next operand. */
1020 	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1021 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1022 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1023 
1024 		compiler->cache_arg = SLJIT_IMM;
1025 		compiler->cache_argw = argw;
1026 
1027 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1028 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1029 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1030 	}
1031 
1032 	diff = argw - next_argw;
1033 	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1034 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		/* Cache base + argw in TMP_REG3. */
1035 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1036 		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
1037 
1038 		compiler->cache_arg = arg;
1039 		compiler->cache_argw = argw;
1040 
1041 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
1042 	}
1043 
1044 	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1045 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1046 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1047 
1048 		compiler->cache_arg = SLJIT_IMM;
1049 		compiler->cache_argw = argw;
1050 		tmp_r = TMP_REG3;
1051 	}
1052 	else
1053 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1054 
1055 	/* Get the indexed version instead of the normal one. */
1056 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1057 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1058 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
1059 #endif
1060 }
1061 
1062 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1063 {
1064 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1065 		return compiler->error;
1066 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1067 }
1068 
/* Common driver for the integer operations: brings src1 and src2 into
   registers (loading immediates and memory operands as needed), calls
   emit_single_op and, when dst is a memory operand, stores the result.
   input_flags carries the data type / WRITE_BACK bits used for the memory
   transfers together with the ALT_* bits; only the ALT_* subset (plus the
   REG_DEST / REG1_SOURCE / REG2_SOURCE / FAST_DEST / SLOW_DEST bits set here)
   is passed to emit_single_op. Unless ALT_KEEP_CACHE is set, the address
   cache (compiler->cache_arg / cache_argw) is reset on entry.
   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF. */
1069 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
1070 	sljit_si dst, sljit_sw dstw,
1071 	sljit_si src1, sljit_sw src1w,
1072 	sljit_si src2, sljit_sw src2w)
1073 {
1074 	/* arg1 goes to TMP_REG1 or src reg
1075 	   arg2 goes to TMP_REG2, imm or src reg
1076 	   TMP_REG3 can be used for caching
1077 	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
1078 	sljit_si dst_r;
1079 	sljit_si src1_r;
1080 	sljit_si src2_r;
1081 	sljit_si sugg_src2_r = TMP_REG2;
1082 	sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
1083 
1084 	if (!(input_flags & ALT_KEEP_CACHE)) {
1085 		compiler->cache_arg = 0;
1086 		compiler->cache_argw = 0;
1087 	}
1088 
1089 	/* Destination check. */
1090 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move to nowhere from a non-memory source emits nothing. */
1091 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
1092 			return SLJIT_SUCCESS;
1093 		dst_r = TMP_REG2;
1094 	}
1095 	else if (FAST_IS_REG(dst)) {
1096 		dst_r = dst;
1097 		flags |= REG_DEST;
		/* For moves, src2 is loaded straight into the destination register. */
1098 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1099 			sugg_src2_r = dst_r;
1100 	}
1101 	else {
1102 		SLJIT_ASSERT(dst & SLJIT_MEM);
		/* ARG_TEST only asks whether the fast path can encode dst. */
1103 		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1104 			flags |= FAST_DEST;
1105 			dst_r = TMP_REG2;
1106 		}
1107 		else {
1108 			flags |= SLOW_DEST;
1109 			dst_r = 0;
1110 		}
1111 	}
1112 
1113 	/* Source 1. */
1114 	if (FAST_IS_REG(src1)) {
1115 		src1_r = src1;
1116 		flags |= REG1_SOURCE;
1117 	}
1118 	else if (src1 & SLJIT_IMM) {
1119 		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1120 		src1_r = TMP_REG1;
1121 	}
1122 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
1123 		FAIL_IF(compiler->error);
1124 		src1_r = TMP_REG1;
1125 	}
1126 	else
1127 		src1_r = 0;
1128 
1129 	/* Source 2. */
1130 	if (FAST_IS_REG(src2)) {
1131 		src2_r = src2;
1132 		flags |= REG2_SOURCE;
1133 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1134 			dst_r = src2_r;
1135 	}
1136 	else if (src2 & SLJIT_IMM) {
1137 		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1138 		src2_r = sugg_src2_r;
1139 	}
1140 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1141 		FAIL_IF(compiler->error);
1142 		src2_r = sugg_src2_r;
1143 	}
1144 	else
1145 		src2_r = 0;
1146 
1147 	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
1148 	   All arguments are complex addressing modes, and it is a binary operator. */
1149 	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Load order is chosen so that the operand loaded second is the one
		   whose address part can be shared with the next access. */
1150 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1151 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1152 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1153 		}
1154 		else {
1155 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1156 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1157 		}
1158 		src1_r = TMP_REG1;
1159 		src2_r = TMP_REG2;
1160 	}
1161 	else if (src1_r == 0 && src2_r == 0) {
1162 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1163 		src1_r = TMP_REG1;
1164 	}
1165 	else if (src1_r == 0 && dst_r == 0) {
1166 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1167 		src1_r = TMP_REG1;
1168 	}
1169 	else if (src2_r == 0 && dst_r == 0) {
1170 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1171 		src2_r = sugg_src2_r;
1172 	}
1173 
1174 	if (dst_r == 0)
1175 		dst_r = TMP_REG2;
1176 
	/* Anything still unprocessed is loaded without a following operand
	   (next_arg == 0). */
1177 	if (src1_r == 0) {
1178 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1179 		src1_r = TMP_REG1;
1180 	}
1181 
1182 	if (src2_r == 0) {
1183 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1184 		src2_r = sugg_src2_r;
1185 	}
1186 
1187 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1188 
	/* Store the result when the destination is a memory operand. */
1189 	if (flags & (FAST_DEST | SLOW_DEST)) {
1190 		if (flags & FAST_DEST)
1191 			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
1192 		else
1193 			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
1194 	}
1195 	return SLJIT_SUCCESS;
1196 }
1197 
1198 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
1199 {
1200 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1201 	sljit_si int_op = op & SLJIT_INT_OP;
1202 #endif
1203 
1204 	CHECK_ERROR();
1205 	check_sljit_emit_op0(compiler, op);
1206 
1207 	op = GET_OPCODE(op);
1208 	switch (op) {
1209 	case SLJIT_BREAKPOINT:
1210 	case SLJIT_NOP:
1211 		return push_inst(compiler, NOP);
1212 	case SLJIT_UMUL:
1213 	case SLJIT_SMUL:
1214 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
1215 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1216 		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1217 		return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
1218 #else
1219 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1220 		return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
1221 #endif
1222 	case SLJIT_UDIV:
1223 	case SLJIT_SDIV:
1224 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
1225 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1226 		if (int_op) {
1227 			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1228 			FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1229 		} else {
1230 			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1231 			FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1232 		}
1233 		return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
1234 #else
1235 		FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
1236 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
1237 		return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
1238 #endif
1239 	}
1240 
1241 	return SLJIT_SUCCESS;
1242 }
1243 
/* Helper for the typed moves in sljit_emit_op1 (uses its compiler, flags,
   dst, dstw, src and srcw variables). An immediate source is emitted as a
   plain SLJIT_MOV of srcw converted with type_cast; any other source uses the
   typed move `type`. type_flags is OR-ed into the flags passed to emit_op. */
1244 #define EMIT_MOV(type, type_flags, type_cast) \
1245 	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1246 
/* Emits a single source operation: the SLJIT_MOV* / SLJIT_MOVU* family,
   SLJIT_NOT, SLJIT_NEG and SLJIT_CLZ. All of them are forwarded to emit_op
   with TMP_REG1 as a dummy first source and src as the second one.
   Returns the emit_op result; an opcode not listed in the switch emits
   nothing and returns SLJIT_SUCCESS. */
1247 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1248 	sljit_si dst, sljit_sw dstw,
1249 	sljit_si src, sljit_sw srcw)
1250 {
1251 	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1252 	sljit_si op_flags = GET_ALL_FLAGS(op);
1253 
1254 	CHECK_ERROR();
1255 	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1256 	ADJUST_LOCAL_OFFSET(dst, dstw);
1257 	ADJUST_LOCAL_OFFSET(src, srcw);
1258 
1259 	op = GET_OPCODE(op);
	/* An immediate zero is replaced by the TMP_ZERO register. */
1260 	if ((src & SLJIT_IMM) && srcw == 0)
1261 		src = TMP_ZERO;
1262 
1263 	if (op_flags & SLJIT_SET_O)
1264 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1265 
1266 	if (op_flags & SLJIT_INT_OP) {
1267 		if (op < SLJIT_NOT) {
			/* Register to itself move: nothing to emit unless a type cast
			   is needed. */
1268 			if (FAST_IS_REG(src) && src == dst) {
1269 				if (!TYPE_CAST_NEEDED(op))
1270 					return SLJIT_SUCCESS;
1271 			}
1272 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
			/* With SLJIT_INT_OP, signed int loads from memory are turned
			   into unsigned ones and unsigned int immediates into signed
			   ones. */
1273 			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1274 				op = SLJIT_MOV_UI;
1275 			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1276 				op = SLJIT_MOVU_UI;
1277 			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1278 				op = SLJIT_MOV_SI;
1279 			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1280 				op = SLJIT_MOVU_SI;
1281 #endif
1282 		}
1283 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1284 		else {
1285 			/* Most operations expect sign extended arguments. */
1286 			flags |= INT_DATA | SIGNED_DATA;
1287 			if (src & SLJIT_IMM)
1288 				srcw = (sljit_si)srcw;
1289 		}
1290 #endif
1291 	}
1292 
1293 	switch (op) {
1294 	case SLJIT_MOV:
1295 	case SLJIT_MOV_P:
1296 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On 32 bit targets the int moves are plain word moves. */
1297 	case SLJIT_MOV_UI:
1298 	case SLJIT_MOV_SI:
1299 #endif
1300 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1301 
1302 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1303 	case SLJIT_MOV_UI:
1304 		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
1305 
1306 	case SLJIT_MOV_SI:
1307 		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
1308 #endif
1309 
1310 	case SLJIT_MOV_UB:
1311 		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
1312 
1313 	case SLJIT_MOV_SB:
1314 		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
1315 
1316 	case SLJIT_MOV_UH:
1317 		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
1318 
1319 	case SLJIT_MOV_SH:
1320 		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
1321 
	/* The MOVU variants reuse the MOV opcodes and add WRITE_BACK. */
1322 	case SLJIT_MOVU:
1323 	case SLJIT_MOVU_P:
1324 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1325 	case SLJIT_MOVU_UI:
1326 	case SLJIT_MOVU_SI:
1327 #endif
1328 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1329 
1330 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1331 	case SLJIT_MOVU_UI:
1332 		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
1333 
1334 	case SLJIT_MOVU_SI:
1335 		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
1336 #endif
1337 
1338 	case SLJIT_MOVU_UB:
1339 		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
1340 
1341 	case SLJIT_MOVU_SB:
1342 		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
1343 
1344 	case SLJIT_MOVU_UH:
1345 		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
1346 
1347 	case SLJIT_MOVU_SH:
1348 		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
1349 
1350 	case SLJIT_NOT:
1351 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1352 
1353 	case SLJIT_NEG:
1354 		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1355 
1356 	case SLJIT_CLZ:
1357 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM1 marks the SLJIT_INT_OP variant. */
1358 		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1359 #else
1360 		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1361 #endif
1362 	}
1363 
1364 	return SLJIT_SUCCESS;
1365 }
1366 
1367 #undef EMIT_MOV
1368 
/* Immediate classification helpers for sljit_emit_op2. Each one is true only
   when src is an immediate (SLJIT_IMM) and srcw has the named shape:
     TEST_SL_IMM   signed 16 bit value (SIMM_MIN .. SIMM_MAX)
     TEST_UL_IMM   unsigned 16 bit value (only bits 0..15 may be set)
     TEST_SH_IMM   low 16 bits clear; on 64 bit also within signed 32 bit range
     TEST_UH_IMM   only bits 16..31 may be set
     TEST_ADD_IMM  value accepted by the ALT_FORM4 add form of sljit_emit_op2
     TEST_UI_IMM   unsigned 32 bit value (only bits 0..31 may be set)

   The masks of TEST_UH_IMM and TEST_UI_IMM are cast to sljit_sw before they
   are complemented. Without the cast, 0xffff0000 and 0xffffffff have type
   unsigned int, so their complements (0x0000ffff and 0) are zero extended
   when combined with a 64 bit srcw and bits 32..63 would never be tested. */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
1401 
/* Emits a two source operation (ADD, ADDC, SUB, SUBC, MUL, AND, OR, XOR,
   SHL, LSHR, ASHR). When one of the sources is an immediate of a suitable
   shape (see the TEST_*_IMM macros), the immediate is stored in compiler->imm,
   an ALT_FORM* flag selects the immediate variant and TMP_REG2 is passed to
   emit_op as a placeholder second source. Otherwise both sources are passed
   to emit_op unchanged.
   Returns the emit_op result; an opcode not listed in the switch emits
   nothing and returns SLJIT_SUCCESS. */
1402 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
1403 	sljit_si dst, sljit_sw dstw,
1404 	sljit_si src1, sljit_sw src1w,
1405 	sljit_si src2, sljit_sw src2w)
1406 {
1407 	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1408 
1409 	CHECK_ERROR();
1410 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1411 	ADJUST_LOCAL_OFFSET(dst, dstw);
1412 	ADJUST_LOCAL_OFFSET(src1, src1w);
1413 	ADJUST_LOCAL_OFFSET(src2, src2w);
1414 
	/* An immediate zero is replaced by the TMP_ZERO register. */
1415 	if ((src1 & SLJIT_IMM) && src1w == 0)
1416 		src1 = TMP_ZERO;
1417 	if ((src2 & SLJIT_IMM) && src2w == 0)
1418 		src2 = TMP_ZERO;
1419 
1420 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1421 	if (op & SLJIT_INT_OP) {
1422 		/* Most operations expect sign extended arguments. */
1423 		flags |= INT_DATA | SIGNED_DATA;
1424 		if (src1 & SLJIT_IMM)
1425 			src1w = (sljit_si)(src1w);
1426 		if (src2 & SLJIT_IMM)
1427 			src2w = (sljit_si)(src2w);
1428 		if (GET_FLAGS(op))
1429 			flags |= ALT_SIGN_EXT;
1430 	}
1431 #endif
1432 	if (op & SLJIT_SET_O)
1433 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
	/* A caller passing TMP_REG2 as src2 keeps the address cache alive
	   (emit_op does not reset it when ALT_KEEP_CACHE is set). */
1434 	if (src2 == TMP_REG2)
1435 		flags |= ALT_KEEP_CACHE;
1436 
1437 	switch (GET_OPCODE(op)) {
1438 	case SLJIT_ADD:
		/* Without flag requests every immediate form may be used; the
		   operation is commutative, so either source may be the immediate. */
1439 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1440 			if (TEST_SL_IMM(src2, src2w)) {
1441 				compiler->imm = src2w & 0xffff;
1442 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1443 			}
1444 			if (TEST_SL_IMM(src1, src1w)) {
1445 				compiler->imm = src1w & 0xffff;
1446 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1447 			}
1448 			if (TEST_SH_IMM(src2, src2w)) {
1449 				compiler->imm = (src2w >> 16) & 0xffff;
1450 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1451 			}
1452 			if (TEST_SH_IMM(src1, src1w)) {
1453 				compiler->imm = (src1w >> 16) & 0xffff;
1454 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1455 			}
1456 			/* Range between -1 and -32768 is covered above. */
1457 			if (TEST_ADD_IMM(src2, src2w)) {
1458 				compiler->imm = src2w & 0xffffffff;
1459 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1460 			}
1461 			if (TEST_ADD_IMM(src1, src1w)) {
1462 				compiler->imm = src1w & 0xffffffff;
1463 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1464 			}
1465 		}
		/* ALT_FORM3 is used when neither SLJIT_SET_E nor SLJIT_SET_O is
		   requested. */
1466 		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
1467 			if (TEST_SL_IMM(src2, src2w)) {
1468 				compiler->imm = src2w & 0xffff;
1469 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1470 			}
1471 			if (TEST_SL_IMM(src1, src1w)) {
1472 				compiler->imm = src1w & 0xffff;
1473 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1474 			}
1475 		}
1476 		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
1477 
1478 	case SLJIT_ADDC:
1479 		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1480 
1481 	case SLJIT_SUB:
		/* Without flag requests, subtracting an immediate is emitted as an
		   addition of the negated immediate. */
1482 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1483 			if (TEST_SL_IMM(src2, -src2w)) {
1484 				compiler->imm = (-src2w) & 0xffff;
1485 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1486 			}
1487 			if (TEST_SL_IMM(src1, src1w)) {
1488 				compiler->imm = src1w & 0xffff;
1489 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1490 			}
1491 			if (TEST_SH_IMM(src2, -src2w)) {
1492 				compiler->imm = ((-src2w) >> 16) & 0xffff;
1493 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1494 			}
1495 			/* Range between -1 and -32768 is covered above. */
1496 			if (TEST_ADD_IMM(src2, -src2w)) {
1497 				compiler->imm = -src2w & 0xffffffff;
1498 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1499 			}
1500 		}
		/* Result is discarded and only E / U / S flags are requested. */
1501 		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
1502 			if (!(op & SLJIT_SET_U)) {
1503 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1504 				if (TEST_SL_IMM(src2, src2w)) {
1505 					compiler->imm = src2w & 0xffff;
1506 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1507 				}
1508 				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
1509 					compiler->imm = src1w & 0xffff;
1510 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1511 				}
1512 			}
1513 			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
1514 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1515 				if (TEST_UL_IMM(src2, src2w)) {
1516 					compiler->imm = src2w & 0xffff;
1517 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1518 				}
1519 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1520 			}
1521 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
1522 				compiler->imm = src2w;
1523 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1524 			}
1525 			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1526 		}
1527 		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
1528 			if (TEST_SL_IMM(src2, -src2w)) {
1529 				compiler->imm = (-src2w) & 0xffff;
1530 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1531 			}
1532 		}
1533 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
1534 		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
1535 
1536 	case SLJIT_SUBC:
1537 		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1538 
1539 	case SLJIT_MUL:
1540 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM2 marks the SLJIT_INT_OP variant. */
1541 		if (op & SLJIT_INT_OP)
1542 			flags |= ALT_FORM2;
1543 #endif
1544 		if (!GET_FLAGS(op)) {
1545 			if (TEST_SL_IMM(src2, src2w)) {
1546 				compiler->imm = src2w & 0xffff;
1547 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1548 			}
1549 			if (TEST_SL_IMM(src1, src1w)) {
1550 				compiler->imm = src1w & 0xffff;
1551 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1552 			}
1553 		}
1554 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1555 
1556 	case SLJIT_AND:
1557 	case SLJIT_OR:
1558 	case SLJIT_XOR:
1559 		/* Commutative unsigned operations. */
		/* The 16 bit immediate forms are used for AND even when flags are
		   requested; OR and XOR use them only without flag requests. */
1560 		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1561 			if (TEST_UL_IMM(src2, src2w)) {
1562 				compiler->imm = src2w;
1563 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1564 			}
1565 			if (TEST_UL_IMM(src1, src1w)) {
1566 				compiler->imm = src1w;
1567 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1568 			}
1569 			if (TEST_UH_IMM(src2, src2w)) {
1570 				compiler->imm = (src2w >> 16) & 0xffff;
1571 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1572 			}
1573 			if (TEST_UH_IMM(src1, src1w)) {
1574 				compiler->imm = (src1w >> 16) & 0xffff;
1575 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1576 			}
1577 		}
		/* ALT_FORM3 (immediate accepted by TEST_UI_IMM) is only used for
		   OR / XOR without flag requests. */
1578 		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
1579 			if (TEST_UI_IMM(src2, src2w)) {
1580 				compiler->imm = src2w;
1581 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1582 			}
1583 			if (TEST_UI_IMM(src1, src1w)) {
1584 				compiler->imm = src1w;
1585 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1586 			}
1587 		}
1588 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1589 
1590 	case SLJIT_ASHR:
1591 		if (op & SLJIT_KEEP_FLAGS)
1592 			flags |= ALT_FORM3;
1593 		/* Fall through. */
1594 	case SLJIT_SHL:
1595 	case SLJIT_LSHR:
1596 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1597 		if (op & SLJIT_INT_OP)
1598 			flags |= ALT_FORM2;
1599 #endif
		/* Immediate shift amount: ALT_FORM1 with the amount in compiler->imm. */
1600 		if (src2 & SLJIT_IMM) {
1601 			compiler->imm = src2w;
1602 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1603 		}
1604 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1605 	}
1606 
1607 	return SLJIT_SUCCESS;
1608 }
1609 
1610 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
1611 {
1612 	check_sljit_get_register_index(reg);
1613 	return reg_map[reg];
1614 }
1615 
1616 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
1617 {
1618 	check_sljit_get_float_register_index(reg);
1619 	return reg;
1620 }
1621 
1622 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
1623 	void *instruction, sljit_si size)
1624 {
1625 	CHECK_ERROR();
1626 	check_sljit_emit_op_custom(compiler, instruction, size);
1627 	SLJIT_ASSERT(size == 4);
1628 
1629 	return push_inst(compiler, *(sljit_ins*)instruction);
1630 }
1631 
1632 /* --------------------------------------------------------------------- */
1633 /*  Floating point operators                                             */
1634 /* --------------------------------------------------------------------- */
1635 
1636 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
1637 {
1638 #ifdef SLJIT_IS_FPU_AVAILABLE
1639 	return SLJIT_IS_FPU_AVAILABLE;
1640 #else
1641 	/* Available by default. */
1642 	return 1;
1643 #endif
1644 }
1645 
/* FLOAT_DATA(op): memory transfer flags for a floating point operand.
   DOUBLE_DATA, with the SLJIT_SINGLE_OP bit of op shifted down by 6 and
   OR-ed in.
   SELECT_FOP(op, single_ins, double_ins): single_ins when op has
   SLJIT_SINGLE_OP set, double_ins otherwise.
   The op argument is parenthesized so that expressions such as `a | b`
   are tested as a whole. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_SINGLE_OP) >> 6))
#define SELECT_FOP(op, single_ins, double_ins) (((op) & SLJIT_SINGLE_OP) ? (single_ins) : (double_ins))
1648 
1649 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
1650 	sljit_si dst, sljit_sw dstw,
1651 	sljit_si src, sljit_sw srcw)
1652 {
1653 	sljit_si dst_fr;
1654 
1655 	CHECK_ERROR();
1656 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
1657 	SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1658 
1659 	compiler->cache_arg = 0;
1660 	compiler->cache_argw = 0;
1661 
1662 	if (GET_OPCODE(op) == SLJIT_CMPD) {
1663 		if (dst & SLJIT_MEM) {
1664 			FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
1665 			dst = TMP_FREG1;
1666 		}
1667 
1668 		if (src & SLJIT_MEM) {
1669 			FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
1670 			src = TMP_FREG2;
1671 		}
1672 
1673 		return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src));
1674 	}
1675 
1676 	dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1677 
1678 	if (src & SLJIT_MEM) {
1679 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
1680 		src = dst_fr;
1681 	}
1682 
1683 	switch (GET_OPCODE(op)) {
1684 		case SLJIT_MOVD:
1685 			if (src != dst_fr && dst_fr != TMP_FREG1)
1686 				FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src)));
1687 			break;
1688 		case SLJIT_NEGD:
1689 			FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src)));
1690 			break;
1691 		case SLJIT_ABSD:
1692 			FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src)));
1693 			break;
1694 	}
1695 
1696 	if (dst_fr == TMP_FREG1) {
1697 		if (GET_OPCODE(op) == SLJIT_MOVD)
1698 			dst_fr = src;
1699 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
1700 	}
1701 
1702 	return SLJIT_SUCCESS;
1703 }
1704 
1705 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
1706 	sljit_si dst, sljit_sw dstw,
1707 	sljit_si src1, sljit_sw src1w,
1708 	sljit_si src2, sljit_sw src2w)
1709 {
1710 	sljit_si dst_fr, flags = 0;
1711 
1712 	CHECK_ERROR();
1713 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1714 
1715 	compiler->cache_arg = 0;
1716 	compiler->cache_argw = 0;
1717 
1718 	dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1719 
1720 	if (src1 & SLJIT_MEM) {
1721 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
1722 			FAIL_IF(compiler->error);
1723 			src1 = TMP_FREG1;
1724 		} else
1725 			flags |= ALT_FORM1;
1726 	}
1727 
1728 	if (src2 & SLJIT_MEM) {
1729 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
1730 			FAIL_IF(compiler->error);
1731 			src2 = TMP_FREG2;
1732 		} else
1733 			flags |= ALT_FORM2;
1734 	}
1735 
1736 	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
1737 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1738 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
1739 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1740 		}
1741 		else {
1742 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1743 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1744 		}
1745 	}
1746 	else if (flags & ALT_FORM1)
1747 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1748 	else if (flags & ALT_FORM2)
1749 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1750 
1751 	if (flags & ALT_FORM1)
1752 		src1 = TMP_FREG1;
1753 	if (flags & ALT_FORM2)
1754 		src2 = TMP_FREG2;
1755 
1756 	switch (GET_OPCODE(op)) {
1757 	case SLJIT_ADDD:
1758 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2)));
1759 		break;
1760 
1761 	case SLJIT_SUBD:
1762 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2)));
1763 		break;
1764 
1765 	case SLJIT_MULD:
1766 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
1767 		break;
1768 
1769 	case SLJIT_DIVD:
1770 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2)));
1771 		break;
1772 	}
1773 
1774 	if (dst_fr == TMP_FREG2)
1775 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
1776 
1777 	return SLJIT_SUCCESS;
1778 }
1779 
1780 #undef FLOAT_DATA
1781 #undef SELECT_FOP
1782 
1783 /* --------------------------------------------------------------------- */
1784 /*  Other instructions                                                   */
1785 /* --------------------------------------------------------------------- */
1786 
1787 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
1788 {
1789 	CHECK_ERROR();
1790 	check_sljit_emit_fast_enter(compiler, dst, dstw);
1791 	ADJUST_LOCAL_OFFSET(dst, dstw);
1792 
1793 	/* For UNUSED dst. Uncommon, but possible. */
1794 	if (dst == SLJIT_UNUSED)
1795 		return SLJIT_SUCCESS;
1796 
1797 	if (FAST_IS_REG(dst))
1798 		return push_inst(compiler, MFLR | D(dst));
1799 
1800 	/* Memory. */
1801 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
1802 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
1803 }
1804 
1805 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
1806 {
1807 	CHECK_ERROR();
1808 	check_sljit_emit_fast_return(compiler, src, srcw);
1809 	ADJUST_LOCAL_OFFSET(src, srcw);
1810 
1811 	if (FAST_IS_REG(src))
1812 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
1813 	else {
1814 		if (src & SLJIT_MEM)
1815 			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
1816 		else if (src & SLJIT_IMM)
1817 			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
1818 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
1819 	}
1820 	return push_inst(compiler, BLR);
1821 }
1822 
1823 /* --------------------------------------------------------------------- */
1824 /*  Conditional instructions                                             */
1825 /* --------------------------------------------------------------------- */
1826 
1827 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1828 {
1829 	struct sljit_label *label;
1830 
1831 	CHECK_ERROR_PTR();
1832 	check_sljit_emit_label(compiler);
1833 
1834 	if (compiler->last_label && compiler->last_label->size == compiler->size)
1835 		return compiler->last_label;
1836 
1837 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1838 	PTR_FAIL_IF(!label);
1839 	set_label(label, compiler);
1840 	return label;
1841 }
1842 
1843 static sljit_ins get_bo_bi_flags(sljit_si type)
1844 {
1845 	switch (type) {
1846 	case SLJIT_C_EQUAL:
1847 		return (12 << 21) | (2 << 16);
1848 
1849 	case SLJIT_C_NOT_EQUAL:
1850 		return (4 << 21) | (2 << 16);
1851 
1852 	case SLJIT_C_LESS:
1853 	case SLJIT_C_FLOAT_LESS:
1854 		return (12 << 21) | ((4 + 0) << 16);
1855 
1856 	case SLJIT_C_GREATER_EQUAL:
1857 	case SLJIT_C_FLOAT_GREATER_EQUAL:
1858 		return (4 << 21) | ((4 + 0) << 16);
1859 
1860 	case SLJIT_C_GREATER:
1861 	case SLJIT_C_FLOAT_GREATER:
1862 		return (12 << 21) | ((4 + 1) << 16);
1863 
1864 	case SLJIT_C_LESS_EQUAL:
1865 	case SLJIT_C_FLOAT_LESS_EQUAL:
1866 		return (4 << 21) | ((4 + 1) << 16);
1867 
1868 	case SLJIT_C_SIG_LESS:
1869 		return (12 << 21) | (0 << 16);
1870 
1871 	case SLJIT_C_SIG_GREATER_EQUAL:
1872 		return (4 << 21) | (0 << 16);
1873 
1874 	case SLJIT_C_SIG_GREATER:
1875 		return (12 << 21) | (1 << 16);
1876 
1877 	case SLJIT_C_SIG_LESS_EQUAL:
1878 		return (4 << 21) | (1 << 16);
1879 
1880 	case SLJIT_C_OVERFLOW:
1881 	case SLJIT_C_MUL_OVERFLOW:
1882 		return (12 << 21) | (3 << 16);
1883 
1884 	case SLJIT_C_NOT_OVERFLOW:
1885 	case SLJIT_C_MUL_NOT_OVERFLOW:
1886 		return (4 << 21) | (3 << 16);
1887 
1888 	case SLJIT_C_FLOAT_EQUAL:
1889 		return (12 << 21) | ((4 + 2) << 16);
1890 
1891 	case SLJIT_C_FLOAT_NOT_EQUAL:
1892 		return (4 << 21) | ((4 + 2) << 16);
1893 
1894 	case SLJIT_C_FLOAT_UNORDERED:
1895 		return (12 << 21) | ((4 + 3) << 16);
1896 
1897 	case SLJIT_C_FLOAT_ORDERED:
1898 		return (4 << 21) | ((4 + 3) << 16);
1899 
1900 	default:
1901 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
1902 		return (20 << 21);
1903 	}
1904 }
1905 
1906 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
1907 {
1908 	struct sljit_jump *jump;
1909 	sljit_ins bo_bi_flags;
1910 
1911 	CHECK_ERROR_PTR();
1912 	check_sljit_emit_jump(compiler, type);
1913 
1914 	bo_bi_flags = get_bo_bi_flags(type & 0xff);
1915 	if (!bo_bi_flags)
1916 		return NULL;
1917 
1918 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1919 	PTR_FAIL_IF(!jump);
1920 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1921 	type &= 0xff;
1922 
1923 	/* In PPC, we don't need to touch the arguments. */
1924 	if (type < SLJIT_JUMP)
1925 		jump->flags |= IS_COND;
1926 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1927 	if (type >= SLJIT_CALL0)
1928 		jump->flags |= IS_CALL;
1929 #endif
1930 
1931 	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
1932 	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
1933 	jump->addr = compiler->size;
1934 	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
1935 	return jump;
1936 }
1937 
1938 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
1939 {
1940 	struct sljit_jump *jump = NULL;
1941 	sljit_si src_r;
1942 
1943 	CHECK_ERROR();
1944 	check_sljit_emit_ijump(compiler, type, src, srcw);
1945 	ADJUST_LOCAL_OFFSET(src, srcw);
1946 
1947 	if (FAST_IS_REG(src)) {
1948 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1949 		if (type >= SLJIT_CALL0) {
1950 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
1951 			src_r = TMP_CALL_REG;
1952 		}
1953 		else
1954 			src_r = src;
1955 #else
1956 		src_r = src;
1957 #endif
1958 	} else if (src & SLJIT_IMM) {
1959 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1960 		FAIL_IF(!jump);
1961 		set_jump(jump, compiler, JUMP_ADDR);
1962 		jump->u.target = srcw;
1963 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
1964 		if (type >= SLJIT_CALL0)
1965 			jump->flags |= IS_CALL;
1966 #endif
1967 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
1968 		src_r = TMP_CALL_REG;
1969 	}
1970 	else {
1971 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
1972 		src_r = TMP_CALL_REG;
1973 	}
1974 
1975 	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
1976 	if (jump)
1977 		jump->addr = compiler->size;
1978 	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
1979 }
1980 
/* Get a bit from CR, all other bits are zeroed.
   Emits MFCR into dst followed by an RLWINM on dst whose rotate amount is
   1 + bit. Both pushes go through FAIL_IF, so the macro may only be used in
   a function where FAIL_IF is valid and `compiler` is in scope.
   Wrapped in do { } while (0) so that the two statements stay together when
   the macro is the body of an unbraced if/else. */
#define GET_CR_BIT(bit, dst) \
	do { \
		FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
		FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); \
	} while (0)

/* Flips the lowest bit of dst (XORI with 0x1). Same usage rules as
   GET_CR_BIT; the expansion no longer ends in its own semicolon. */
#define INVERT_BIT(dst) \
	do { \
		FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); \
	} while (0)
1988 
1989 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
1990 	sljit_si dst, sljit_sw dstw,
1991 	sljit_si src, sljit_sw srcw,
1992 	sljit_si type)
1993 {
1994 	sljit_si reg, input_flags;
1995 	sljit_si flags = GET_ALL_FLAGS(op);
1996 	sljit_sw original_dstw = dstw;
1997 
1998 	CHECK_ERROR();
1999 	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
2000 	ADJUST_LOCAL_OFFSET(dst, dstw);
2001 
2002 	if (dst == SLJIT_UNUSED)
2003 		return SLJIT_SUCCESS;
2004 
2005 	op = GET_OPCODE(op);
2006 	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2007 
2008 	compiler->cache_arg = 0;
2009 	compiler->cache_argw = 0;
2010 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2011 		ADJUST_LOCAL_OFFSET(src, srcw);
2012 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2013 		input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
2014 #else
2015 		input_flags = WORD_DATA;
2016 #endif
2017 		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2018 		src = TMP_REG1;
2019 		srcw = 0;
2020 	}
2021 
2022 	switch (type) {
2023 	case SLJIT_C_EQUAL:
2024 		GET_CR_BIT(2, reg);
2025 		break;
2026 
2027 	case SLJIT_C_NOT_EQUAL:
2028 		GET_CR_BIT(2, reg);
2029 		INVERT_BIT(reg);
2030 		break;
2031 
2032 	case SLJIT_C_LESS:
2033 	case SLJIT_C_FLOAT_LESS:
2034 		GET_CR_BIT(4 + 0, reg);
2035 		break;
2036 
2037 	case SLJIT_C_GREATER_EQUAL:
2038 	case SLJIT_C_FLOAT_GREATER_EQUAL:
2039 		GET_CR_BIT(4 + 0, reg);
2040 		INVERT_BIT(reg);
2041 		break;
2042 
2043 	case SLJIT_C_GREATER:
2044 	case SLJIT_C_FLOAT_GREATER:
2045 		GET_CR_BIT(4 + 1, reg);
2046 		break;
2047 
2048 	case SLJIT_C_LESS_EQUAL:
2049 	case SLJIT_C_FLOAT_LESS_EQUAL:
2050 		GET_CR_BIT(4 + 1, reg);
2051 		INVERT_BIT(reg);
2052 		break;
2053 
2054 	case SLJIT_C_SIG_LESS:
2055 		GET_CR_BIT(0, reg);
2056 		break;
2057 
2058 	case SLJIT_C_SIG_GREATER_EQUAL:
2059 		GET_CR_BIT(0, reg);
2060 		INVERT_BIT(reg);
2061 		break;
2062 
2063 	case SLJIT_C_SIG_GREATER:
2064 		GET_CR_BIT(1, reg);
2065 		break;
2066 
2067 	case SLJIT_C_SIG_LESS_EQUAL:
2068 		GET_CR_BIT(1, reg);
2069 		INVERT_BIT(reg);
2070 		break;
2071 
2072 	case SLJIT_C_OVERFLOW:
2073 	case SLJIT_C_MUL_OVERFLOW:
2074 		GET_CR_BIT(3, reg);
2075 		break;
2076 
2077 	case SLJIT_C_NOT_OVERFLOW:
2078 	case SLJIT_C_MUL_NOT_OVERFLOW:
2079 		GET_CR_BIT(3, reg);
2080 		INVERT_BIT(reg);
2081 		break;
2082 
2083 	case SLJIT_C_FLOAT_EQUAL:
2084 		GET_CR_BIT(4 + 2, reg);
2085 		break;
2086 
2087 	case SLJIT_C_FLOAT_NOT_EQUAL:
2088 		GET_CR_BIT(4 + 2, reg);
2089 		INVERT_BIT(reg);
2090 		break;
2091 
2092 	case SLJIT_C_FLOAT_UNORDERED:
2093 		GET_CR_BIT(4 + 3, reg);
2094 		break;
2095 
2096 	case SLJIT_C_FLOAT_ORDERED:
2097 		GET_CR_BIT(4 + 3, reg);
2098 		INVERT_BIT(reg);
2099 		break;
2100 
2101 	default:
2102 		SLJIT_ASSERT_STOP();
2103 		break;
2104 	}
2105 
2106 	if (op < SLJIT_ADD) {
2107 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2108 		if (op == SLJIT_MOV)
2109 			input_flags = WORD_DATA;
2110 		else {
2111 			op = SLJIT_MOV_UI;
2112 			input_flags = INT_DATA;
2113 		}
2114 #else
2115 		op = SLJIT_MOV;
2116 		input_flags = WORD_DATA;
2117 #endif
2118 		if (reg != TMP_REG2)
2119 			return SLJIT_SUCCESS;
2120 		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2121 	}
2122 
2123 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2124 	compiler->skip_checks = 1;
2125 #endif
2126 	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
2127 }
2128 
2129 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2130 {
2131 	struct sljit_const *const_;
2132 	sljit_si reg;
2133 
2134 	CHECK_ERROR_PTR();
2135 	check_sljit_emit_const(compiler, dst, dstw, init_value);
2136 	ADJUST_LOCAL_OFFSET(dst, dstw);
2137 
2138 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2139 	PTR_FAIL_IF(!const_);
2140 	set_const(const_, compiler);
2141 
2142 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2143 
2144 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2145 
2146 	if (dst & SLJIT_MEM)
2147 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2148 	return const_;
2149 }
2150