xref: /netbsd-src/sys/external/bsd/sljit/dist/sljit_src/sljitNativeSPARC_common.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: sljitNativeSPARC_common.c,v 1.3 2016/05/29 17:09:33 alnsn Exp $	*/
2 
3 /*
4  *    Stack-less Just-In-Time compiler
5  *
6  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without modification, are
9  * permitted provided that the following conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above copyright notice, this list of
12  *      conditions and the following disclaimer.
13  *
14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
15  *      of conditions and the following disclaimer in the documentation and/or other materials
16  *      provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
30 {
31 	return "SPARC" SLJIT_CPUINFO;
32 }
33 
/* Type holding one encoded machine instruction word.
   The same type is used for both sparc-32 and sparc-64. */
typedef sljit_u32 sljit_ins;
37 
38 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
39 
/* Issues the SPARC `flush` instruction over the instruction words in
   [from, to). Nothing is done when the range is empty (from == to).
   Every flush is followed by an 8 byte step (two instruction words); when
   the stepping stops exactly on `to`, one more flush is issued for the
   word just below `to`. */
static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
	/* Hand-written variant selected when __SUNPRO_C is below 0x590.
	   It reads the two arguments from %i0 (from) and %i1 (to). */
	__asm (
		/* if (from == to) return */
		"cmp %i0, %i1\n"
		"be .leave\n"
		"nop\n"

		/* loop until from >= to */
		".mainloop:\n"
		"flush %i0\n"
		"add %i0, 8, %i0\n"
		"cmp %i0, %i1\n"
		"bcs .mainloop\n"
		"nop\n"

		/* The comparison was done above. */
		"bne .leave\n"
		/* nop is not necessary here, since the
		   sub operation has no side effect. */
		"sub %i0, 4, %i0\n"
		"flush %i0\n"
		".leave:"
	);
#else
	/* Portable variant: the same algorithm written in C, with only the
	   flush instruction itself as inline assembly. */
	if (SLJIT_UNLIKELY(from == to))
		return;

	do {
		__asm__ volatile (
			"flush %0\n"
			: : "r"(from)
		);
		/* Operates at least on doubleword. */
		from += 2;
	} while (from < to);

	/* When the loop overshoots (from > to) nothing more is flushed;
	   only an exact hit on `to` gets the additional flush below. */
	if (from == to) {
		/* Flush the last word. */
		from --;
		__asm__ volatile (
			"flush %0\n"
			: : "r"(from)
		);
	}
#endif
}
88 
89 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
90 
/* Indexes (into reg_map) of the temporaries used internally by this
   backend; they follow the registers visible to the sljit user.
   TMP_REG2 is not used by getput_arg */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_LINK	(SLJIT_NUMBER_OF_REGISTERS + 5)

/* Temporary floating point registers.
   NOTE(review): these look like raw FPU register numbers rather than
   reg_map indexes - confirm against the floating point emitter. */
#define TMP_FREG1	(0)
#define TMP_FREG2	((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)

/* Translates an sljit register index into the SPARC register number that
   the D(), S1(), S2() and DR() macros put into an instruction word.
   The table has 24 initializers, so it presumably relies on
   SLJIT_NUMBER_OF_REGISTERS being 18 - TODO confirm. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15
};
103 
104 /* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
106 /* --------------------------------------------------------------------- */
107 
/* Helpers that place a value into one field of a 32 bit instruction word.
     D / S1 / S2     destination / first source / second source register,
                     given as an sljit register index (mapped by reg_map)
     DA / S1A / S2A  same fields, but given as absolute register numbers
     IMM_ARG         bit 13: the second operand is an immediate
     IMM(imm)        13 bit immediate together with IMM_ARG
     DOP(op)         field starting at bit 5 (used by the floating point
                     opcode templates)
     DR(dr)          absolute register number of an sljit register, passed
                     as the delay_slot argument of push_inst
     OPC1 / OPC2 / OPC3
                     opcode fields starting at bit 30 / 22 / 19
     SET_FLAGS       OPC3 bit that the emitter ORs into an ALU opcode
                     (presumably selecting its condition-code-setting form) */
#define D(d)		(reg_map[d] << 25)
#define DA(d)		((d) << 25)
#define S1(s1)		(reg_map[s1] << 14)
#define S2(s2)		(reg_map[s2])
#define S1A(s1)		((s1) << 14)
#define S2A(s2)		(s2)
#define IMM_ARG		0x2000
#define DOP(op)		((op) << 5)
#define IMM(imm)	(((imm) & 0x1fff) | IMM_ARG)

#define DR(dr)		(reg_map[dr])
/* The cast is required: OPC1(0x2) and OPC1(0x3) reach bit 31, and shifting
   a signed int into the sign bit is undefined behaviour. Shifting as
   sljit_ins (unsigned) yields the same bit pattern in a well defined way. */
#define OPC1(opcode)	((sljit_ins)(opcode) << 30)
#define OPC2(opcode)	((opcode) << 22)
#define OPC3(opcode)	((opcode) << 19)
#define SET_FLAGS	OPC3(0x10)
123 
/* Opcode templates. Each one only fixes the opcode fields (OPC1 plus OPC2
   or OPC3, and DOP for the floating point operations); the register and
   immediate fields are OR-ed in where the instruction is emitted.
   NOP and SETHI intentionally expand to the same value: NOP is that
   template with all remaining fields left zero. */
#define ADD		(OPC1(0x2) | OPC3(0x00))
#define ADDC		(OPC1(0x2) | OPC3(0x08))
#define AND		(OPC1(0x2) | OPC3(0x01))
#define ANDN		(OPC1(0x2) | OPC3(0x05))
#define CALL		(OPC1(0x1))
#define FABSS		(OPC1(0x2) | OPC3(0x34) | DOP(0x09))
#define FADDD		(OPC1(0x2) | OPC3(0x34) | DOP(0x42))
#define FADDS		(OPC1(0x2) | OPC3(0x34) | DOP(0x41))
#define FCMPD		(OPC1(0x2) | OPC3(0x35) | DOP(0x52))
#define FCMPS		(OPC1(0x2) | OPC3(0x35) | DOP(0x51))
#define FDIVD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
#define FDIVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
#define FDTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
#define FDTOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
#define FITOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
#define FITOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
#define FMOVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x01))
#define FMULD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
#define FMULS		(OPC1(0x2) | OPC3(0x34) | DOP(0x49))
#define FNEGS		(OPC1(0x2) | OPC3(0x34) | DOP(0x05))
#define FSTOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
#define FSTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
#define FSUBD		(OPC1(0x2) | OPC3(0x34) | DOP(0x46))
#define FSUBS		(OPC1(0x2) | OPC3(0x34) | DOP(0x45))
#define JMPL		(OPC1(0x2) | OPC3(0x38))
#define NOP		(OPC1(0x0) | OPC2(0x04))
#define OR		(OPC1(0x2) | OPC3(0x02))
#define ORN		(OPC1(0x2) | OPC3(0x06))
/* Reads the Y register; the source field is fixed to 0. */
#define RDY		(OPC1(0x2) | OPC3(0x28) | S1A(0))
#define RESTORE		(OPC1(0x2) | OPC3(0x3d))
#define SAVE		(OPC1(0x2) | OPC3(0x3c))
#define SETHI		(OPC1(0x0) | OPC2(0x04))
/* The shifts with bit 12 set are the 64 bit ("X") forms. */
#define SLL		(OPC1(0x2) | OPC3(0x25))
#define SLLX		(OPC1(0x2) | OPC3(0x25) | (1 << 12))
#define SRA		(OPC1(0x2) | OPC3(0x27))
#define SRAX		(OPC1(0x2) | OPC3(0x27) | (1 << 12))
#define SRL		(OPC1(0x2) | OPC3(0x26))
#define SRLX		(OPC1(0x2) | OPC3(0x26) | (1 << 12))
#define SUB		(OPC1(0x2) | OPC3(0x04))
#define SUBC		(OPC1(0x2) | OPC3(0x0c))
/* Trap with the field at bit 25 set to 8; emitted for SLJIT_BREAKPOINT. */
#define TA		(OPC1(0x2) | OPC3(0x3a) | (8 << 25))
/* Writes the Y register; the destination field is fixed to 0. */
#define WRY		(OPC1(0x2) | OPC3(0x30) | DA(0))
#define XOR		(OPC1(0x2) | OPC3(0x03))
#define XNOR		(OPC1(0x2) | OPC3(0x07))
168 
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* Range of a branch displacement, counted in instruction words, and the
   mask of the displacement field inside the branch instruction. */
#define MAX_DISP	(0x1fffff)
#define MIN_DISP	(-0x200000)
#define DISP_MASK	(0x3fffff)

/* Opcode templates that are only defined for sparc-32. */
#define BICC		(OPC1(0x0) | OPC2(0x2))
#define FBFCC		(OPC1(0x0) | OPC2(0x6))
/* SLL_W is the left shift operating on a whole machine word. */
#define SLL_W		SLL
#define SDIV		(OPC1(0x2) | OPC3(0x0f))
#define SMUL		(OPC1(0x2) | OPC3(0x0b))
#define UDIV		(OPC1(0x2) | OPC3(0x0e))
#define UMUL		(OPC1(0x2) | OPC3(0x0a))
#else
#define SLL_W		SLLX
#endif

/* Limits of the signed 13 bit immediate encoded by IMM(). */
#define SIMM_MAX	(0x0fff)
#define SIMM_MIN	(-0x1000)
187 
188 /* dest_reg is the absolute name of the register
189    Useful for reordering instructions in the delay slot. */
190 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot)
191 {
192 	sljit_ins *ptr;
193 	SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
194 		|| (delay_slot & DST_INS_MASK) == MOVABLE_INS
195 		|| (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
196 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
197 	FAIL_IF(!ptr);
198 	*ptr = ins;
199 	compiler->size++;
200 	compiler->delay_slot = delay_slot;
201 	return SLJIT_SUCCESS;
202 }
203 
/* Tries to replace the generic jump sequence at jump->addr with a shorter,
   pc-relative form while the code is being copied to its final place.
     jump      the jump being processed; its flags and addr are updated
     code_ptr  position of the last word copied so far
     code      start of the generated code
   Returns the position of the last instruction word that belongs to the
   jump after the rewrite; code_ptr is returned unchanged when the jump is
   left as it is. The displacement itself is filled in later by
   sljit_generate_code, based on the PATCH_CALL / PATCH_B flag set here. */
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_ins *inst;
	sljit_ins saved_inst;

	/* Rewritable jumps keep their full length form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return code_ptr;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* label->size is a word offset relative to the start of the code. */
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}
	inst = (sljit_ins*)jump->addr;

#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
	if (jump->flags & IS_CALL) {
		/* Call is always patchable on sparc 32. */
		jump->flags |= PATCH_CALL;
		if (jump->flags & IS_MOVABLE) {
			/* The preceding instruction is moved behind the call
			   (into its delay slot) and the call takes its place. */
			inst[0] = inst[-1];
			inst[-1] = CALL;
			jump->addr -= sizeof(sljit_ins);
			return inst;
		}
		inst[0] = CALL;
		inst[1] = NOP;
		return inst + 1;
	}
#else
	/* Both calls and BPr instructions shall not pass this point. */
#error "Implementation required"
#endif

	/* For conditional jumps the sequence starts one word earlier. */
	if (jump->flags & IS_COND)
		inst--;

	if (jump->flags & IS_MOVABLE) {
		/* The branch would be placed one word earlier, so the
		   displacement is measured from there. */
		diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2;
		if (diff <= MAX_DISP && diff >= MIN_DISP) {
			jump->flags |= PATCH_B;
			inst--;
			if (jump->flags & IS_COND) {
				/* Swap the two words and toggle bit 28 of the branch
				   (presumably inverting its condition - confirm against
				   the code that emits conditional jumps). */
				saved_inst = inst[0];
				inst[0] = inst[1] ^ (1 << 28);
				inst[1] = saved_inst;
			} else {
				/* The preceding instruction follows the new branch,
				   whose condition field is set to 0x8. */
				inst[1] = inst[0];
				inst[0] = BICC | DA(0x8);
			}
			jump->addr = (sljit_uw)inst;
			return inst + 1;
		}
	}

	/* Branch in place, followed by a NOP. */
	diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
	if (diff <= MAX_DISP && diff >= MIN_DISP) {
		jump->flags |= PATCH_B;
		if (jump->flags & IS_COND)
			inst[0] ^= (1 << 28);
		else
			inst[0] = BICC | DA(0x8);
		inst[1] = NOP;
		jump->addr = (sljit_uw)inst;
		return inst + 1;
	}

	/* Target out of branch range: keep the long form. */
	return code_ptr;
}
276 
/* Produces the final machine code for everything emitted into `compiler`.
   The instruction words are copied from the compiler's buffer fragments
   into memory obtained from SLJIT_MALLOC_EXEC, jumps are shortened where
   detect_jump_type allows it, and the addresses of labels, jumps and
   constants are resolved.
   Returns the start of the generated code. On success compiler->error is
   set to SLJIT_ERR_COMPILED and compiler->executable_size to the code size
   in bytes. The failure paths are hidden inside the CHECK_* / PTR_FAIL_*
   macros (presumably returning NULL - confirm in the common sljit code). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	/* First pass: copy the words. word_count is the index of the current
	   word in the original stream, code_ptr its (possibly smaller)
	   position in the output. */
	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* jump->addr is made to point a fixed number of words
				   before the current one (presumably the first word of
				   the jump sequence - confirm against the emitter). */
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				/* May move code_ptr backwards when the jump gets shorter. */
				code_ptr = detect_jump_type(jump, code_ptr, code);
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				/* Just recording the address. */
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be placed right after the last instruction. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);

	/* Second pass: every label address is final now, so the target of
	   each jump can be written into its instruction(s). */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins*)jump->addr;

			if (jump->flags & PATCH_CALL) {
				/* Word displacement stored in the low 30 bits of the call. */
				addr = (sljit_sw)(addr - jump->addr) >> 2;
				SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
				buf_ptr[0] = CALL | (addr & 0x3fffffff);
				break;
			}
			if (jump->flags & PATCH_B) {
				/* Word displacement stored in the DISP_MASK bits of the branch. */
				addr = (sljit_sw)(addr - jump->addr) >> 2;
				SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
				buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
				break;
			}

			/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
			/* The first word receives the upper 22 bits of the absolute
			   address, the second word the lower 10 bits. */
			buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
			buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
#else
#error "Implementation required"
#endif
		} while (0);
		jump = jump->next;
	}


	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}
387 
388 /* --------------------------------------------------------------------- */
389 /*  Entry, exit                                                          */
390 /* --------------------------------------------------------------------- */
391 
/* Creates an index in data_transfer_insts array.
   The values below are OR-ed together; the bits covered by MEM_MASK select
   the load/store opcode. */
#define LOAD_DATA	0x01
#define WORD_DATA	0x00
#define BYTE_DATA	0x02
#define HALF_DATA	0x04
#define INT_DATA	0x06
#define SIGNED_DATA	0x08
/* Separates integer and floating point registers:
   (flags & MEM_MASK) <= GPR_REG means that an integer register is
   transferred. */
#define GPR_REG		0x0f
#define DOUBLE_DATA	0x10
#define SINGLE_DATA	0x12

#define MEM_MASK	0x1f

/* Control flags that share the flags word with the data type bits. */
/* The base register is updated after the memory access. */
#define WRITE_BACK	0x00020
/* getput_arg_fast only reports whether it could succeed. */
#define ARG_TEST	0x00040
/* emit_op does not reset the address cache. */
#define ALT_KEEP_CACHE	0x00080
/* emit_op may swap the two source operands. */
#define CUMULATIVE_OP	0x00100
/* The operation accepts an immediate as its second operand. */
#define IMM_OP		0x00200
/* Set by emit_op: the second operand is passed as an immediate. */
#define SRC2_IMM	0x00400

/* State computed by emit_op about its operands. */
#define REG_DEST	0x00800
#define REG2_SOURCE	0x01000
#define SLOW_SRC1	0x02000
#define SLOW_SRC2	0x04000
#define SLOW_DEST	0x08000

/* SET_FLAGS (0x10 << 19) also belongs here! */
420 
421 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
422 #include "sljitNativeSPARC_32.c"
423 #else
424 #include "sljitNativeSPARC_64.c"
425 #endif
426 
427 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
428 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
429 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
430 {
431 	CHECK_ERROR();
432 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
433 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
434 
435 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
436 	compiler->local_size = local_size;
437 
438 	if (local_size <= SIMM_MAX) {
439 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
440 	}
441 	else {
442 		FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
443 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
444 	}
445 
446 	/* Arguments are in their appropriate registers. */
447 
448 	return SLJIT_SUCCESS;
449 }
450 
/* Records the function context in `compiler` without pushing any
   instruction. compiler->local_size receives the same frame size that
   sljit_emit_enter computes: local_size plus SLJIT_LOCALS_OFFSET, rounded
   up to a multiple of 8. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
	return SLJIT_SUCCESS;
}
462 
463 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
464 {
465 	CHECK_ERROR();
466 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
467 
468 	if (op != SLJIT_MOV || !FAST_IS_REG(src)) {
469 		FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
470 		src = SLJIT_R0;
471 	}
472 
473 	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
474 	return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS);
475 }
476 
477 /* --------------------------------------------------------------------- */
478 /*  Operators                                                            */
479 /* --------------------------------------------------------------------- */
480 
/* Selects the first argument on sparc-32 and the second one otherwise. */
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
#define ARCH_32_64(a, b)	a
#else
#define ARCH_32_64(a, b)	b
#endif

/* Load/store opcode templates, indexed by (flags & MEM_MASK).
   Legend of the row comments, matching the *_DATA flag values:
     1st letter  u = unsigned, s = signed (SIGNED_DATA)
     2nd letter  w = word, b = byte, h = half, i = int
     3rd letter  s = store, l = load (LOAD_DATA)
   The last four rows are the floating point transfers
   (d = DOUBLE_DATA, s = SINGLE_DATA). */
static const sljit_ins data_transfer_insts[16 + 4] = {
/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
/* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
/* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
/* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,

/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
/* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
/* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
/* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),

/* d   s */ OPC1(3) | OPC3(0x27) /* stdf */,
/* d   l */ OPC1(3) | OPC3(0x23) /* lddf */,
/* s   s */ OPC1(3) | OPC3(0x24) /* stf */,
/* s   l */ OPC1(3) | OPC3(0x20) /* ldf */,
};

#undef ARCH_32_64
513 
514 /* Can perform an operation using at most 1 instruction. */
515 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
516 {
517 	SLJIT_ASSERT(arg & SLJIT_MEM);
518 
519 	if (!(flags & WRITE_BACK) || !(arg & REG_MASK)) {
520 		if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
521 				|| ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
522 			/* Works for both absoulte and relative addresses (immediate case). */
523 			if (SLJIT_UNLIKELY(flags & ARG_TEST))
524 				return 1;
525 			FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
526 				| ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg))
527 				| S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
528 				((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
529 			return -1;
530 		}
531 	}
532 	return 0;
533 }
534 
535 /* See getput_arg below.
536    Note: can_cache is called only for binary operators. Those
537    operators always uses word arguments without write back. */
538 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
539 {
540 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
541 
542 	/* Simple operation except for updates. */
543 	if (arg & OFFS_REG_MASK) {
544 		argw &= 0x3;
545 		SLJIT_ASSERT(argw);
546 		next_argw &= 0x3;
547 		if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw)
548 			return 1;
549 		return 0;
550 	}
551 
552 	if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
553 		return 1;
554 	return 0;
555 }
556 
/* Emit the necessary instructions. See can_cache above.
   Slow path of a memory access: the offset part of the address (a shifted
   index register or an immediate) is first materialized in a register
   (arg2), then the load/store selected by flags is pushed. TMP_REG3 serves
   as a cache for that offset part; compiler->cache_arg / cache_argw
   describe what it currently holds, and (next_arg, next_argw) decide
   whether the value computed now is kept there for the next access.
   With WRITE_BACK the offset is added to the base register afterwards. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 base, arg2, delay_slot;
	sljit_ins dest;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	/* A following operand that is not in memory cannot use the cache. */
	if (!(next_arg & SLJIT_MEM)) {
		next_arg = 0;
		next_argw = 0;
	}

	base = arg & REG_MASK;
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* [base + index << argw]; a zero shift is handled by getput_arg_fast. */
		argw &= 0x3;
		SLJIT_ASSERT(argw != 0);

		/* Using the cache. */
		if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
			arg2 = TMP_REG3;
		else {
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				/* The next access uses the same shifted index: keep it in TMP_REG3. */
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				arg2 = TMP_REG3;
			}
			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg))
				/* An integer load may use its own destination as scratch. */
				arg2 = reg;
			else /* It must be a mov operation, so tmp1 must be free to use. */
				arg2 = TMP_REG1;
			FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2)));
		}
	}
	else {
		/* [base + argw] with an offset that does not fit into an immediate. */
		/* Using the cache. */
		if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
			if (argw != compiler->cache_argw) {
				/* Adjust the cached offset instead of reloading it. */
				FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
				compiler->cache_argw = argw;
			}
			arg2 = TMP_REG3;
		} else {
			if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
				/* The next offset is close enough: load into the cache register. */
				compiler->cache_arg = SLJIT_MEM;
				compiler->cache_argw = argw;
				arg2 = TMP_REG3;
			}
			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base)
				arg2 = reg;
			else /* It must be a mov operation, so tmp1 must be free to use. */
				arg2 = TMP_REG1;
			FAIL_IF(load_immediate(compiler, arg2, argw));
		}
	}

	/* Integer registers go through reg_map, other registers are used as given. */
	dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
	delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
	/* Without a base register arg2 already is the complete address. */
	if (!base)
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
	if (!(flags & WRITE_BACK))
		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
	/* Write back: access the memory first, then advance the base register. */
	FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
	return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
}
621 
622 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
623 {
624 	if (getput_arg_fast(compiler, flags, reg, arg, argw))
625 		return compiler->error;
626 	compiler->cache_arg = 0;
627 	compiler->cache_argw = 0;
628 	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
629 }
630 
631 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
632 {
633 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
634 		return compiler->error;
635 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
636 }
637 
/* Common driver of the one and two operand operations: brings src1 and
   src2 into registers (or selects an immediate for src2), lets
   emit_single_op produce the instruction(s) of `op`, and stores the result
   when dst is a memory operand.
   `flags` combines the *_DATA type bits with the control flags
   (WRITE_BACK, IMM_OP, CUMULATIVE_OP, ALT_KEEP_CACHE, ...).
   Returns SLJIT_SUCCESS or leaves with an error through FAIL_IF. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r = TMP_REG2;
	sljit_s32 src1_r;
	sljit_sw src2_r = 0;
	sljit_s32 sugg_src2_r = TMP_REG2;

	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination: decide where the result is produced. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move without destination only matters when its source is in memory. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* Moves load their source directly into the destination register. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	/* Try to encode one operand as an immediate. */
	if (flags & IMM_OP) {
		if ((src2 & SLJIT_IMM) && src2w) {
			if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
				flags |= SRC2_IMM;
				src2_r = src2w;
			}
		}
		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
			if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
				flags |= SRC2_IMM;
				src2_r = src1w;

				/* And swap arguments. */
				src1 = src2;
				src1w = src2w;
				src2 = SLJIT_IMM;
				/* src2w = src2_r unneeded. */
			}
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_r = src1;
	else if (src1 & SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
			src1_r = TMP_REG1;
		}
		else
			/* Zero is represented by register index 0. */
			src1_r = 0;
	}
	else {
		/* Memory operand: loaded now if one instruction is enough,
		   otherwise postponed to the slow path below. */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* Register to memory move: store the source register itself. */
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w) {
				FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
				src2_r = sugg_src2_r;
			}
			else {
				src2_r = 0;
				/* Storing zero: register index 0 is used as the stored register. */
				if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
					dst_r = 0;
			}
		}
	}
	else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
		src2_r = sugg_src2_r;
	}

	/* Slow loads. The order of the two loads and the "next operand" hints
	   are chosen so that the address cache can be shared where possible. */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is in memory. */
	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}
766 
/* Emits the operations that have no explicit operands: breakpoint, nop,
   and the long multiply / divide operations working on SLJIT_R0 and
   SLJIT_R1. Only the sparc-32 variants of the latter are implemented.
   Returns SLJIT_SUCCESS (also for opcodes not listed in the switch) or
   leaves with an error through FAIL_IF. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst(compiler, TA, UNMOVABLE_INS);
	case SLJIT_NOP:
		return push_inst(compiler, NOP, UNMOVABLE_INS);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
		/* R0 = R0 * R1, then the Y register is read into R1. */
		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
		return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1));
#else
#error "Implementation required"
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		/* The assertion guarantees that (op | 0x2) maps both unsigned
		   opcodes to SLJIT_DIV_UW, which is used as the signedness test. */
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
		/* The Y register is written before the division: register
		   index 0 for the unsigned case, R0 >> 31 (arithmetic shift)
		   for the signed case. */
		if ((op | 0x2) == SLJIT_DIV_UW)
			FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
		else {
			FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
			FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
		}
		/* The divmod forms keep a copy of the dividend for the remainder. */
		if (op <= SLJIT_DIVMOD_SW)
			FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
		if (op >= SLJIT_DIV_UW)
			return SLJIT_SUCCESS;
		/* Remainder: R1 = dividend - quotient * divisor. */
		FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
		return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1));
#else
#error "Implementation required"
#endif
	}

	return SLJIT_SUCCESS;
}
812 
813 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
814 	sljit_s32 dst, sljit_sw dstw,
815 	sljit_s32 src, sljit_sw srcw)
816 {
817 	sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
818 
819 	CHECK_ERROR();
820 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
821 	ADJUST_LOCAL_OFFSET(dst, dstw);
822 	ADJUST_LOCAL_OFFSET(src, srcw);
823 
824 	op = GET_OPCODE(op);
825 	switch (op) {
826 	case SLJIT_MOV:
827 	case SLJIT_MOV_P:
828 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
829 
830 	case SLJIT_MOV_U32:
831 		return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
832 
833 	case SLJIT_MOV_S32:
834 		return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
835 
836 	case SLJIT_MOV_U8:
837 		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
838 
839 	case SLJIT_MOV_S8:
840 		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
841 
842 	case SLJIT_MOV_U16:
843 		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
844 
845 	case SLJIT_MOV_S16:
846 		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
847 
848 	case SLJIT_MOVU:
849 	case SLJIT_MOVU_P:
850 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
851 
852 	case SLJIT_MOVU_U32:
853 		return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
854 
855 	case SLJIT_MOVU_S32:
856 		return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
857 
858 	case SLJIT_MOVU_U8:
859 		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
860 
861 	case SLJIT_MOVU_S8:
862 		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
863 
864 	case SLJIT_MOVU_U16:
865 		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
866 
867 	case SLJIT_MOVU_S16:
868 		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
869 
870 	case SLJIT_NOT:
871 	case SLJIT_CLZ:
872 		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
873 
874 	case SLJIT_NEG:
875 		return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
876 	}
877 
878 	return SLJIT_SUCCESS;
879 }
880 
881 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
882 	sljit_s32 dst, sljit_sw dstw,
883 	sljit_s32 src1, sljit_sw src1w,
884 	sljit_s32 src2, sljit_sw src2w)
885 {
886 	sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
887 
888 	CHECK_ERROR();
889 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
890 	ADJUST_LOCAL_OFFSET(dst, dstw);
891 	ADJUST_LOCAL_OFFSET(src1, src1w);
892 	ADJUST_LOCAL_OFFSET(src2, src2w);
893 
894 	op = GET_OPCODE(op);
895 	switch (op) {
896 	case SLJIT_ADD:
897 	case SLJIT_ADDC:
898 	case SLJIT_MUL:
899 	case SLJIT_AND:
900 	case SLJIT_OR:
901 	case SLJIT_XOR:
902 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
903 
904 	case SLJIT_SUB:
905 	case SLJIT_SUBC:
906 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
907 
908 	case SLJIT_SHL:
909 	case SLJIT_LSHR:
910 	case SLJIT_ASHR:
911 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
912 		if (src2 & SLJIT_IMM)
913 			src2w &= 0x1f;
914 #else
915 		SLJIT_ASSERT_STOP();
916 #endif
917 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
918 	}
919 
920 	return SLJIT_SUCCESS;
921 }
922 
/* Returns the reg_map entry for the given sljit register
   (presumably the hardware register number used in instruction
   encodings - confirm against the reg_map definition). The argument
   is validated through CHECK_REG_INDEX first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
928 
/* Returns the index of the given sljit floating point register.
   The index is the sljit register number doubled, the same
   "reg << 1" mapping the floating point emitters in this file use. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg << 1;
}
934 
935 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
936 	void *instruction, sljit_s32 size)
937 {
938 	CHECK_ERROR();
939 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
940 
941 	return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
942 }
943 
944 /* --------------------------------------------------------------------- */
945 /*  Floating point operators                                             */
946 /* --------------------------------------------------------------------- */
947 
948 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
949 {
950 #ifdef SLJIT_IS_FPU_AVAILABLE
951 	return SLJIT_IS_FPU_AVAILABLE;
952 #else
953 	/* Available by default. */
954 	return 1;
955 #endif
956 }
957 
/* Memory access flags for a floating point operand: DOUBLE_DATA with
   bit 0x2 added when op carries SLJIT_F32_OP (sljit_emit_fop1 asserts
   at compile time that SLJIT_F32_OP is 0x100, so the shift by 7 yields
   0x2). The argument is parenthesized so that expression arguments
   bind correctly. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_F32_OP) >> 7))
/* Picks the single precision variant when op carries SLJIT_F32_OP,
   the double precision variant otherwise. */
#define SELECT_FOP(op, single_ins, double_ins) (((op) & SLJIT_F32_OP) ? (single_ins) : (double_ins))
/* Offset from SLJIT_SP of the scratch slot used below to move values
   between integer and floating point registers through memory. */
#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
961 
962 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
963 	sljit_s32 dst, sljit_sw dstw,
964 	sljit_s32 src, sljit_sw srcw)
965 {
966 	if (src & SLJIT_MEM) {
967 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
968 		src = TMP_FREG1;
969 	}
970 	else
971 		src <<= 1;
972 
973 	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
974 
975 	if (dst == SLJIT_UNUSED)
976 		return SLJIT_SUCCESS;
977 
978 	if (FAST_IS_REG(dst)) {
979 		FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
980 		return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
981 	}
982 
983 	/* Store the integer value from a VFP register. */
984 	return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
985 }
986 
987 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
988 	sljit_s32 dst, sljit_sw dstw,
989 	sljit_s32 src, sljit_sw srcw)
990 {
991 	sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
992 
993 	if (src & SLJIT_IMM) {
994 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
995 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
996 			srcw = (sljit_s32)srcw;
997 #endif
998 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
999 		src = TMP_REG1;
1000 		srcw = 0;
1001 	}
1002 
1003 	if (FAST_IS_REG(src)) {
1004 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1005 		src = SLJIT_MEM1(SLJIT_SP);
1006 		srcw = FLOAT_TMP_MEM_OFFSET;
1007 	}
1008 
1009 	FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1010 	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
1011 
1012 	if (dst & SLJIT_MEM)
1013 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1014 	return SLJIT_SUCCESS;
1015 }
1016 
1017 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1018 	sljit_s32 src1, sljit_sw src1w,
1019 	sljit_s32 src2, sljit_sw src2w)
1020 {
1021 	if (src1 & SLJIT_MEM) {
1022 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1023 		src1 = TMP_FREG1;
1024 	}
1025 	else
1026 		src1 <<= 1;
1027 
1028 	if (src2 & SLJIT_MEM) {
1029 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1030 		src2 = TMP_FREG2;
1031 	}
1032 	else
1033 		src2 <<= 1;
1034 
1035 	return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
1036 }
1037 
/* Emits a single-operand floating point operation.
   The switch below handles move, negate, absolute value and the
   F32 <-> F64 conversion; SELECT_FOP1_OPERATION_WITH_CHECKS runs first
   (presumably performing the argument checks and dispatching the
   integer conversion / compare opcodes to the helpers above - confirm
   against the macro definition).
   Returns SLJIT_SUCCESS, or the compiler error code on failure. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	/* Reset the cached memory address state before emitting. */
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* FLOAT_DATA() relies on SLJIT_F32_OP >> 7 being a bit that
	   DOUBLE_DATA does not already use. */
	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* For the precision conversion the F32 bit is flipped here, so that
	   the load below and SELECT_FOP in the switch see the flipped
	   precision; it is flipped back after the conversion is emitted. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	/* Register destinations are used directly (index doubled); anything
	   else is computed in TMP_FREG1 and stored at the end. */
	dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;

	if (src & SLJIT_MEM) {
		/* Memory sources are loaded straight into the destination register. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}
	else
		src <<= 1;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1) {
				/* Register to register: copy the first word, and for
				   double precision also the second word (index | 1). */
				FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
				if (!(op & SLJIT_F32_OP))
					FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
			}
			else
				/* Destination is not a register: no copy is needed, the
				   final store writes the source register directly. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		/* FNEGS is applied to the first word only; for double precision
		   the second word is copied unchanged when the registers differ. */
		FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
		if (dst_r != src && !(op & SLJIT_F32_OP))
			FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
		break;
	case SLJIT_ABS_F64:
		/* Same scheme as the negate case, using FABSS. */
		FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
		if (dst_r != src && !(op & SLJIT_F32_OP))
			FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
		/* Restore the original F32 bit so that the store below uses the
		   precision of the result. */
		op ^= SLJIT_F32_OP;
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
1095 
/* Emits a two-operand floating point operation (add, subtract,
   multiply or divide, selected by the opcode in op).
   Memory sources are loaded into TMP_FREG1 (src1) and TMP_FREG2 (src2);
   a non-register destination is computed in TMP_FREG2 and stored
   afterwards.
   Returns SLJIT_SUCCESS, or the compiler error code on failure. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Reset the cached memory address state before emitting. */
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;

	if (src1 & SLJIT_MEM) {
		/* Try the fast load path first; when it declines (returns 0) the
		   operand is marked and loaded later through getput_arg(). */
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= SLOW_SRC1;
	}
	else
		src1 <<= 1;

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= SLOW_SRC2;
	}
	else
		src2 <<= 1;

	/* Slow path loads. The trailing argument pair of getput_arg() names
	   the operand accessed next; when both sources are slow, the load
	   order is chosen from the can_cache() results (presumably to
	   maximize reuse of the cached address - confirm against
	   getput_arg). */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	/* From here on the slow operands live in their temporaries. */
	if (flags & SLOW_SRC1)
		src1 = TMP_FREG1;
	if (flags & SLOW_SRC2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
		break;
	}

	/* The result was computed in the temporary: write it to dst. */
	if (dst_r == TMP_FREG2)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
1177 
1178 #undef FLOAT_DATA
1179 #undef SELECT_FOP
1180 
1181 /* --------------------------------------------------------------------- */
1182 /*  Other instructions                                                   */
1183 /* --------------------------------------------------------------------- */
1184 
1185 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1186 {
1187 	CHECK_ERROR();
1188 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1189 	ADJUST_LOCAL_OFFSET(dst, dstw);
1190 
1191 	/* For UNUSED dst. Uncommon, but possible. */
1192 	if (dst == SLJIT_UNUSED)
1193 		return SLJIT_SUCCESS;
1194 
1195 	if (FAST_IS_REG(dst))
1196 		return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
1197 
1198 	/* Memory. */
1199 	return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw);
1200 }
1201 
1202 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1203 {
1204 	CHECK_ERROR();
1205 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
1206 	ADJUST_LOCAL_OFFSET(src, srcw);
1207 
1208 	if (FAST_IS_REG(src))
1209 		FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
1210 	else if (src & SLJIT_MEM)
1211 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
1212 	else if (src & SLJIT_IMM)
1213 		FAIL_IF(load_immediate(compiler, TMP_LINK, srcw));
1214 
1215 	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
1216 	return push_inst(compiler, NOP, UNMOVABLE_INS);
1217 }
1218 
1219 /* --------------------------------------------------------------------- */
1220 /*  Conditional instructions                                             */
1221 /* --------------------------------------------------------------------- */
1222 
1223 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1224 {
1225 	struct sljit_label *label;
1226 
1227 	CHECK_ERROR_PTR();
1228 	CHECK_PTR(check_sljit_emit_label(compiler));
1229 
1230 	if (compiler->last_label && compiler->last_label->size == compiler->size)
1231 		return compiler->last_label;
1232 
1233 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1234 	PTR_FAIL_IF(!label);
1235 	set_label(label, compiler);
1236 	compiler->delay_slot = UNMOVABLE_INS;
1237 	return label;
1238 }
1239 
1240 static sljit_ins get_cc(sljit_s32 type)
1241 {
1242 	switch (type) {
1243 	case SLJIT_EQUAL:
1244 	case SLJIT_MUL_NOT_OVERFLOW:
1245 	case SLJIT_NOT_EQUAL_F64: /* Unordered. */
1246 		return DA(0x1);
1247 
1248 	case SLJIT_NOT_EQUAL:
1249 	case SLJIT_MUL_OVERFLOW:
1250 	case SLJIT_EQUAL_F64:
1251 		return DA(0x9);
1252 
1253 	case SLJIT_LESS:
1254 	case SLJIT_GREATER_F64: /* Unordered. */
1255 		return DA(0x5);
1256 
1257 	case SLJIT_GREATER_EQUAL:
1258 	case SLJIT_LESS_EQUAL_F64:
1259 		return DA(0xd);
1260 
1261 	case SLJIT_GREATER:
1262 	case SLJIT_GREATER_EQUAL_F64: /* Unordered. */
1263 		return DA(0xc);
1264 
1265 	case SLJIT_LESS_EQUAL:
1266 	case SLJIT_LESS_F64:
1267 		return DA(0x4);
1268 
1269 	case SLJIT_SIG_LESS:
1270 		return DA(0x3);
1271 
1272 	case SLJIT_SIG_GREATER_EQUAL:
1273 		return DA(0xb);
1274 
1275 	case SLJIT_SIG_GREATER:
1276 		return DA(0xa);
1277 
1278 	case SLJIT_SIG_LESS_EQUAL:
1279 		return DA(0x2);
1280 
1281 	case SLJIT_OVERFLOW:
1282 	case SLJIT_UNORDERED_F64:
1283 		return DA(0x7);
1284 
1285 	case SLJIT_NOT_OVERFLOW:
1286 	case SLJIT_ORDERED_F64:
1287 		return DA(0xf);
1288 
1289 	default:
1290 		SLJIT_ASSERT_STOP();
1291 		return DA(0x8);
1292 	}
1293 }
1294 
/* Emits a (conditional or unconditional) jump or call whose target is
   set later, and returns its descriptor.
   Conditional types first emit a branch on the inverted condition
   (type ^ 1) with displacement 5 (presumably skipping the address load
   + JMPL + NOP sequence that follows - confirm against emit_const and
   the code generator). The target address is loaded into TMP_REG2 by
   emit_const() and reached through JMPL; calls (type >= SLJIT_FAST_CALL)
   write the link into TMP_LINK.
   Returns NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Keep only the jump type; the modifier bits were consumed above. */
	type &= 0xff;

	if (type < SLJIT_EQUAL_F64) {
		/* Integer condition. */
		jump->flags |= IS_COND;
		/* The previous instruction is flagged movable only if it is not
		   unmovable and did not set the integer condition codes. */
		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET))
			jump->flags |= IS_MOVABLE;
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
		PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
#else
#error "Implementation required"
#endif
	}
	else if (type < SLJIT_JUMP) {
		/* Floating point condition: same scheme, checked against the
		   floating point condition codes. */
		jump->flags |= IS_COND;
		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
			jump->flags |= IS_MOVABLE;
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
		PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
#else
#error "Implementation required"
#endif
	} else {
		/* Unconditional jump or call. */
		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
			jump->flags |= IS_MOVABLE;
		if (type >= SLJIT_FAST_CALL)
			jump->flags |= IS_CALL;
	}

	PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
	PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS));
	/* jump->addr records the position of the NOP pushed right after the JMPL. */
	jump->addr = compiler->size;
	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));

	return jump;
}
1340 
1341 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
1342 {
1343 	struct sljit_jump *jump = NULL;
1344 	sljit_s32 src_r;
1345 
1346 	CHECK_ERROR();
1347 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
1348 	ADJUST_LOCAL_OFFSET(src, srcw);
1349 
1350 	if (FAST_IS_REG(src))
1351 		src_r = src;
1352 	else if (src & SLJIT_IMM) {
1353 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1354 		FAIL_IF(!jump);
1355 		set_jump(jump, compiler, JUMP_ADDR);
1356 		jump->u.target = srcw;
1357 		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
1358 			jump->flags |= IS_MOVABLE;
1359 		if (type >= SLJIT_FAST_CALL)
1360 			jump->flags |= IS_CALL;
1361 
1362 		FAIL_IF(emit_const(compiler, TMP_REG2, 0));
1363 		src_r = TMP_REG2;
1364 	}
1365 	else {
1366 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
1367 		src_r = TMP_REG2;
1368 	}
1369 
1370 	FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
1371 	if (jump)
1372 		jump->addr = compiler->size;
1373 	return push_inst(compiler, NOP, UNMOVABLE_INS);
1374 }
1375 
/* Materializes a condition as 0 / 1 and either stores it in dst
   (opcodes below SLJIT_ADD) or combines it with src through the given
   arithmetic opcode (op >= SLJIT_ADD).
   type selects the condition; values below SLJIT_EQUAL_F64 use the
   integer branch, the others the floating point branch.
   Nothing is emitted when dst is SLJIT_UNUSED. Only implemented for the
   32 bit SPARC configuration. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
	op = GET_OPCODE(op);
	/* The flag value is built directly in dst only for a plain move into
	   a register; otherwise TMP_REG2 holds it. */
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		/* Arithmetic form with a memory source: load it into TMP_REG1
		   before the branch sequence is emitted. */
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	type &= 0xff;
	/* Branch with displacement 3 on the condition, followed by
	   "reg = 1" and "reg = 0". NOTE(review): this relies on the first
	   OR executing in the branch delay slot and the second being
	   skipped when the branch is taken - confirm against the SPARC
	   manual. */
	if (type < SLJIT_EQUAL_F64)
		FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS));
	else
		FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS));

	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));

	if (op >= SLJIT_ADD)
		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);

	/* Plain move: store the temporary unless dst itself was the target. */
	return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
#else
#error "Implementation required"
#endif
}
1420 
1421 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
1422 {
1423 	sljit_s32 reg;
1424 	struct sljit_const *const_;
1425 
1426 	CHECK_ERROR_PTR();
1427 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
1428 	ADJUST_LOCAL_OFFSET(dst, dstw);
1429 
1430 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
1431 	PTR_FAIL_IF(!const_);
1432 	set_const(const_, compiler);
1433 
1434 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
1435 
1436 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
1437 
1438 	if (dst & SLJIT_MEM)
1439 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
1440 	return const_;
1441 }
1442