xref: /netbsd-src/sys/arch/powerpc/fpu/fpu_emu.c (revision 6a9ecf9f5d75466f837a2dec7817a39765c934e9)
1 /*	$NetBSD: fpu_emu.c,v 1.60 2022/09/20 12:25:01 rin Exp $ */
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1992, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * This software was developed by the Computer Systems Engineering group
43  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
44  * contributed to Berkeley.
45  *
46  * All advertising materials mentioning features or use of this software
47  * must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Lawrence Berkeley Laboratory.
50  *
51  * Redistribution and use in source and binary forms, with or without
52  * modification, are permitted provided that the following conditions
53  * are met:
54  * 1. Redistributions of source code must retain the above copyright
55  *    notice, this list of conditions and the following disclaimer.
56  * 2. Redistributions in binary form must reproduce the above copyright
57  *    notice, this list of conditions and the following disclaimer in the
58  *    documentation and/or other materials provided with the distribution.
59  * 3. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
76  */
77 
78 #include <sys/cdefs.h>
79 __KERNEL_RCSID(0, "$NetBSD: fpu_emu.c,v 1.60 2022/09/20 12:25:01 rin Exp $");
80 
81 #ifdef _KERNEL_OPT
82 #include "opt_ddb.h"
83 #endif
84 
85 #include <sys/param.h>
86 #include <sys/systm.h>
87 #include <sys/evcnt.h>
88 #include <sys/proc.h>
89 #include <sys/siginfo.h>
90 #include <sys/signal.h>
91 #include <sys/signalvar.h>
92 #include <sys/syslog.h>
93 
94 #include <powerpc/instr.h>
95 #include <powerpc/psl.h>
96 
97 #include <machine/fpu.h>
98 #include <machine/reg.h>
99 #include <machine/trap.h>
100 
101 #include <powerpc/fpu/fpu_emu.h>
102 #include <powerpc/fpu/fpu_extern.h>
103 
/*
 * Per-instruction event counters.
 *
 * FPU_EMU_EVCNT_DECL(name) defines a static trap-type evcnt called
 * fpu_emu_ev_<name> in the "fpemu" group and attaches it statically;
 * the invocation supplies the trailing semicolon.
 */
#define	FPU_EMU_EVCNT_DECL(name)					\
static struct evcnt fpu_emu_ev_##name =					\
    EVCNT_INITIALIZER(EVCNT_TYPE_TRAP, NULL, "fpemu", #name);		\
EVCNT_ATTACH_STATIC(fpu_emu_ev_##name)

/* Bump the counter declared by FPU_EMU_EVCNT_DECL(name). */
#define	FPU_EMU_EVCNT_INCR(name)					\
    fpu_emu_ev_##name.ev_count++

/* Loads and stores. */
FPU_EMU_EVCNT_DECL(stfiwx);
FPU_EMU_EVCNT_DECL(fpstore);
FPU_EMU_EVCNT_DECL(fpload);
/* Primary opcode 63, X-form. */
FPU_EMU_EVCNT_DECL(fcmpu);
FPU_EMU_EVCNT_DECL(frsp);
FPU_EMU_EVCNT_DECL(fctiw);
FPU_EMU_EVCNT_DECL(fcmpo);
FPU_EMU_EVCNT_DECL(mtfsb1);
FPU_EMU_EVCNT_DECL(fnegabs);	/* counts fneg */
FPU_EMU_EVCNT_DECL(mcrfs);
FPU_EMU_EVCNT_DECL(mtfsb0);
FPU_EMU_EVCNT_DECL(fmr);
FPU_EMU_EVCNT_DECL(mtfsfi);
FPU_EMU_EVCNT_DECL(fnabs);
FPU_EMU_EVCNT_DECL(fabs);
FPU_EMU_EVCNT_DECL(mffs);
FPU_EMU_EVCNT_DECL(mtfsf);
FPU_EMU_EVCNT_DECL(fctid);
FPU_EMU_EVCNT_DECL(fcfid);
/* A-form arithmetic (single and double share one counter each). */
FPU_EMU_EVCNT_DECL(fdiv);
FPU_EMU_EVCNT_DECL(fsub);
FPU_EMU_EVCNT_DECL(fadd);
FPU_EMU_EVCNT_DECL(fsqrt);
FPU_EMU_EVCNT_DECL(fsel);
FPU_EMU_EVCNT_DECL(fpres);	/* counts fres */
FPU_EMU_EVCNT_DECL(fmul);
FPU_EMU_EVCNT_DECL(frsqrte);
FPU_EMU_EVCNT_DECL(fmsub);
FPU_EMU_EVCNT_DECL(fmadd);
FPU_EMU_EVCNT_DECL(fnmsub);
FPU_EMU_EVCNT_DECL(fnmadd);
143 
/*
 * FPSCR bit groups used when folding the exceptions raised by one
 * emulated instruction back into the FPSCR image.
 */

/* The individual invalid-operation cause bits (VX*). */
#define	FPSR_INV	(FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI |	\
			 FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |	\
			 FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI)

/* Every exception status bit: VX, OX, UX, ZX, XX and the VX* causes. */
#define	FPSR_EX_MSK	(FPSCR_VX | FPSCR_OX | FPSCR_UX | FPSCR_ZX |	\
			 FPSCR_XX | FPSR_INV)

/* The exception enable bits VE, OE, UE, ZE and XE. */
#define	FPSR_EX		(FPSCR_VE | FPSCR_OE | FPSCR_UE | FPSCR_ZE |	\
			 FPSCR_XE)

/* Bits that mcrfs clears in the FPSCR once it has copied them to CR. */
#define	MCRFS_MASK	(FPSCR_FX | FPSCR_OX | FPSCR_UX | FPSCR_ZX |	\
			 FPSCR_XX | FPSR_INV)
161 
/*
 * Floating-point register `reg' of the state being emulated.
 * Expands to a reference through a variable named `fs', which must be
 * in scope at the point of use.
 */
#define	FR(reg)	(fs->fpreg[reg])

/* Debug flags; this file tests it against FPE_EX before printing. */
int fpe_debug = 0;

#ifdef DDB
/* Disassembler entry point, used here only for debug output. */
extern vaddr_t opc_disasm(vaddr_t loc, int opcode);
#endif

/* Decodes and runs one instruction; returns 0, FPE, NOTFPU or FAULT. */
static int fpu_execute(struct trapframe *, struct fpemu *, union instr *);
171 
172 #ifdef DEBUG
173 /*
174  * Dump a `fpn' structure.
175  */
176 void
fpu_dumpfpn(struct fpn * fp)177 fpu_dumpfpn(struct fpn *fp)
178 {
179 	static const char *class[] = {
180 		"SNAN", "QNAN", "ZERO", "NUM", "INF"
181 	};
182 
183 	KASSERT(fp != NULL);
184 
185 	printf("%s %c.%x %x %x %xE%d\n", class[fp->fp_class + 2],
186 		fp->fp_sign ? '-' : ' ',
187 		fp->fp_mant[0],	fp->fp_mant[1],
188 		fp->fp_mant[2], fp->fp_mant[3],
189 		fp->fp_exp);
190 }
191 #endif
192 
/*
 * Status codes returned by fpu_execute(); 0 means the instruction was
 * emulated without error.
 */
enum {
	FPE	= 1,	/* take a floating point exception */
	NOTFPU	= 2,	/* not an FPU instruction */
	FAULT	= 3	/* copyin/copyout of the operand failed */
};
199 
200 
201 /*
202  * Emulate a floating-point instruction.
203  * Return true if insn is consumed anyway.
204  * Otherwise, the caller must take care of it.
205  */
206 bool
fpu_emulate(struct trapframe * tf,struct fpreg * fpf,ksiginfo_t * ksi)207 fpu_emulate(struct trapframe *tf, struct fpreg *fpf, ksiginfo_t *ksi)
208 {
209 	struct pcb *pcb;
210 	union instr insn;
211 	struct fpemu fe;
212 
213 	KSI_INIT_TRAP(ksi);
214 	ksi->ksi_signo = 0;
215 	ksi->ksi_addr = (void *)tf->tf_srr0;
216 
217 	/* initialize insn.is_datasize to tell it is *not* initialized */
218 	fe.fe_fpstate = fpf;
219 	fe.fe_cx = 0;
220 
221 	/* always set this (to avoid a warning) */
222 
223 	if (copyin((void *) (tf->tf_srr0), &insn.i_int, sizeof (insn.i_int))) {
224 #ifdef DEBUG
225 		printf("fpu_emulate: fault reading opcode\n");
226 #endif
227 		ksi->ksi_signo = SIGSEGV;
228 		ksi->ksi_trap = EXC_ISI;
229 		ksi->ksi_code = SEGV_MAPERR;
230 		return true;
231 	}
232 
233 	DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n",
234 	    insn.i_int, (void *)tf->tf_srr0));
235 
236 	if ((insn.i_any.i_opcd == OPC_TWI) ||
237 	    ((insn.i_any.i_opcd == OPC_integer_31) &&
238 	    (insn.i_x.i_xo == OPC31_TW))) {
239 		/* Check for the two trap insns. */
240 		DPRINTF(FPE_EX, ("fpu_emulate: SIGTRAP\n"));
241 		ksi->ksi_signo = SIGTRAP;
242 		ksi->ksi_trap = EXC_PGM;
243 		ksi->ksi_code = TRAP_BRKPT;
244 		return true;
245 	}
246 	switch (fpu_execute(tf, &fe, &insn)) {
247 	case 0:
248 success:
249 		DPRINTF(FPE_EX, ("fpu_emulate: success\n"));
250 		tf->tf_srr0 += 4;
251 		return true;
252 
253 	case FPE:
254 		pcb = lwp_getpcb(curlwp);
255 		if ((pcb->pcb_flags & PSL_FE_PREC) == 0)
256 			goto success;
257 		DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n"));
258 		ksi->ksi_signo = SIGFPE;
259 		ksi->ksi_trap = EXC_PGM;
260 		ksi->ksi_code = fpu_get_fault_code();
261 		return true;
262 
263 	case FAULT:
264 		DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n"));
265 		ksi->ksi_signo = SIGSEGV;
266 		ksi->ksi_trap = EXC_DSI;
267 		ksi->ksi_code = SEGV_MAPERR;
268 		ksi->ksi_addr = (void *)fe.fe_addr;
269 		return true;
270 
271 	case NOTFPU:
272 	default:
273 		DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n"));
274 #if defined(DDB) && defined(DEBUG)
275 		if (fpe_debug & FPE_EX) {
276 			printf("fpu_emulate:  illegal insn %x at %p:",
277 			insn.i_int, (void *) (tf->tf_srr0));
278 			opc_disasm((vaddr_t)(tf->tf_srr0), insn.i_int);
279 		}
280 #endif
281 		return false;
282 	}
283 }
284 
/*
 * fpu_to_single(): Helper function for stfs{,u}{,x}.
 *
 * Floating-point registers (FRs) always hold values in double format,
 * even for single-precision data.  A double cannot be converted to
 * float in general, but Power ISA (2.0.3--3.1, Sec. 4.6.3) specifies
 * what a store-single must write to memory:
 *
 *  - Extra fraction bits are truncated regardless of rounding mode.
 *  - When the magnitude exceeds the largest float, bits 63--62 and
 *    58--29 are mechanically copied into bits 31--0.
 *  - When the magnitude is representable as a denormalized float, the
 *    FR holds it as a normalized double, so it must be denormalized
 *    here.
 *  - When the magnitude is below the smallest denormalized float, the
 *    result is undefined.  G5 (970MP Rev 1.1) seems to store
 *    (sign | 0); G4 and earlier leave ``random'' garbage in the
 *    exponent.  We mimic G5 for now.
 */
static uint32_t
fpu_to_single(uint64_t reg)
{
	const uint64_t magnitude = reg & 0x7fffffffffffffffULL;
	const uint32_t signbit = (uint32_t)(reg >> 63) << 31;
	const int unbiased = (int)((reg >> 52) & 0x7ff) - 1023;

	if (unbiased > -127 || magnitude == 0) {
		/*
		 * No denormalization required: normalized, zero, inf,
		 * NaN, or larger than MAXFLOAT (see comment above).
		 *
		 * The exponent MSB and its 7 LSBs are the same in the
		 * double and float encodings here, so bit 62 and bits
		 * 58--29 can be copied directly.
		 */
		const uint32_t exp_msb = (uint32_t)((reg >> 62) & 1) << 30;
		const uint32_t low30 = (uint32_t)((reg >> 29) & 0x3fffffff);

		return signbit | exp_msb | low30;
	}

	if (unbiased >= -149) {
		/*
		 * Denormalized float: restore the hidden bit, then shift
		 * the significand right by 1 ... 23 extra places so that
		 * the float exponent field stays zero.
		 */
		const int drop = 29 + (-126 - unbiased);	/* 30 ... 52 */
		const uint64_t significand =
		    (reg & 0x000fffffffffffffULL) | 0x0010000000000000ULL;

		return signbit | (uint32_t)(significand >> drop);
	}

	/* Undefined. Mimic G5 for now. */
	return signbit;
}
334 
335 /*
336  * Execute an FPU instruction (one that runs entirely in the FPU; not
337  * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
338  * modified to reflect the setting the hardware would have left.
339  *
340  * Note that we do not catch all illegal opcodes, so you can, for instance,
341  * multiply two integers this way.
342  */
343 static int
fpu_execute(struct trapframe * tf,struct fpemu * fe,union instr * insn)344 fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
345 {
346 	struct fpn *fp;
347 	union instr instr = *insn;
348 	int *a;
349 	int ra, rb, rc, rt, type, mask, fsr, cx, bf, setcr, cond;
350 	u_int bits;
351 	struct fpreg *fs;
352 	int i;
353 
354 	/* Setup work. */
355 	fp = NULL;
356 	fs = fe->fe_fpstate;
357 	fe->fe_fpscr = ((int *)&fs->fpscr)[1];
358 
359 	/*
360 	 * On PowerPC all floating point values are stored in registers
361 	 * as doubles, even when used for single precision operations.
362 	 */
363 	type = FTYPE_DBL;
364 	cond = instr.i_any.i_rc;
365 	setcr = 0;
366 	bf = 0;	/* XXX gcc */
367 
368 #if defined(DDB) && defined(DEBUG)
369 	if (fpe_debug & FPE_EX) {
370 		vaddr_t loc = tf->tf_srr0;
371 
372 		printf("Trying to emulate: %p ", (void *)loc);
373 		opc_disasm(loc, instr.i_int);
374 	}
375 #endif
376 
377 	/*
378 	 * `Decode' and execute instruction.
379 	 */
380 
381 	if ((instr.i_any.i_opcd >= OPC_LFS && instr.i_any.i_opcd <= OPC_STFDU) ||
382 	    instr.i_any.i_opcd == OPC_integer_31) {
383 		/*
384 		 * Handle load/store insns:
385 		 *
386 		 * Convert to/from single if needed, calculate addr,
387 		 * and update index reg if needed.
388 		 */
389 		vaddr_t addr;
390 		size_t size = sizeof(double);
391 		int store, update;
392 
393 		cond = 0; /* ld/st never set condition codes */
394 
395 
396 		if (instr.i_any.i_opcd == OPC_integer_31) {
397 			if (instr.i_x.i_xo == OPC31_STFIWX) {
398 				FPU_EMU_EVCNT_INCR(stfiwx);
399 
400 				/* Store as integer */
401 				ra = instr.i_x.i_ra;
402 				rb = instr.i_x.i_rb;
403 				DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
404 					ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
405 
406 				addr = tf->tf_fixreg[rb];
407 				if (ra != 0)
408 					addr += tf->tf_fixreg[ra];
409 				rt = instr.i_x.i_rt;
410 				a = (int *)&fs->fpreg[rt];
411 				DPRINTF(FPE_INSN,
412 					("fpu_execute: Store INT %x at %p\n",
413 						a[1], (void *)addr));
414 				if (copyout(&a[1], (void *)addr, sizeof(int))) {
415 					fe->fe_addr = addr;
416 					return (FAULT);
417 				}
418 				return (0);
419 			}
420 
421 			if ((instr.i_x.i_xo & OPC31_FPMASK) != OPC31_FPOP)
422 				/* Not an indexed FP load/store op */
423 				return (NOTFPU);
424 
425 			store = (instr.i_x.i_xo & 0x80);
426 			if ((instr.i_x.i_xo & 0x40) == 0) {
427 				type = FTYPE_SNG;
428 				size = sizeof(float);
429 			}
430 			update = (instr.i_x.i_xo & 0x20);
431 
432 			/* calculate EA of load/store */
433 			ra = instr.i_x.i_ra;
434 			rb = instr.i_x.i_rb;
435 			DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
436 				ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
437 			addr = tf->tf_fixreg[rb];
438 			if (ra != 0)
439 				addr += tf->tf_fixreg[ra];
440 			rt = instr.i_x.i_rt;
441 		} else {
442 			store = instr.i_d.i_opcd & 0x4;
443 			if ((instr.i_d.i_opcd & 0x2) == 0) {
444 				type = FTYPE_SNG;
445 				size = sizeof(float);
446 			}
447 			update = instr.i_d.i_opcd & 0x1;
448 
449 			/* calculate EA of load/store */
450 			ra = instr.i_d.i_ra;
451 			addr = instr.i_d.i_d;
452 			DPRINTF(FPE_INSN, ("reg %d has %lx displ %lx\n",
453 				ra, tf->tf_fixreg[ra], addr));
454 			if (ra != 0)
455 				addr += tf->tf_fixreg[ra];
456 			rt = instr.i_d.i_rt;
457 		}
458 
459 		if (update && ra == 0)
460 			return (NOTFPU);
461 
462 		if (store) {
463 			/* Store */
464 			uint32_t word;
465 			const void *kaddr;
466 
467 			FPU_EMU_EVCNT_INCR(fpstore);
468 			if (type != FTYPE_DBL) {
469 				/*
470 				 * As Power ISA specifies conversion algorithm
471 				 * for store floating-point single insns, we
472 				 * cannot use fpu_explode() and _implode() here.
473 				 * See fpu_to_single() and comment therein for
474 				 * more details.
475 				 */
476 				DPRINTF(FPE_INSN,
477 					("fpu_execute: Store SNG at %p\n",
478 						(void *)addr));
479 				word = fpu_to_single(FR(rt));
480 				kaddr = &word;
481 			} else {
482 				DPRINTF(FPE_INSN,
483 					("fpu_execute: Store DBL at %p\n",
484 						(void *)addr));
485 				kaddr = &FR(rt);
486 			}
487 			if (copyout(kaddr, (void *)addr, size)) {
488 				fe->fe_addr = addr;
489 				return (FAULT);
490 			}
491 		} else {
492 			/* Load */
493 			FPU_EMU_EVCNT_INCR(fpload);
494 			DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n",
495 				(void *)addr));
496 			if (copyin((const void *)addr, &FR(rt), size)) {
497 				fe->fe_addr = addr;
498 				return (FAULT);
499 			}
500 			if (type != FTYPE_DBL) {
501 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rt));
502 				fpu_implode(fe, fp, FTYPE_DBL, &FR(rt));
503 			}
504 		}
505 		if (update)
506 			tf->tf_fixreg[ra] = addr;
507 		/* Complete. */
508 		return (0);
509 	} else if (instr.i_any.i_opcd == OPC_sp_fp_59 ||
510 		instr.i_any.i_opcd == OPC_dp_fp_63) {
511 
512 
513 		if (instr.i_any.i_opcd == OPC_dp_fp_63 &&
514 		    !(instr.i_a.i_xo & OPC63M_MASK)) {
515 			/* Format X */
516 			rt = instr.i_x.i_rt;
517 			ra = instr.i_x.i_ra;
518 			rb = instr.i_x.i_rb;
519 
520 
521 			/* One of the special opcodes.... */
522 			switch (instr.i_x.i_xo) {
523 			case	OPC63_FCMPU:
524 				FPU_EMU_EVCNT_INCR(fcmpu);
525 				DPRINTF(FPE_INSN, ("fpu_execute: FCMPU\n"));
526 				rt >>= 2;
527 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
528 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
529 				fpu_compare(fe, 0);
530 				/* Make sure we do the condition regs. */
531 				cond = 0;
532 				/* N.B.: i_rs is already left shifted by two. */
533 				bf = instr.i_x.i_rs & 0xfc;
534 				setcr = 1;
535 				break;
536 
537 			case	OPC63_FRSP:
538 				/*
539 				 * Convert to single:
540 				 *
541 				 * PowerPC uses this to round a double
542 				 * precision value to single precision,
543 				 * but values in registers are always
544 				 * stored in double precision format.
545 				 */
546 				FPU_EMU_EVCNT_INCR(frsp);
547 				DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n"));
548 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL,
549 				    FR(rb));
550 				fpu_implode(fe, fp, FTYPE_SNG, &FR(rt));
551 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
552 				    FR(rt));
553 				type = FTYPE_DBL | FTYPE_FPSCR;
554 				break;
555 			case	OPC63_FCTIW:
556 			case	OPC63_FCTIWZ:
557 				FPU_EMU_EVCNT_INCR(fctiw);
558 				DPRINTF(FPE_INSN, ("fpu_execute: FCTIW\n"));
559 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
560 				type = FTYPE_INT | FTYPE_FPSCR;
561 				if (instr.i_x.i_xo == OPC63_FCTIWZ)
562 					type |= FTYPE_RD_RZ;
563 				break;
564 			case	OPC63_FCMPO:
565 				FPU_EMU_EVCNT_INCR(fcmpo);
566 				DPRINTF(FPE_INSN, ("fpu_execute: FCMPO\n"));
567 				rt >>= 2;
568 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
569 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
570 				fpu_compare(fe, 1);
571 				/* Make sure we do the condition regs. */
572 				cond = 0;
573 				/* N.B.: i_rs is already left shifted by two. */
574 				bf = instr.i_x.i_rs & 0xfc;
575 				setcr = 1;
576 				break;
577 			case	OPC63_MTFSB1:
578 				FPU_EMU_EVCNT_INCR(mtfsb1);
579 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB1\n"));
580 				fe->fe_cx = (1 << (31 - rt)) &
581 				    ~(FPSCR_FEX | FPSCR_VX);
582 				break;
583 			case	OPC63_FNEG:
584 				FPU_EMU_EVCNT_INCR(fnegabs);
585 				DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n"));
586 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
587 					sizeof(double));
588 				a = (int *)&fs->fpreg[rt];
589 				*a ^= (1 << 31);
590 				break;
591 			case	OPC63_MCRFS:
592 				FPU_EMU_EVCNT_INCR(mcrfs);
593 				DPRINTF(FPE_INSN, ("fpu_execute: MCRFS\n"));
594 				cond = 0;
595 				rt &= 0x1c;
596 				ra &= 0x1c;
597 				/* Extract the bits we want */
598 				bits = (fe->fe_fpscr >> (28 - ra)) & 0xf;
599 				/* Clear the bits we copied. */
600 				mask = (0xf << (28 - ra)) & MCRFS_MASK;
601 				fe->fe_fpscr &= ~mask;
602 				/* Now shove them in the right part of cr */
603 				tf->tf_cr &= ~(0xf << (28 - rt));
604 				tf->tf_cr |= bits << (28 - rt);
605 				break;
606 			case	OPC63_MTFSB0:
607 				FPU_EMU_EVCNT_INCR(mtfsb0);
608 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB0\n"));
609 				fe->fe_fpscr &= ~(1 << (31 - rt)) |
610 				    (FPSCR_FEX | FPSCR_VX);
611 				break;
612 			case	OPC63_FMR:
613 				FPU_EMU_EVCNT_INCR(fmr);
614 				DPRINTF(FPE_INSN, ("fpu_execute: FMR\n"));
615 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
616 					sizeof(double));
617 				break;
618 			case	OPC63_MTFSFI:
619 				FPU_EMU_EVCNT_INCR(mtfsfi);
620 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSFI\n"));
621 				rb >>= 1;
622 				rt &= 0x1c; /* Already left-shifted 4 */
623 				bits = rb << (28 - rt);
624 				mask = 0xf << (28 - rt);
625 				fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
626 				break;
627 			case	OPC63_FNABS:
628 				FPU_EMU_EVCNT_INCR(fnabs);
629 				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
630 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
631 					sizeof(double));
632 				a = (int *)&fs->fpreg[rt];
633 				*a |= (1 << 31);
634 				break;
635 			case	OPC63_FABS:
636 				FPU_EMU_EVCNT_INCR(fabs);
637 				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
638 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
639 					sizeof(double));
640 				a = (int *)&fs->fpreg[rt];
641 				*a &= ~(1 << 31);
642 				break;
643 			case	OPC63_MFFS:
644 				FPU_EMU_EVCNT_INCR(mffs);
645 				DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n"));
646 				memcpy(&fs->fpreg[rt], &fs->fpscr,
647 					sizeof(fs->fpscr));
648 				break;
649 			case	OPC63_MTFSF:
650 				FPU_EMU_EVCNT_INCR(mtfsf);
651 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSF\n"));
652 				if ((rt = instr.i_xfl.i_flm) == -1) {
653 					mask = -1;
654 				} else {
655 					mask = 0;
656 					/* Convert 1 bit -> 4 bits */
657 					for (i = 0; i < 8; i++)
658 						if (rt & (1 << i))
659 							mask |=
660 							    (0xf << (4 * i));
661 				}
662 				a = (int *)&fs->fpreg[rb];
663 				bits = a[1] & mask;
664 				fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
665 				break;
666 			case	OPC63_FCTID:
667 			case	OPC63_FCTIDZ:
668 				FPU_EMU_EVCNT_INCR(fctid);
669 				DPRINTF(FPE_INSN, ("fpu_execute: FCTID\n"));
670 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
671 				type = FTYPE_LNG | FTYPE_FPSCR;
672 				if (instr.i_x.i_xo == OPC63_FCTIDZ)
673 					type |= FTYPE_RD_RZ;
674 				break;
675 			case	OPC63_FCFID:
676 				FPU_EMU_EVCNT_INCR(fcfid);
677 				DPRINTF(FPE_INSN, ("fpu_execute: FCFID\n"));
678 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_LNG,
679 				    FR(rb));
680 				type = FTYPE_DBL | FTYPE_FPSCR;
681 				break;
682 			default:
683 				return (NOTFPU);
684 				break;
685 			}
686 		} else {
687 			/* Format A */
688 			rt = instr.i_a.i_frt;
689 			ra = instr.i_a.i_fra;
690 			rb = instr.i_a.i_frb;
691 			rc = instr.i_a.i_frc;
692 
693 			/*
694 			 * All arithmetic operations work on registers, which
695 			 * are stored as doubles.
696 			 */
697 			type = FTYPE_DBL;
698 			switch ((unsigned int)instr.i_a.i_xo) {
699 			case	OPC59_FDIVS:
700 				FPU_EMU_EVCNT_INCR(fdiv);
701 				DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n"));
702 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
703 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
704 				fp = fpu_div(fe);
705 				break;
706 			case	OPC59_FSUBS:
707 				FPU_EMU_EVCNT_INCR(fsub);
708 				DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n"));
709 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
710 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
711 				fp = fpu_sub(fe);
712 				break;
713 			case	OPC59_FADDS:
714 				FPU_EMU_EVCNT_INCR(fadd);
715 				DPRINTF(FPE_INSN, ("fpu_execute: FADD\n"));
716 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
717 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
718 				fp = fpu_add(fe);
719 				break;
720 			case	OPC59_FSQRTS:
721 				FPU_EMU_EVCNT_INCR(fsqrt);
722 				DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n"));
723 				fpu_explode(fe, &fe->fe_f1, type, FR(rb));
724 				fp = fpu_sqrt(fe);
725 				break;
726 			case	OPC63M_FSEL:
727 				FPU_EMU_EVCNT_INCR(fsel);
728 				DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n"));
729 				a = (int *)&fe->fe_fpstate->fpreg[ra];
730 				if ((( a[0] & 0x80000000) &&
731 				     ((a[0] & 0x7fffffff) | a[1])) ||
732 				    (( a[0] & 0x7ff00000) &&
733 				     ((a[0] & 0x000fffff) | a[1]))) {
734 					/* negative/NaN or NaN */
735 					rc = rb;
736 				}
737 				DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt));
738 				memcpy(&fs->fpreg[rt], &fs->fpreg[rc],
739 					sizeof(double));
740 				break;
741 			case	OPC59_FRES:
742 				FPU_EMU_EVCNT_INCR(fpres);
743 				DPRINTF(FPE_INSN, ("fpu_execute: FPRES\n"));
744 				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, 1);
745 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
746 				fp = fpu_div(fe);
747 				break;
748 			case	OPC59_FMULS:
749 				FPU_EMU_EVCNT_INCR(fmul);
750 				DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n"));
751 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
752 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
753 				fp = fpu_mul(fe);
754 				break;
755 			case	OPC63M_FRSQRTE:
756 				/* Reciprocal sqrt() estimate */
757 				FPU_EMU_EVCNT_INCR(frsqrte);
758 				DPRINTF(FPE_INSN, ("fpu_execute: FRSQRTE\n"));
759 				fpu_explode(fe, &fe->fe_f1, type, FR(rb));
760 				fp = fpu_sqrt(fe);
761 				fe->fe_f2 = *fp;
762 				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, 1);
763 				fp = fpu_div(fe);
764 				break;
765 			case	OPC59_FMSUBS:
766 				FPU_EMU_EVCNT_INCR(fmsub);
767 				DPRINTF(FPE_INSN, ("fpu_execute: FMSUB\n"));
768 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
769 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
770 				fp = fpu_mul(fe);
771 				fe->fe_f1 = *fp;
772 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
773 				fp = fpu_sub(fe);
774 				break;
775 			case	OPC59_FMADDS:
776 				FPU_EMU_EVCNT_INCR(fmadd);
777 				DPRINTF(FPE_INSN, ("fpu_execute: FMADD\n"));
778 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
779 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
780 				fp = fpu_mul(fe);
781 				fe->fe_f1 = *fp;
782 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
783 				fp = fpu_add(fe);
784 				break;
785 			case	OPC59_FNMSUBS:
786 				FPU_EMU_EVCNT_INCR(fnmsub);
787 				DPRINTF(FPE_INSN, ("fpu_execute: FNMSUB\n"));
788 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
789 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
790 				fp = fpu_mul(fe);
791 				fe->fe_f1 = *fp;
792 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
793 				fp = fpu_sub(fe);
794 				/* Negate */
795 				if (!ISNAN(fp))
796 					fp->fp_sign ^= 1;
797 				break;
798 			case	OPC59_FNMADDS:
799 				FPU_EMU_EVCNT_INCR(fnmadd);
800 				DPRINTF(FPE_INSN, ("fpu_execute: FNMADD\n"));
801 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
802 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
803 				fp = fpu_mul(fe);
804 				fe->fe_f1 = *fp;
805 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
806 				fp = fpu_add(fe);
807 				/* Negate */
808 				if (!ISNAN(fp))
809 					fp->fp_sign ^= 1;
810 				break;
811 			default:
812 				return (NOTFPU);
813 				break;
814 			}
815 
816 			/* If the instruction was single precision, round */
817 			if (!(instr.i_any.i_opcd & 0x4)) {
818 				fpu_implode(fe, fp, FTYPE_SNG | FTYPE_FPSCR,
819 				    &FR(rt));
820 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
821 				    FR(rt));
822 			} else
823 				type |= FTYPE_FPSCR;
824 		}
825 	} else {
826 		return (NOTFPU);
827 	}
828 
829 	/*
830 	 * ALU operation is complete.  Collapse the result and then check
831 	 * for exceptions.  If we got any, and they are enabled, do not
832 	 * alter the destination register, just stop with an exception.
833 	 * Otherwise set new current exceptions and accrue.
834 	 */
835 	if (fp)
836 		fpu_implode(fe, fp, type, &FR(rt));
837 	cx = fe->fe_cx;
838 	fsr = fe->fe_fpscr & ~(FPSCR_FEX|FPSCR_VX);
839 	if (cx != 0) {
840 		fsr |= cx;
841 		DPRINTF(FPE_INSN, ("fpu_execute: cx %x, fsr %x\n", cx, fsr));
842 	}
843 	if (fsr & FPSR_INV)
844 		fsr |= FPSCR_VX;
845 	mask = (fsr & FPSR_EX) << (25 - 3);
846 	if (fsr & mask)
847 		fsr |= FPSCR_FEX;
848 	if ((fsr ^ fe->fe_fpscr) & FPSR_EX_MSK)
849 		fsr |= FPSCR_FX;
850 
851 	if (cond) {
852 		bits = fsr & 0xf0000000;
853 		/* Isolate condition codes */
854 		bits >>= 28;
855 		/* Move fpu condition codes to cr[1] */
856 		tf->tf_cr &= ~(0x0f000000);
857 		tf->tf_cr |= (bits << 24);
858 		DPRINTF(FPE_INSN, ("fpu_execute: cr[1] <= %x\n", bits));
859 	}
860 
861 	if (setcr) {
862 		bits = fsr & FPSCR_FPCC;
863 		/* Isolate condition codes */
864 		bits <<= 16;
865 		/* Move fpu condition codes to cr[bf/4] */
866 		tf->tf_cr &= ~(0xf0000000>>bf);
867 		tf->tf_cr |= (bits >> bf);
868 		DPRINTF(FPE_INSN, ("fpu_execute: cr[%d] (cr=%x) <= %x\n", bf/4, tf->tf_cr, bits));
869 	}
870 
871 	((int *)&fs->fpscr)[1] = fsr;
872 	if (fsr & FPSCR_FEX)
873 		return(FPE);
874 	return (0);	/* success */
875 }
876