/*
 * Copyright (c) 1992 The Regents of the University of California.
 * All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)fpu_implode.c	7.1 (Berkeley) 07/13/92
 *
 * from: $Header: fpu_implode.c,v 1.4 92/06/17 05:41:33 torek Exp $
 */

/*
 * FPU subroutines: `implode' internal format numbers into the machine's
 * `packed binary' format.
 */

#include "sys/types.h"

#include "machine/ieee.h"
#include "machine/instr.h"
#include "machine/reg.h"

#include "fpu_arith.h"
#include "fpu_emu.h"

/*
 * Round a number (algorithm from Motorola MC68882 manual, modified for
 * our internal format).  Set inexact exception if rounding is required.
 * Return true iff we rounded up.
 *
 * After rounding, we discard the guard and round bits by shifting right
 * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky).
 * This saves effort later.
 *
 * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's
 * responsibility to fix this if necessary.
 */
static int
round(register struct fpemu *fe, register struct fpn *fp)
{
	register u_int m0, m1, m2, m3;
	register int gr, s;

	m0 = fp->fp_mant[0];
	m1 = fp->fp_mant[1];
	m2 = fp->fp_mant[2];
	m3 = fp->fp_mant[3];
	gr = m3 & 3;
	s = fp->fp_sticky;

	/* mant >>= FP_NG */
	m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
	m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
	m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
	m0 >>= FP_NG;

	if ((gr | s) == 0)	/* result is exact: no rounding needed */
		goto rounddown;

	fe->fe_cx |= FSR_NX;	/* inexact */

	/* Go to rounddown to round down; break to round up. */
	switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) {

	case FSR_RD_RN:
	default:
		/*
		 * Round only if guard is set (gr & 2).  If guard is set,
		 * but round & sticky both clear, then we want to round
		 * but have a tie, so round to even, i.e., add 1 iff odd.
		 */
		if ((gr & 2) == 0)
			goto rounddown;
		if ((gr & 1) || fp->fp_sticky || (m3 & 1))
			break;
		goto rounddown;

	case FSR_RD_RZ:
		/* Round towards zero, i.e., down. */
		goto rounddown;

	case FSR_RD_RM:
		/* Round towards -Inf: up if negative, down if positive. */
		if (fp->fp_sign)
			break;
		goto rounddown;

	case FSR_RD_RP:
		/* Round towards +Inf: up if positive, down otherwise. */
		if (!fp->fp_sign)
			break;
		goto rounddown;
	}

	/* Bump low bit of mantissa, with carry. */
#ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */
	FPU_ADDS(m3, m3, 1);
	FPU_ADDCS(m2, m2, 0);
	FPU_ADDCS(m1, m1, 0);
	FPU_ADDC(m0, m0, 0);
#else
	if (++m3 == 0 && ++m2 == 0 && ++m1 == 0)
		m0++;
#endif
	fp->fp_mant[0] = m0;
	fp->fp_mant[1] = m1;
	fp->fp_mant[2] = m2;
	fp->fp_mant[3] = m3;
	return (1);

rounddown:
	fp->fp_mant[0] = m0;
	fp->fp_mant[1] = m1;
	fp->fp_mant[2] = m2;
	fp->fp_mant[3] = m3;
	return (0);
}
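
/*
 * Illustrative sketch, for exposition only (never compiled into the
 * emulator): the same nearest-even decision as in round() above,
 * restated for a single word whose low two bits are the guard bit
 * (bit 1) and round bit (bit 0), with everything already shifted out
 * OR'ed into `sticky'.  The helper name is ours, not part of this file.
 */
#ifdef notdef
static int
nearest_even_roundup(u_int mant, int sticky)
{
	int gr = mant & 3;	/* guard and round bits */
	u_int frac = mant >> 2;	/* the bits we will keep */

	if ((gr & 2) == 0)	/* guard clear: below the halfway point */
		return (0);	/* round down */
	if ((gr & 1) | sticky)	/* above the halfway point */
		return (1);	/* round up */
	return (frac & 1);	/* exact tie: round up iff lsb is odd */
}
#endif /* notdef */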

/*
 * For overflow: return true if overflow is to go to +/-Inf, according
 * to the sign of the overflowing result.  If false, overflow is to go
 * to the largest magnitude value instead.
 */
static int
toinf(struct fpemu *fe, int sign)
{
	int inf;

	/* look at rounding direction */
	switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) {

	default:
	case FSR_RD_RN:		/* the nearest value is always Inf */
		inf = 1;
		break;

	case FSR_RD_RZ:		/* toward 0 => never towards Inf */
		inf = 0;
		break;

	case FSR_RD_RP:		/* toward +Inf iff positive */
		inf = sign == 0;
		break;

	case FSR_RD_RM:		/* toward -Inf iff negative */
		inf = sign;
		break;
	}
	return (inf);
}
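
/*
 * For exposition only: the four overflow outcomes, by rounding mode.
 * A positive overflow goes to +Inf under RN and RP but stops at the
 * largest finite value under RZ and RM; a negative overflow mirrors
 * this, with RP and RM exchanged.  The table name is ours and the
 * table is not referenced by the emulator.
 */
#ifdef notdef
static const struct {
	int rd;		/* rounding direction (FSR_RD_*) */
	int pos;	/* positive overflow: 1 = Inf, 0 = max finite */
	int neg;	/* negative overflow: 1 = Inf, 0 = max finite */
} toinf_outcomes[] = {
	{ FSR_RD_RN, 1, 1 },
	{ FSR_RD_RZ, 0, 0 },
	{ FSR_RD_RP, 1, 0 },
	{ FSR_RD_RM, 0, 1 },
};
#endif /* notdef */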

/*
 * fpn -> int (int value returned as return value).
 *
 * N.B.: this conversion always rounds towards zero (this is a peculiarity
 * of the SPARC instruction set).
 */
u_int
fpu_ftoi(fe, fp)
	struct fpemu *fe;
	register struct fpn *fp;
{
	register u_int i;
	register int sign, exp;

	sign = fp->fp_sign;
	switch (fp->fp_class) {

	case FPC_ZERO:
		return (0);

	case FPC_NUM:
		/*
		 * If the value is >= 2^32 (i.e., exp >= 32), overflow.
		 * Otherwise shift value right
		 * into last mantissa word (this will not exceed 0xffffffff),
		 * shifting any guard and round bits out into the sticky
		 * bit.  Then ``round'' towards zero, i.e., just set an
		 * inexact exception if sticky is set (see round()).
		 * If the result is > 0x80000000, or is positive and equals
		 * 0x80000000, overflow; otherwise the last fraction word
		 * is the result.
		 */
		if ((exp = fp->fp_exp) >= 32)
			break;
		/* NB: the following includes exp < 0 cases */
		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
			fe->fe_cx |= FSR_NX;
		i = fp->fp_mant[3];
		if (i >= ((u_int)0x80000000 + sign))
			break;
		return (sign ? -i : i);

	default:		/* Inf, qNaN, sNaN */
		break;
	}
	/* overflow: replace any inexact exception with invalid */
	fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
	return (0x7fffffff + sign);
}
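
/*
 * Worked example of the fpu_ftoi() overflow test, for exposition only.
 * The magnitude i is compared against 0x80000000 + sign, so a negative
 * result may reach 0x80000000 (which negates to INT_MIN) while a
 * positive one must stay strictly below it; likewise the final
 * 0x7fffffff + sign yields INT_MAX when sign is 0 and INT_MIN
 * (0x80000000) when sign is 1:
 *
 *	sign = 0, i = 0x7fffffff  ->  in range, returns 0x7fffffff
 *	sign = 0, i = 0x80000000  ->  overflow, FSR_NV, returns 0x7fffffff
 *	sign = 1, i = 0x80000000  ->  in range, returns -i = 0x80000000
 *	sign = 1, i = 0x80000001  ->  overflow, FSR_NV, returns 0x80000000
 */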

/*
 * fpn -> single (32 bit single returned as return value).
 * We assume <= 29 bits in a single-precision fraction (1.f part).
 */
u_int
fpu_ftos(fe, fp)
	struct fpemu *fe;
	register struct fpn *fp;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	SNG_EXP(e)	((e) << SNG_FRACBITS)	/* makes e an exponent */
#define	SNG_MASK	(SNG_EXP(1) - 1)	/* mask for fraction */

	/* Take care of non-numbers first. */
	if (ISNAN(fp)) {
		/*
		 * Preserve upper bits of NaN, per SPARC V8 appendix N.
		 * Note that fp->fp_mant[0] has the quiet bit set,
		 * even if it is classified as a signalling NaN.
		 */
		(void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS);
		exp = SNG_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp))
		return (sign | SNG_EXP(SNG_EXP_INFNAN));
	if (ISZERO(fp))
		return (sign);

	/*
	 * Normals (including subnormals).  Drop all the fraction bits
	 * (including the explicit ``implied'' 1 bit) down into the
	 * single-precision range.  If the number is subnormal, move
	 * the ``implied'' 1 into the explicit range as well, and shift
	 * right to introduce leading zeroes.  Rounding then acts
	 * differently for normals and subnormals: the largest subnormal
	 * may round to the smallest normal (1.0 x 2^minexp), or may
	 * remain subnormal.  In the latter case, signal an underflow
	 * if the result was inexact or if underflow traps are enabled.
	 *
	 * Rounding a normal, on the other hand, always produces another
	 * normal (although either way the result might be too big for
	 * single precision, and cause an overflow).  If rounding a
	 * normal produces 2.0 in the fraction, we need not adjust that
	 * fraction at all, since both 1.0 and 2.0 are zero under the
	 * fraction mask.
	 *
	 * Note that the guard and round bits vanish from the number after
	 * rounding.
	 */
	if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) {	/* subnormal */
		/* -FP_NG for g,r; -SNG_FRACBITS-exp for fraction */
		(void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
		if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
			return (sign | SNG_EXP(1) | 0);
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		return (sign | SNG_EXP(0) | fp->fp_mant[3]);
	}
	/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
#ifdef DIAGNOSTIC
	if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
		panic("fpu_ftos");
#endif
	if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
		exp++;
	if (exp >= SNG_EXP_INFNAN) {
		/* overflow to inf or to max single */
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign))
			return (sign | SNG_EXP(SNG_EXP_INFNAN));
		return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK);
	}
done:
	/* phew, made it */
	return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
}
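
/*
 * Worked example of the packing above, for exposition only, assuming
 * the usual IEEE values from machine/ieee.h (SNG_FRACBITS == 23,
 * SNG_EXP_BIAS == 127, SNG_EXP_INFNAN == 255).  SNG_EXP(e) places the
 * biased exponent just above the fraction, so for instance
 *
 *	1.0f  ==  sign 0 | SNG_EXP(SNG_EXP_BIAS) | fraction 0
 *	      ==  0x3f800000
 *
 * and the largest finite single, returned on overflow when toinf()
 * says not to go to Inf, is
 *
 *	SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK  ==  0x7f7fffff
 */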

/*
 * fpn -> double (32 bit high-order result returned; 32-bit low order result
 * left in res[1]).  Assumes <= 61 bits in double precision fraction.
 *
 * This code mimics fpu_ftos; see it for comments.
 */
u_int
fpu_ftod(fe, fp, res)
	struct fpemu *fe;
	register struct fpn *fp;
	u_int *res;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	DBL_EXP(e)	((e) << (DBL_FRACBITS & 31))
#define	DBL_MASK	(DBL_EXP(1) - 1)

	if (ISNAN(fp)) {
		(void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS);
		exp = DBL_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp)) {
		sign |= DBL_EXP(DBL_EXP_INFNAN);
		goto zero;
	}
	if (ISZERO(fp)) {
zero:		res[1] = 0;
		return (sign);
	}

	if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {
		(void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
		if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
			res[1] = 0;
			return (sign | DBL_EXP(1) | 0);
		}
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		exp = 0;
		goto done;
	}
	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
	if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
		exp++;
	if (exp >= DBL_EXP_INFNAN) {
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign)) {
			res[1] = 0;
			return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0);
		}
		res[1] = ~0;
		/* largest finite double (cf. the max single case in ftos);
		   DBL_EXP(DBL_EXP_INFNAN) here would yield a NaN pattern */
		return (sign | DBL_EXP(DBL_EXP_INFNAN - 1) | DBL_MASK);
	}
done:
	res[1] = fp->fp_mant[3];
	return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
}
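
/*
 * For exposition only, assuming the usual machine/ieee.h values
 * (DBL_FRACBITS == 52, DBL_EXP_BIAS == 1023): the fraction spans all
 * of res[1] (32 low-order bits, taken from fp_mant[3]) plus the low
 * 20 bits of the returned high word (taken from fp_mant[2]), and
 * DBL_EXP(e) is e << 20.  So, e.g., 1.0 packs as high word
 * DBL_EXP(DBL_EXP_BIAS) == 0x3ff00000 with res[1] == 0.
 */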

/*
 * fpn -> extended (32 bit high-order result returned; low-order fraction
 * words left in res[1]..res[3]).  Like ftod, which is like ftos ... but
 * our internal format *is* extended precision, plus 2 bits for guard/round,
 * so we can avoid a small bit of work.
 */
u_int
fpu_ftox(fe, fp, res)
	struct fpemu *fe;
	register struct fpn *fp;
	u_int *res;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	EXT_EXP(e)	((e) << (EXT_FRACBITS & 31))
#define	EXT_MASK	(EXT_EXP(1) - 1)

	if (ISNAN(fp)) {
		(void) fpu_shr(fp, 2);	/* since we are not rounding */
		exp = EXT_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp)) {
		sign |= EXT_EXP(EXT_EXP_INFNAN);
		goto zero;
	}
	if (ISZERO(fp)) {
zero:		res[1] = res[2] = res[3] = 0;
		return (sign);
	}

	if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) {
		(void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp);
		if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) {
			res[1] = res[2] = res[3] = 0;
			return (sign | EXT_EXP(1) | 0);
		}
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		exp = 0;
		goto done;
	}
	/* Since internal == extended, no need to shift here. */
	if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2))
		exp++;
	if (exp >= EXT_EXP_INFNAN) {
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign)) {
			res[1] = res[2] = res[3] = 0;
			return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0);
		}
		res[1] = res[2] = res[3] = ~0;
		/* largest finite extended (cf. the max single case in ftos);
		   EXT_EXP(EXT_EXP_INFNAN) here would yield a NaN pattern */
		return (sign | EXT_EXP(EXT_EXP_INFNAN - 1) | EXT_MASK);
	}
done:
	res[1] = fp->fp_mant[1];
	res[2] = fp->fp_mant[2];
	res[3] = fp->fp_mant[3];
	return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK));
}
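
/*
 * For exposition only: in the normal-number path above, the shift count
 * that fpu_ftos()/fpu_ftod() would use, FP_NMANT - FP_NG - 1 -
 * EXT_FRACBITS, works out to zero, i.e., the internal mantissa is
 * exactly an extended-precision mantissa (implied 1 bit plus EXT_FRACBITS
 * fraction bits) with FP_NG guard/round bits below it.  That is why
 * round() is called with no preliminary fpu_shr(), and why the NaN case
 * shifts by just 2 (== FP_NG).
 */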

/*
 * Implode an fpn, writing the result into the given space.
 */
void
fpu_implode(fe, fp, type, space)
	struct fpemu *fe;
	register struct fpn *fp;
	int type;
	register u_int *space;
{

	switch (type) {

	case FTYPE_INT:
		space[0] = fpu_ftoi(fe, fp);
		break;

	case FTYPE_SNG:
		space[0] = fpu_ftos(fe, fp);
		break;

	case FTYPE_DBL:
		space[0] = fpu_ftod(fe, fp, space);
		break;

	case FTYPE_EXT:
		/* funky rounding precision options ?? */
		space[0] = fpu_ftox(fe, fp, space);
		break;

	default:
		panic("fpu_implode");
	}
}
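
/*
 * Usage sketch, for exposition only: how a caller might implode a
 * computed result into a destination register image.  The function and
 * parameter names (store_double_example, regs, rd) are ours, not part
 * of the emulator.
 */
#ifdef notdef
static void
store_double_example(struct fpemu *fe, struct fpn *fp, u_int *regs, int rd)
{
	u_int space[2];

	fpu_implode(fe, fp, FTYPE_DBL, space);
	regs[rd] = space[0];		/* sign, exponent, high fraction */
	regs[rd + 1] = space[1];	/* low-order fraction word */
}
#endif /* notdef */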