1*55113Storek /* 2*55113Storek * Copyright (c) 1992 The Regents of the University of California. 3*55113Storek * All rights reserved. 4*55113Storek * 5*55113Storek * This software was developed by the Computer Systems Engineering group 6*55113Storek * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7*55113Storek * contributed to Berkeley. 8*55113Storek * 9*55113Storek * %sccs.include.redist.c% 10*55113Storek * 11*55113Storek * @(#)fpu_implode.c 7.1 (Berkeley) 07/13/92 12*55113Storek * 13*55113Storek * from: $Header: fpu_implode.c,v 1.4 92/06/17 05:41:33 torek Exp $ 14*55113Storek */ 15*55113Storek 16*55113Storek /* 17*55113Storek * FPU subroutines: `implode' internal format numbers into the machine's 18*55113Storek * `packed binary' format. 19*55113Storek */ 20*55113Storek 21*55113Storek #include "sys/types.h" 22*55113Storek 23*55113Storek #include "machine/ieee.h" 24*55113Storek #include "machine/instr.h" 25*55113Storek #include "machine/reg.h" 26*55113Storek 27*55113Storek #include "fpu_arith.h" 28*55113Storek #include "fpu_emu.h" 29*55113Storek 30*55113Storek /* 31*55113Storek * Round a number (algorithm from Motorola MC68882 manual, modified for 32*55113Storek * our internal format). Set inexact exception if rounding is required. 33*55113Storek * Return true iff we rounded up. 34*55113Storek * 35*55113Storek * After rounding, we discard the guard and round bits by shifting right 36*55113Storek * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). 37*55113Storek * This saves effort later. 38*55113Storek * 39*55113Storek * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's 40*55113Storek * responsibility to fix this if necessary. 41*55113Storek */ 42*55113Storek static int 43*55113Storek round(register struct fpemu *fe, register struct fpn *fp) 44*55113Storek { 45*55113Storek register u_int m0, m1, m2, m3; 46*55113Storek register int gr, s, ret; 47*55113Storek 48*55113Storek m0 = fp->fp_mant[0]; 49*55113Storek m1 = fp->fp_mant[1]; 50*55113Storek m2 = fp->fp_mant[2]; 51*55113Storek m3 = fp->fp_mant[3]; 52*55113Storek gr = m3 & 3; 53*55113Storek s = fp->fp_sticky; 54*55113Storek 55*55113Storek /* mant >>= FP_NG */ 56*55113Storek m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); 57*55113Storek m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); 58*55113Storek m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); 59*55113Storek m0 >>= FP_NG; 60*55113Storek 61*55113Storek if ((gr | s) == 0) /* result is exact: no rounding needed */ 62*55113Storek goto rounddown; 63*55113Storek 64*55113Storek fe->fe_cx |= FSR_NX; /* inexact */ 65*55113Storek 66*55113Storek /* Go to rounddown to round down; break to round up. */ 67*55113Storek switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 68*55113Storek 69*55113Storek case FSR_RD_RN: 70*55113Storek default: 71*55113Storek /* 72*55113Storek * Round only if guard is set (gr & 2). If guard is set, 73*55113Storek * but round & sticky both clear, then we want to round 74*55113Storek * but have a tie, so round to even, i.e., add 1 iff odd. 75*55113Storek */ 76*55113Storek if ((gr & 2) == 0) 77*55113Storek goto rounddown; 78*55113Storek if ((gr & 1) || fp->fp_sticky || (m3 & 1)) 79*55113Storek break; 80*55113Storek goto rounddown; 81*55113Storek 82*55113Storek case FSR_RD_RZ: 83*55113Storek /* Round towards zero, i.e., down. */ 84*55113Storek goto rounddown; 85*55113Storek 86*55113Storek case FSR_RD_RM: 87*55113Storek /* Round towards -Inf: up if negative, down if positive. */ 88*55113Storek if (fp->fp_sign) 89*55113Storek break; 90*55113Storek goto rounddown; 91*55113Storek 92*55113Storek case FSR_RD_RP: 93*55113Storek /* Round towards +Inf: up if positive, down otherwise. */ 94*55113Storek if (!fp->fp_sign) 95*55113Storek break; 96*55113Storek goto rounddown; 97*55113Storek } 98*55113Storek 99*55113Storek /* Bump low bit of mantissa, with carry. */ 100*55113Storek #ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */ 101*55113Storek FPU_ADDS(m3, m3, 1); 102*55113Storek FPU_ADDCS(m2, m2, 0); 103*55113Storek FPU_ADDCS(m1, m1, 0); 104*55113Storek FPU_ADDC(m0, m0, 0); 105*55113Storek #else 106*55113Storek if (++m3 == 0 && ++m2 == 0 && ++m1 == 0) 107*55113Storek m0++; 108*55113Storek #endif 109*55113Storek fp->fp_mant[0] = m0; 110*55113Storek fp->fp_mant[1] = m1; 111*55113Storek fp->fp_mant[2] = m2; 112*55113Storek fp->fp_mant[3] = m3; 113*55113Storek return (1); 114*55113Storek 115*55113Storek rounddown: 116*55113Storek fp->fp_mant[0] = m0; 117*55113Storek fp->fp_mant[1] = m1; 118*55113Storek fp->fp_mant[2] = m2; 119*55113Storek fp->fp_mant[3] = m3; 120*55113Storek return (0); 121*55113Storek } 122*55113Storek 123*55113Storek /* 124*55113Storek * For overflow: return true if overflow is to go to +/-Inf, according 125*55113Storek * to the sign of the overflowing result. If false, overflow is to go 126*55113Storek * to the largest magnitude value instead. 127*55113Storek */ 128*55113Storek static int 129*55113Storek toinf(struct fpemu *fe, int sign) 130*55113Storek { 131*55113Storek int inf; 132*55113Storek 133*55113Storek /* look at rounding direction */ 134*55113Storek switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 135*55113Storek 136*55113Storek default: 137*55113Storek case FSR_RD_RN: /* the nearest value is always Inf */ 138*55113Storek inf = 1; 139*55113Storek break; 140*55113Storek 141*55113Storek case FSR_RD_RZ: /* toward 0 => never towards Inf */ 142*55113Storek inf = 0; 143*55113Storek break; 144*55113Storek 145*55113Storek case FSR_RD_RP: /* toward +Inf iff positive */ 146*55113Storek inf = sign == 0; 147*55113Storek break; 148*55113Storek 149*55113Storek case FSR_RD_RM: /* toward -Inf iff negative */ 150*55113Storek inf = sign; 151*55113Storek break; 152*55113Storek } 153*55113Storek return (inf); 154*55113Storek } 155*55113Storek 156*55113Storek /* 157*55113Storek * fpn -> int (int value returned as return value). 158*55113Storek * 159*55113Storek * N.B.: this conversion always rounds towards zero (this is a peculiarity 160*55113Storek * of the SPARC instruction set). 161*55113Storek */ 162*55113Storek u_int 163*55113Storek fpu_ftoi(fe, fp) 164*55113Storek struct fpemu *fe; 165*55113Storek register struct fpn *fp; 166*55113Storek { 167*55113Storek register u_int i; 168*55113Storek register int sign, exp; 169*55113Storek 170*55113Storek sign = fp->fp_sign; 171*55113Storek switch (fp->fp_class) { 172*55113Storek 173*55113Storek case FPC_ZERO: 174*55113Storek return (0); 175*55113Storek 176*55113Storek case FPC_NUM: 177*55113Storek /* 178*55113Storek * If exp >= 2^32, overflow. Otherwise shift value right 179*55113Storek * into last mantissa word (this will not exceed 0xffffffff), 180*55113Storek * shifting any guard and round bits out into the sticky 181*55113Storek * bit. Then ``round'' towards zero, i.e., just set an 182*55113Storek * inexact exception if sticky is set (see round()). 183*55113Storek * If the result is > 0x80000000, or is positive and equals 184*55113Storek * 0x80000000, overflow; otherwise the last fraction word 185*55113Storek * is the result. 186*55113Storek */ 187*55113Storek if ((exp = fp->fp_exp) >= 32) 188*55113Storek break; 189*55113Storek /* NB: the following includes exp < 0 cases */ 190*55113Storek if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) 191*55113Storek fe->fe_cx |= FSR_NX; 192*55113Storek i = fp->fp_mant[3]; 193*55113Storek if (i >= ((u_int)0x80000000 + sign)) 194*55113Storek break; 195*55113Storek return (sign ? -i : i); 196*55113Storek 197*55113Storek default: /* Inf, qNaN, sNaN */ 198*55113Storek break; 199*55113Storek } 200*55113Storek /* overflow: replace any inexact exception with invalid */ 201*55113Storek fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV; 202*55113Storek return (0x7fffffff + sign); 203*55113Storek } 204*55113Storek 205*55113Storek /* 206*55113Storek * fpn -> single (32 bit single returned as return value). 207*55113Storek * We assume <= 29 bits in a single-precision fraction (1.f part). 208*55113Storek */ 209*55113Storek u_int 210*55113Storek fpu_ftos(fe, fp) 211*55113Storek struct fpemu *fe; 212*55113Storek register struct fpn *fp; 213*55113Storek { 214*55113Storek register u_int sign = fp->fp_sign << 31; 215*55113Storek register int exp; 216*55113Storek 217*55113Storek #define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ 218*55113Storek #define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ 219*55113Storek 220*55113Storek /* Take care of non-numbers first. */ 221*55113Storek if (ISNAN(fp)) { 222*55113Storek /* 223*55113Storek * Preserve upper bits of NaN, per SPARC V8 appendix N. 224*55113Storek * Note that fp->fp_mant[0] has the quiet bit set, 225*55113Storek * even if it is classified as a signalling NaN. 226*55113Storek */ 227*55113Storek (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); 228*55113Storek exp = SNG_EXP_INFNAN; 229*55113Storek goto done; 230*55113Storek } 231*55113Storek if (ISINF(fp)) 232*55113Storek return (sign | SNG_EXP(SNG_EXP_INFNAN)); 233*55113Storek if (ISZERO(fp)) 234*55113Storek return (sign); 235*55113Storek 236*55113Storek /* 237*55113Storek * Normals (including subnormals). Drop all the fraction bits 238*55113Storek * (including the explicit ``implied'' 1 bit) down into the 239*55113Storek * single-precision range. If the number is subnormal, move 240*55113Storek * the ``implied'' 1 into the explicit range as well, and shift 241*55113Storek * right to introduce leading zeroes. Rounding then acts 242*55113Storek * differently for normals and subnormals: the largest subnormal 243*55113Storek * may round to the smallest normal (1.0 x 2^minexp), or may 244*55113Storek * remain subnormal. In the latter case, signal an underflow 245*55113Storek * if the result was inexact or if underflow traps are enabled. 246*55113Storek * 247*55113Storek * Rounding a normal, on the other hand, always produces another 248*55113Storek * normal (although either way the result might be too big for 249*55113Storek * single precision, and cause an overflow). If rounding a 250*55113Storek * normal produces 2.0 in the fraction, we need not adjust that 251*55113Storek * fraction at all, since both 1.0 and 2.0 are zero under the 252*55113Storek * fraction mask. 253*55113Storek * 254*55113Storek * Note that the guard and round bits vanish from the number after 255*55113Storek * rounding. 256*55113Storek */ 257*55113Storek if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ 258*55113Storek /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ 259*55113Storek (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); 260*55113Storek if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) 261*55113Storek return (sign | SNG_EXP(1) | 0); 262*55113Storek if ((fe->fe_cx & FSR_NX) || 263*55113Storek (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 264*55113Storek fe->fe_cx |= FSR_UF; 265*55113Storek return (sign | SNG_EXP(0) | fp->fp_mant[3]); 266*55113Storek } 267*55113Storek /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ 268*55113Storek (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); 269*55113Storek #ifdef DIAGNOSTIC 270*55113Storek if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) 271*55113Storek panic("fpu_ftos"); 272*55113Storek #endif 273*55113Storek if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) 274*55113Storek exp++; 275*55113Storek if (exp >= SNG_EXP_INFNAN) { 276*55113Storek /* overflow to inf or to max single */ 277*55113Storek fe->fe_cx |= FSR_OF | FSR_NX; 278*55113Storek if (toinf(fe, sign)) 279*55113Storek return (sign | SNG_EXP(SNG_EXP_INFNAN)); 280*55113Storek return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); 281*55113Storek } 282*55113Storek done: 283*55113Storek /* phew, made it */ 284*55113Storek return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); 285*55113Storek } 286*55113Storek 287*55113Storek /* 288*55113Storek * fpn -> double (32 bit high-order result returned; 32-bit low order result 289*55113Storek * left in res[1]). Assumes <= 61 bits in double precision fraction. 290*55113Storek * 291*55113Storek * This code mimics fpu_ftos; see it for comments. 292*55113Storek */ 293*55113Storek u_int 294*55113Storek fpu_ftod(fe, fp, res) 295*55113Storek struct fpemu *fe; 296*55113Storek register struct fpn *fp; 297*55113Storek u_int *res; 298*55113Storek { 299*55113Storek register u_int sign = fp->fp_sign << 31; 300*55113Storek register int exp; 301*55113Storek 302*55113Storek #define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) 303*55113Storek #define DBL_MASK (DBL_EXP(1) - 1) 304*55113Storek 305*55113Storek if (ISNAN(fp)) { 306*55113Storek (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); 307*55113Storek exp = DBL_EXP_INFNAN; 308*55113Storek goto done; 309*55113Storek } 310*55113Storek if (ISINF(fp)) { 311*55113Storek sign |= DBL_EXP(DBL_EXP_INFNAN); 312*55113Storek goto zero; 313*55113Storek } 314*55113Storek if (ISZERO(fp)) { 315*55113Storek zero: res[1] = 0; 316*55113Storek return (sign); 317*55113Storek } 318*55113Storek 319*55113Storek if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { 320*55113Storek (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); 321*55113Storek if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { 322*55113Storek res[1] = 0; 323*55113Storek return (sign | DBL_EXP(1) | 0); 324*55113Storek } 325*55113Storek if ((fe->fe_cx & FSR_NX) || 326*55113Storek (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 327*55113Storek fe->fe_cx |= FSR_UF; 328*55113Storek exp = 0; 329*55113Storek goto done; 330*55113Storek } 331*55113Storek (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); 332*55113Storek if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) 333*55113Storek exp++; 334*55113Storek if (exp >= DBL_EXP_INFNAN) { 335*55113Storek fe->fe_cx |= FSR_OF | FSR_NX; 336*55113Storek if (toinf(fe, sign)) { 337*55113Storek res[1] = 0; 338*55113Storek return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); 339*55113Storek } 340*55113Storek res[1] = ~0; 341*55113Storek return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); 342*55113Storek } 343*55113Storek done: 344*55113Storek res[1] = fp->fp_mant[3]; 345*55113Storek return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); 346*55113Storek } 347*55113Storek 348*55113Storek /* 349*55113Storek * fpn -> extended (32 bit high-order result returned; low-order fraction 350*55113Storek * words left in res[1]..res[3]). Like ftod, which is like ftos ... but 351*55113Storek * our internal format *is* extended precision, plus 2 bits for guard/round, 352*55113Storek * so we can avoid a small bit of work. 353*55113Storek */ 354*55113Storek u_int 355*55113Storek fpu_ftox(fe, fp, res) 356*55113Storek struct fpemu *fe; 357*55113Storek register struct fpn *fp; 358*55113Storek u_int *res; 359*55113Storek { 360*55113Storek register u_int sign = fp->fp_sign << 31; 361*55113Storek register int exp; 362*55113Storek 363*55113Storek #define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31)) 364*55113Storek #define EXT_MASK (EXT_EXP(1) - 1) 365*55113Storek 366*55113Storek if (ISNAN(fp)) { 367*55113Storek (void) fpu_shr(fp, 2); /* since we are not rounding */ 368*55113Storek exp = EXT_EXP_INFNAN; 369*55113Storek goto done; 370*55113Storek } 371*55113Storek if (ISINF(fp)) { 372*55113Storek sign |= EXT_EXP(EXT_EXP_INFNAN); 373*55113Storek goto zero; 374*55113Storek } 375*55113Storek if (ISZERO(fp)) { 376*55113Storek zero: res[1] = res[2] = res[3] = 0; 377*55113Storek return (sign); 378*55113Storek } 379*55113Storek 380*55113Storek if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) { 381*55113Storek (void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp); 382*55113Storek if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) { 383*55113Storek res[1] = res[2] = res[3] = 0; 384*55113Storek return (sign | EXT_EXP(1) | 0); 385*55113Storek } 386*55113Storek if ((fe->fe_cx & FSR_NX) || 387*55113Storek (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 388*55113Storek fe->fe_cx |= FSR_UF; 389*55113Storek exp = 0; 390*55113Storek goto done; 391*55113Storek } 392*55113Storek /* Since internal == extended, no need to shift here. */ 393*55113Storek if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2)) 394*55113Storek exp++; 395*55113Storek if (exp >= EXT_EXP_INFNAN) { 396*55113Storek fe->fe_cx |= FSR_OF | FSR_NX; 397*55113Storek if (toinf(fe, sign)) { 398*55113Storek res[1] = res[2] = res[3] = 0; 399*55113Storek return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0); 400*55113Storek } 401*55113Storek res[1] = res[2] = res[3] = ~0; 402*55113Storek return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK); 403*55113Storek } 404*55113Storek done: 405*55113Storek res[1] = fp->fp_mant[1]; 406*55113Storek res[2] = fp->fp_mant[2]; 407*55113Storek res[3] = fp->fp_mant[3]; 408*55113Storek return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK)); 409*55113Storek } 410*55113Storek 411*55113Storek /* 412*55113Storek * Implode an fpn, writing the result into the given space. 413*55113Storek */ 414*55113Storek void 415*55113Storek fpu_implode(fe, fp, type, space) 416*55113Storek struct fpemu *fe; 417*55113Storek register struct fpn *fp; 418*55113Storek int type; 419*55113Storek register u_int *space; 420*55113Storek { 421*55113Storek 422*55113Storek switch (type) { 423*55113Storek 424*55113Storek case FTYPE_INT: 425*55113Storek space[0] = fpu_ftoi(fe, fp); 426*55113Storek break; 427*55113Storek 428*55113Storek case FTYPE_SNG: 429*55113Storek space[0] = fpu_ftos(fe, fp); 430*55113Storek break; 431*55113Storek 432*55113Storek case FTYPE_DBL: 433*55113Storek space[0] = fpu_ftod(fe, fp, space); 434*55113Storek break; 435*55113Storek 436*55113Storek case FTYPE_EXT: 437*55113Storek /* funky rounding precision options ?? */ 438*55113Storek space[0] = fpu_ftox(fe, fp, space); 439*55113Storek break; 440*55113Storek 441*55113Storek default: 442*55113Storek panic("fpu_implode"); 443*55113Storek } 444*55113Storek } 445