1 /* 2 * Copyright (c) 1992 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * %sccs.include.redist.c% 10 * 11 * @(#)fpu_implode.c 7.1 (Berkeley) 07/13/92 12 * 13 * from: $Header: fpu_implode.c,v 1.4 92/06/17 05:41:33 torek Exp $ 14 */ 15 16 /* 17 * FPU subroutines: `implode' internal format numbers into the machine's 18 * `packed binary' format. 19 */ 20 21 #include "sys/types.h" 22 23 #include "machine/ieee.h" 24 #include "machine/instr.h" 25 #include "machine/reg.h" 26 27 #include "fpu_arith.h" 28 #include "fpu_emu.h" 29 30 /* 31 * Round a number (algorithm from Motorola MC68882 manual, modified for 32 * our internal format). Set inexact exception if rounding is required. 33 * Return true iff we rounded up. 34 * 35 * After rounding, we discard the guard and round bits by shifting right 36 * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). 37 * This saves effort later. 38 * 39 * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's 40 * responsibility to fix this if necessary. 41 */ 42 static int 43 round(register struct fpemu *fe, register struct fpn *fp) 44 { 45 register u_int m0, m1, m2, m3; 46 register int gr, s, ret; 47 48 m0 = fp->fp_mant[0]; 49 m1 = fp->fp_mant[1]; 50 m2 = fp->fp_mant[2]; 51 m3 = fp->fp_mant[3]; 52 gr = m3 & 3; 53 s = fp->fp_sticky; 54 55 /* mant >>= FP_NG */ 56 m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); 57 m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); 58 m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); 59 m0 >>= FP_NG; 60 61 if ((gr | s) == 0) /* result is exact: no rounding needed */ 62 goto rounddown; 63 64 fe->fe_cx |= FSR_NX; /* inexact */ 65 66 /* Go to rounddown to round down; break to round up. */ 67 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 68 69 case FSR_RD_RN: 70 default: 71 /* 72 * Round only if guard is set (gr & 2). If guard is set, 73 * but round & sticky both clear, then we want to round 74 * but have a tie, so round to even, i.e., add 1 iff odd. 75 */ 76 if ((gr & 2) == 0) 77 goto rounddown; 78 if ((gr & 1) || fp->fp_sticky || (m3 & 1)) 79 break; 80 goto rounddown; 81 82 case FSR_RD_RZ: 83 /* Round towards zero, i.e., down. */ 84 goto rounddown; 85 86 case FSR_RD_RM: 87 /* Round towards -Inf: up if negative, down if positive. */ 88 if (fp->fp_sign) 89 break; 90 goto rounddown; 91 92 case FSR_RD_RP: 93 /* Round towards +Inf: up if positive, down otherwise. */ 94 if (!fp->fp_sign) 95 break; 96 goto rounddown; 97 } 98 99 /* Bump low bit of mantissa, with carry. */ 100 #ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */ 101 FPU_ADDS(m3, m3, 1); 102 FPU_ADDCS(m2, m2, 0); 103 FPU_ADDCS(m1, m1, 0); 104 FPU_ADDC(m0, m0, 0); 105 #else 106 if (++m3 == 0 && ++m2 == 0 && ++m1 == 0) 107 m0++; 108 #endif 109 fp->fp_mant[0] = m0; 110 fp->fp_mant[1] = m1; 111 fp->fp_mant[2] = m2; 112 fp->fp_mant[3] = m3; 113 return (1); 114 115 rounddown: 116 fp->fp_mant[0] = m0; 117 fp->fp_mant[1] = m1; 118 fp->fp_mant[2] = m2; 119 fp->fp_mant[3] = m3; 120 return (0); 121 } 122 123 /* 124 * For overflow: return true if overflow is to go to +/-Inf, according 125 * to the sign of the overflowing result. If false, overflow is to go 126 * to the largest magnitude value instead. 127 */ 128 static int 129 toinf(struct fpemu *fe, int sign) 130 { 131 int inf; 132 133 /* look at rounding direction */ 134 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) { 135 136 default: 137 case FSR_RD_RN: /* the nearest value is always Inf */ 138 inf = 1; 139 break; 140 141 case FSR_RD_RZ: /* toward 0 => never towards Inf */ 142 inf = 0; 143 break; 144 145 case FSR_RD_RP: /* toward +Inf iff positive */ 146 inf = sign == 0; 147 break; 148 149 case FSR_RD_RM: /* toward -Inf iff negative */ 150 inf = sign; 151 break; 152 } 153 return (inf); 154 } 155 156 /* 157 * fpn -> int (int value returned as return value). 158 * 159 * N.B.: this conversion always rounds towards zero (this is a peculiarity 160 * of the SPARC instruction set). 161 */ 162 u_int 163 fpu_ftoi(fe, fp) 164 struct fpemu *fe; 165 register struct fpn *fp; 166 { 167 register u_int i; 168 register int sign, exp; 169 170 sign = fp->fp_sign; 171 switch (fp->fp_class) { 172 173 case FPC_ZERO: 174 return (0); 175 176 case FPC_NUM: 177 /* 178 * If exp >= 2^32, overflow. Otherwise shift value right 179 * into last mantissa word (this will not exceed 0xffffffff), 180 * shifting any guard and round bits out into the sticky 181 * bit. Then ``round'' towards zero, i.e., just set an 182 * inexact exception if sticky is set (see round()). 183 * If the result is > 0x80000000, or is positive and equals 184 * 0x80000000, overflow; otherwise the last fraction word 185 * is the result. 186 */ 187 if ((exp = fp->fp_exp) >= 32) 188 break; 189 /* NB: the following includes exp < 0 cases */ 190 if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0) 191 fe->fe_cx |= FSR_NX; 192 i = fp->fp_mant[3]; 193 if (i >= ((u_int)0x80000000 + sign)) 194 break; 195 return (sign ? -i : i); 196 197 default: /* Inf, qNaN, sNaN */ 198 break; 199 } 200 /* overflow: replace any inexact exception with invalid */ 201 fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV; 202 return (0x7fffffff + sign); 203 } 204 205 /* 206 * fpn -> single (32 bit single returned as return value). 207 * We assume <= 29 bits in a single-precision fraction (1.f part). 208 */ 209 u_int 210 fpu_ftos(fe, fp) 211 struct fpemu *fe; 212 register struct fpn *fp; 213 { 214 register u_int sign = fp->fp_sign << 31; 215 register int exp; 216 217 #define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ 218 #define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ 219 220 /* Take care of non-numbers first. */ 221 if (ISNAN(fp)) { 222 /* 223 * Preserve upper bits of NaN, per SPARC V8 appendix N. 224 * Note that fp->fp_mant[0] has the quiet bit set, 225 * even if it is classified as a signalling NaN. 226 */ 227 (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); 228 exp = SNG_EXP_INFNAN; 229 goto done; 230 } 231 if (ISINF(fp)) 232 return (sign | SNG_EXP(SNG_EXP_INFNAN)); 233 if (ISZERO(fp)) 234 return (sign); 235 236 /* 237 * Normals (including subnormals). Drop all the fraction bits 238 * (including the explicit ``implied'' 1 bit) down into the 239 * single-precision range. If the number is subnormal, move 240 * the ``implied'' 1 into the explicit range as well, and shift 241 * right to introduce leading zeroes. Rounding then acts 242 * differently for normals and subnormals: the largest subnormal 243 * may round to the smallest normal (1.0 x 2^minexp), or may 244 * remain subnormal. In the latter case, signal an underflow 245 * if the result was inexact or if underflow traps are enabled. 246 * 247 * Rounding a normal, on the other hand, always produces another 248 * normal (although either way the result might be too big for 249 * single precision, and cause an overflow). If rounding a 250 * normal produces 2.0 in the fraction, we need not adjust that 251 * fraction at all, since both 1.0 and 2.0 are zero under the 252 * fraction mask. 253 * 254 * Note that the guard and round bits vanish from the number after 255 * rounding. 256 */ 257 if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ 258 /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ 259 (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); 260 if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) 261 return (sign | SNG_EXP(1) | 0); 262 if ((fe->fe_cx & FSR_NX) || 263 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 264 fe->fe_cx |= FSR_UF; 265 return (sign | SNG_EXP(0) | fp->fp_mant[3]); 266 } 267 /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ 268 (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); 269 #ifdef DIAGNOSTIC 270 if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) 271 panic("fpu_ftos"); 272 #endif 273 if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) 274 exp++; 275 if (exp >= SNG_EXP_INFNAN) { 276 /* overflow to inf or to max single */ 277 fe->fe_cx |= FSR_OF | FSR_NX; 278 if (toinf(fe, sign)) 279 return (sign | SNG_EXP(SNG_EXP_INFNAN)); 280 return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); 281 } 282 done: 283 /* phew, made it */ 284 return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); 285 } 286 287 /* 288 * fpn -> double (32 bit high-order result returned; 32-bit low order result 289 * left in res[1]). Assumes <= 61 bits in double precision fraction. 290 * 291 * This code mimics fpu_ftos; see it for comments. 292 */ 293 u_int 294 fpu_ftod(fe, fp, res) 295 struct fpemu *fe; 296 register struct fpn *fp; 297 u_int *res; 298 { 299 register u_int sign = fp->fp_sign << 31; 300 register int exp; 301 302 #define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) 303 #define DBL_MASK (DBL_EXP(1) - 1) 304 305 if (ISNAN(fp)) { 306 (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); 307 exp = DBL_EXP_INFNAN; 308 goto done; 309 } 310 if (ISINF(fp)) { 311 sign |= DBL_EXP(DBL_EXP_INFNAN); 312 goto zero; 313 } 314 if (ISZERO(fp)) { 315 zero: res[1] = 0; 316 return (sign); 317 } 318 319 if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { 320 (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); 321 if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { 322 res[1] = 0; 323 return (sign | DBL_EXP(1) | 0); 324 } 325 if ((fe->fe_cx & FSR_NX) || 326 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 327 fe->fe_cx |= FSR_UF; 328 exp = 0; 329 goto done; 330 } 331 (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); 332 if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) 333 exp++; 334 if (exp >= DBL_EXP_INFNAN) { 335 fe->fe_cx |= FSR_OF | FSR_NX; 336 if (toinf(fe, sign)) { 337 res[1] = 0; 338 return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); 339 } 340 res[1] = ~0; 341 return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); 342 } 343 done: 344 res[1] = fp->fp_mant[3]; 345 return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); 346 } 347 348 /* 349 * fpn -> extended (32 bit high-order result returned; low-order fraction 350 * words left in res[1]..res[3]). Like ftod, which is like ftos ... but 351 * our internal format *is* extended precision, plus 2 bits for guard/round, 352 * so we can avoid a small bit of work. 353 */ 354 u_int 355 fpu_ftox(fe, fp, res) 356 struct fpemu *fe; 357 register struct fpn *fp; 358 u_int *res; 359 { 360 register u_int sign = fp->fp_sign << 31; 361 register int exp; 362 363 #define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31)) 364 #define EXT_MASK (EXT_EXP(1) - 1) 365 366 if (ISNAN(fp)) { 367 (void) fpu_shr(fp, 2); /* since we are not rounding */ 368 exp = EXT_EXP_INFNAN; 369 goto done; 370 } 371 if (ISINF(fp)) { 372 sign |= EXT_EXP(EXT_EXP_INFNAN); 373 goto zero; 374 } 375 if (ISZERO(fp)) { 376 zero: res[1] = res[2] = res[3] = 0; 377 return (sign); 378 } 379 380 if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) { 381 (void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp); 382 if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) { 383 res[1] = res[2] = res[3] = 0; 384 return (sign | EXT_EXP(1) | 0); 385 } 386 if ((fe->fe_cx & FSR_NX) || 387 (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) 388 fe->fe_cx |= FSR_UF; 389 exp = 0; 390 goto done; 391 } 392 /* Since internal == extended, no need to shift here. */ 393 if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2)) 394 exp++; 395 if (exp >= EXT_EXP_INFNAN) { 396 fe->fe_cx |= FSR_OF | FSR_NX; 397 if (toinf(fe, sign)) { 398 res[1] = res[2] = res[3] = 0; 399 return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0); 400 } 401 res[1] = res[2] = res[3] = ~0; 402 return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK); 403 } 404 done: 405 res[1] = fp->fp_mant[1]; 406 res[2] = fp->fp_mant[2]; 407 res[3] = fp->fp_mant[3]; 408 return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK)); 409 } 410 411 /* 412 * Implode an fpn, writing the result into the given space. 413 */ 414 void 415 fpu_implode(fe, fp, type, space) 416 struct fpemu *fe; 417 register struct fpn *fp; 418 int type; 419 register u_int *space; 420 { 421 422 switch (type) { 423 424 case FTYPE_INT: 425 space[0] = fpu_ftoi(fe, fp); 426 break; 427 428 case FTYPE_SNG: 429 space[0] = fpu_ftos(fe, fp); 430 break; 431 432 case FTYPE_DBL: 433 space[0] = fpu_ftod(fe, fp, space); 434 break; 435 436 case FTYPE_EXT: 437 /* funky rounding precision options ?? */ 438 space[0] = fpu_ftox(fe, fp, space); 439 break; 440 441 default: 442 panic("fpu_implode"); 443 } 444 } 445