/* $NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $ */
22fe8fb19SBen Gras
/*
 * This version hacked for use with gcc -msoft-float by bjh21.
 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
 * itself).
 */
82fe8fb19SBen Gras
/*
 * Things you may want to define:
 *
 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
 *   properly renamed.
 */
162fe8fb19SBen Gras
/*
===============================================================================

This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
462fe8fb19SBen Gras
472fe8fb19SBen Gras #include <sys/cdefs.h>
482fe8fb19SBen Gras #if defined(LIBC_SCCS) && !defined(lint)
49*84d9c625SLionel Sambuc __RCSID("$NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $");
502fe8fb19SBen Gras #endif /* LIBC_SCCS and not lint */
512fe8fb19SBen Gras
522fe8fb19SBen Gras #ifdef SOFTFLOAT_FOR_GCC
532fe8fb19SBen Gras #include "softfloat-for-gcc.h"
542fe8fb19SBen Gras #endif
552fe8fb19SBen Gras
562fe8fb19SBen Gras #include "milieu.h"
572fe8fb19SBen Gras #include "softfloat.h"
582fe8fb19SBen Gras
/*
 * Hooks for converting between a float64 as it is stored in memory and
 * the form SoftFloat operates on.  If nothing defined them earlier, both
 * directions default to the identity.
 */
#if !defined(FLOAT64_DEMANGLE)
#define FLOAT64_DEMANGLE(a)	(a)
#endif
#if !defined(FLOAT64_MANGLE)
#define FLOAT64_MANGLE(a)	(a)
#endif
692fe8fb19SBen Gras
702fe8fb19SBen Gras /*
712fe8fb19SBen Gras -------------------------------------------------------------------------------
722fe8fb19SBen Gras Floating-point rounding mode, extended double-precision rounding precision,
732fe8fb19SBen Gras and exception flags.
742fe8fb19SBen Gras -------------------------------------------------------------------------------
752fe8fb19SBen Gras */
76*84d9c625SLionel Sambuc #ifndef set_float_rounding_mode
772fe8fb19SBen Gras fp_rnd float_rounding_mode = float_round_nearest_even;
782fe8fb19SBen Gras fp_except float_exception_flags = 0;
79*84d9c625SLionel Sambuc #endif
80*84d9c625SLionel Sambuc #ifndef set_float_exception_inexact_flag
81*84d9c625SLionel Sambuc #define set_float_exception_inexact_flag() \
82*84d9c625SLionel Sambuc ((void)(float_exception_flags |= float_flag_inexact))
83*84d9c625SLionel Sambuc #endif
842fe8fb19SBen Gras #ifdef FLOATX80
852fe8fb19SBen Gras int8 floatx80_rounding_precision = 80;
862fe8fb19SBen Gras #endif
872fe8fb19SBen Gras
/*
-------------------------------------------------------------------------------
Primitive arithmetic functions, including multi-word arithmetic, and
division and square root approximations.  (Can be specialized to target if
desired.)
-------------------------------------------------------------------------------
*/
952fe8fb19SBen Gras #include "softfloat-macros"
962fe8fb19SBen Gras
/*
-------------------------------------------------------------------------------
Functions and definitions to determine:  (1) whether tininess for underflow
is detected before or after rounding by default, (2) what (if anything)
happens when exceptions are raised, (3) how signaling NaNs are distinguished
from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
are propagated from function inputs to output.  These details are target-
specific.
-------------------------------------------------------------------------------
*/
1072fe8fb19SBen Gras #include "softfloat-specialize"
1082fe8fb19SBen Gras
1092fe8fb19SBen Gras #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
1102fe8fb19SBen Gras /*
1112fe8fb19SBen Gras -------------------------------------------------------------------------------
1122fe8fb19SBen Gras Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
1132fe8fb19SBen Gras and 7, and returns the properly rounded 32-bit integer corresponding to the
1142fe8fb19SBen Gras input. If `zSign' is 1, the input is negated before being converted to an
1152fe8fb19SBen Gras integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
1162fe8fb19SBen Gras is simply rounded to an integer, with the inexact exception raised if the
1172fe8fb19SBen Gras input cannot be represented exactly as an integer. However, if the fixed-
1182fe8fb19SBen Gras point input is too large, the invalid exception is raised and the largest
1192fe8fb19SBen Gras positive or negative integer is returned.
1202fe8fb19SBen Gras -------------------------------------------------------------------------------
1212fe8fb19SBen Gras */
roundAndPackInt32(flag zSign,bits64 absZ)1222fe8fb19SBen Gras static int32 roundAndPackInt32( flag zSign, bits64 absZ )
1232fe8fb19SBen Gras {
1242fe8fb19SBen Gras int8 roundingMode;
1252fe8fb19SBen Gras flag roundNearestEven;
1262fe8fb19SBen Gras int8 roundIncrement, roundBits;
1272fe8fb19SBen Gras int32 z;
1282fe8fb19SBen Gras
1292fe8fb19SBen Gras roundingMode = float_rounding_mode;
1302fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
1312fe8fb19SBen Gras roundIncrement = 0x40;
1322fe8fb19SBen Gras if ( ! roundNearestEven ) {
1332fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
1342fe8fb19SBen Gras roundIncrement = 0;
1352fe8fb19SBen Gras }
1362fe8fb19SBen Gras else {
1372fe8fb19SBen Gras roundIncrement = 0x7F;
1382fe8fb19SBen Gras if ( zSign ) {
1392fe8fb19SBen Gras if ( roundingMode == float_round_up ) roundIncrement = 0;
1402fe8fb19SBen Gras }
1412fe8fb19SBen Gras else {
1422fe8fb19SBen Gras if ( roundingMode == float_round_down ) roundIncrement = 0;
1432fe8fb19SBen Gras }
1442fe8fb19SBen Gras }
1452fe8fb19SBen Gras }
146f14fb602SLionel Sambuc roundBits = (int8)(absZ & 0x7F);
1472fe8fb19SBen Gras absZ = ( absZ + roundIncrement )>>7;
1482fe8fb19SBen Gras absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
149f14fb602SLionel Sambuc z = (int32)absZ;
1502fe8fb19SBen Gras if ( zSign ) z = - z;
1512fe8fb19SBen Gras if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
1522fe8fb19SBen Gras float_raise( float_flag_invalid );
1532fe8fb19SBen Gras return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1542fe8fb19SBen Gras }
155*84d9c625SLionel Sambuc if ( roundBits ) set_float_exception_inexact_flag();
1562fe8fb19SBen Gras return z;
1572fe8fb19SBen Gras
1582fe8fb19SBen Gras }
1592fe8fb19SBen Gras
1602fe8fb19SBen Gras /*
1612fe8fb19SBen Gras -------------------------------------------------------------------------------
1622fe8fb19SBen Gras Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
1632fe8fb19SBen Gras `absZ1', with binary point between bits 63 and 64 (between the input words),
1642fe8fb19SBen Gras and returns the properly rounded 64-bit integer corresponding to the input.
1652fe8fb19SBen Gras If `zSign' is 1, the input is negated before being converted to an integer.
1662fe8fb19SBen Gras Ordinarily, the fixed-point input is simply rounded to an integer, with
1672fe8fb19SBen Gras the inexact exception raised if the input cannot be represented exactly as
1682fe8fb19SBen Gras an integer. However, if the fixed-point input is too large, the invalid
1692fe8fb19SBen Gras exception is raised and the largest positive or negative integer is
1702fe8fb19SBen Gras returned.
1712fe8fb19SBen Gras -------------------------------------------------------------------------------
1722fe8fb19SBen Gras */
roundAndPackInt64(flag zSign,bits64 absZ0,bits64 absZ1)1732fe8fb19SBen Gras static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
1742fe8fb19SBen Gras {
1752fe8fb19SBen Gras int8 roundingMode;
1762fe8fb19SBen Gras flag roundNearestEven, increment;
1772fe8fb19SBen Gras int64 z;
1782fe8fb19SBen Gras
1792fe8fb19SBen Gras roundingMode = float_rounding_mode;
1802fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
1812fe8fb19SBen Gras increment = ( (sbits64) absZ1 < 0 );
1822fe8fb19SBen Gras if ( ! roundNearestEven ) {
1832fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
1842fe8fb19SBen Gras increment = 0;
1852fe8fb19SBen Gras }
1862fe8fb19SBen Gras else {
1872fe8fb19SBen Gras if ( zSign ) {
1882fe8fb19SBen Gras increment = ( roundingMode == float_round_down ) && absZ1;
1892fe8fb19SBen Gras }
1902fe8fb19SBen Gras else {
1912fe8fb19SBen Gras increment = ( roundingMode == float_round_up ) && absZ1;
1922fe8fb19SBen Gras }
1932fe8fb19SBen Gras }
1942fe8fb19SBen Gras }
1952fe8fb19SBen Gras if ( increment ) {
1962fe8fb19SBen Gras ++absZ0;
1972fe8fb19SBen Gras if ( absZ0 == 0 ) goto overflow;
1982fe8fb19SBen Gras absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
1992fe8fb19SBen Gras }
2002fe8fb19SBen Gras z = absZ0;
2012fe8fb19SBen Gras if ( zSign ) z = - z;
2022fe8fb19SBen Gras if ( z && ( ( z < 0 ) ^ zSign ) ) {
2032fe8fb19SBen Gras overflow:
2042fe8fb19SBen Gras float_raise( float_flag_invalid );
2052fe8fb19SBen Gras return
2062fe8fb19SBen Gras zSign ? (sbits64) LIT64( 0x8000000000000000 )
2072fe8fb19SBen Gras : LIT64( 0x7FFFFFFFFFFFFFFF );
2082fe8fb19SBen Gras }
209*84d9c625SLionel Sambuc if ( absZ1 ) set_float_exception_inexact_flag();
2102fe8fb19SBen Gras return z;
2112fe8fb19SBen Gras
2122fe8fb19SBen Gras }
2132fe8fb19SBen Gras #endif
2142fe8fb19SBen Gras
2152fe8fb19SBen Gras /*
2162fe8fb19SBen Gras -------------------------------------------------------------------------------
2172fe8fb19SBen Gras Returns the fraction bits of the single-precision floating-point value `a'.
2182fe8fb19SBen Gras -------------------------------------------------------------------------------
2192fe8fb19SBen Gras */
extractFloat32Frac(float32 a)2202fe8fb19SBen Gras INLINE bits32 extractFloat32Frac( float32 a )
2212fe8fb19SBen Gras {
2222fe8fb19SBen Gras
2232fe8fb19SBen Gras return a & 0x007FFFFF;
2242fe8fb19SBen Gras
2252fe8fb19SBen Gras }
2262fe8fb19SBen Gras
2272fe8fb19SBen Gras /*
2282fe8fb19SBen Gras -------------------------------------------------------------------------------
2292fe8fb19SBen Gras Returns the exponent bits of the single-precision floating-point value `a'.
2302fe8fb19SBen Gras -------------------------------------------------------------------------------
2312fe8fb19SBen Gras */
extractFloat32Exp(float32 a)2322fe8fb19SBen Gras INLINE int16 extractFloat32Exp( float32 a )
2332fe8fb19SBen Gras {
2342fe8fb19SBen Gras
2352fe8fb19SBen Gras return ( a>>23 ) & 0xFF;
2362fe8fb19SBen Gras
2372fe8fb19SBen Gras }
2382fe8fb19SBen Gras
2392fe8fb19SBen Gras /*
2402fe8fb19SBen Gras -------------------------------------------------------------------------------
2412fe8fb19SBen Gras Returns the sign bit of the single-precision floating-point value `a'.
2422fe8fb19SBen Gras -------------------------------------------------------------------------------
2432fe8fb19SBen Gras */
extractFloat32Sign(float32 a)2442fe8fb19SBen Gras INLINE flag extractFloat32Sign( float32 a )
2452fe8fb19SBen Gras {
2462fe8fb19SBen Gras
2472fe8fb19SBen Gras return a>>31;
2482fe8fb19SBen Gras
2492fe8fb19SBen Gras }
2502fe8fb19SBen Gras
2512fe8fb19SBen Gras /*
2522fe8fb19SBen Gras -------------------------------------------------------------------------------
2532fe8fb19SBen Gras Normalizes the subnormal single-precision floating-point value represented
2542fe8fb19SBen Gras by the denormalized significand `aSig'. The normalized exponent and
2552fe8fb19SBen Gras significand are stored at the locations pointed to by `zExpPtr' and
2562fe8fb19SBen Gras `zSigPtr', respectively.
2572fe8fb19SBen Gras -------------------------------------------------------------------------------
2582fe8fb19SBen Gras */
2592fe8fb19SBen Gras static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)2602fe8fb19SBen Gras normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
2612fe8fb19SBen Gras {
2622fe8fb19SBen Gras int8 shiftCount;
2632fe8fb19SBen Gras
2642fe8fb19SBen Gras shiftCount = countLeadingZeros32( aSig ) - 8;
2652fe8fb19SBen Gras *zSigPtr = aSig<<shiftCount;
2662fe8fb19SBen Gras *zExpPtr = 1 - shiftCount;
2672fe8fb19SBen Gras
2682fe8fb19SBen Gras }
2692fe8fb19SBen Gras
2702fe8fb19SBen Gras /*
2712fe8fb19SBen Gras -------------------------------------------------------------------------------
2722fe8fb19SBen Gras Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2732fe8fb19SBen Gras single-precision floating-point value, returning the result. After being
2742fe8fb19SBen Gras shifted into the proper positions, the three fields are simply added
2752fe8fb19SBen Gras together to form the result. This means that any integer portion of `zSig'
2762fe8fb19SBen Gras will be added into the exponent. Since a properly normalized significand
2772fe8fb19SBen Gras will have an integer portion equal to 1, the `zExp' input should be 1 less
2782fe8fb19SBen Gras than the desired result exponent whenever `zSig' is a complete, normalized
2792fe8fb19SBen Gras significand.
2802fe8fb19SBen Gras -------------------------------------------------------------------------------
2812fe8fb19SBen Gras */
packFloat32(flag zSign,int16 zExp,bits32 zSig)2822fe8fb19SBen Gras INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
2832fe8fb19SBen Gras {
2842fe8fb19SBen Gras
2852fe8fb19SBen Gras return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
2862fe8fb19SBen Gras
2872fe8fb19SBen Gras }
2882fe8fb19SBen Gras
2892fe8fb19SBen Gras /*
2902fe8fb19SBen Gras -------------------------------------------------------------------------------
2912fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2922fe8fb19SBen Gras and significand `zSig', and returns the proper single-precision floating-
2932fe8fb19SBen Gras point value corresponding to the abstract input. Ordinarily, the abstract
2942fe8fb19SBen Gras value is simply rounded and packed into the single-precision format, with
2952fe8fb19SBen Gras the inexact exception raised if the abstract input cannot be represented
2962fe8fb19SBen Gras exactly. However, if the abstract value is too large, the overflow and
2972fe8fb19SBen Gras inexact exceptions are raised and an infinity or maximal finite value is
2982fe8fb19SBen Gras returned. If the abstract value is too small, the input value is rounded to
2992fe8fb19SBen Gras a subnormal number, and the underflow and inexact exceptions are raised if
3002fe8fb19SBen Gras the abstract input cannot be represented exactly as a subnormal single-
3012fe8fb19SBen Gras precision floating-point number.
3022fe8fb19SBen Gras The input significand `zSig' has its binary point between bits 30
3032fe8fb19SBen Gras and 29, which is 7 bits to the left of the usual location. This shifted
3042fe8fb19SBen Gras significand must be normalized or smaller. If `zSig' is not normalized,
3052fe8fb19SBen Gras `zExp' must be 0; in that case, the result returned is a subnormal number,
3062fe8fb19SBen Gras and it must not require rounding. In the usual case that `zSig' is
3072fe8fb19SBen Gras normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3082fe8fb19SBen Gras The handling of underflow and overflow follows the IEC/IEEE Standard for
3092fe8fb19SBen Gras Binary Floating-Point Arithmetic.
3102fe8fb19SBen Gras -------------------------------------------------------------------------------
3112fe8fb19SBen Gras */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)3122fe8fb19SBen Gras static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
3132fe8fb19SBen Gras {
3142fe8fb19SBen Gras int8 roundingMode;
3152fe8fb19SBen Gras flag roundNearestEven;
3162fe8fb19SBen Gras int8 roundIncrement, roundBits;
3172fe8fb19SBen Gras flag isTiny;
3182fe8fb19SBen Gras
3192fe8fb19SBen Gras roundingMode = float_rounding_mode;
3202fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
3212fe8fb19SBen Gras roundIncrement = 0x40;
3222fe8fb19SBen Gras if ( ! roundNearestEven ) {
3232fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
3242fe8fb19SBen Gras roundIncrement = 0;
3252fe8fb19SBen Gras }
3262fe8fb19SBen Gras else {
3272fe8fb19SBen Gras roundIncrement = 0x7F;
3282fe8fb19SBen Gras if ( zSign ) {
3292fe8fb19SBen Gras if ( roundingMode == float_round_up ) roundIncrement = 0;
3302fe8fb19SBen Gras }
3312fe8fb19SBen Gras else {
3322fe8fb19SBen Gras if ( roundingMode == float_round_down ) roundIncrement = 0;
3332fe8fb19SBen Gras }
3342fe8fb19SBen Gras }
3352fe8fb19SBen Gras }
3362fe8fb19SBen Gras roundBits = zSig & 0x7F;
3372fe8fb19SBen Gras if ( 0xFD <= (bits16) zExp ) {
3382fe8fb19SBen Gras if ( ( 0xFD < zExp )
3392fe8fb19SBen Gras || ( ( zExp == 0xFD )
3402fe8fb19SBen Gras && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
3412fe8fb19SBen Gras ) {
3422fe8fb19SBen Gras float_raise( float_flag_overflow | float_flag_inexact );
3432fe8fb19SBen Gras return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
3442fe8fb19SBen Gras }
3452fe8fb19SBen Gras if ( zExp < 0 ) {
3462fe8fb19SBen Gras isTiny =
3472fe8fb19SBen Gras ( float_detect_tininess == float_tininess_before_rounding )
3482fe8fb19SBen Gras || ( zExp < -1 )
349f14fb602SLionel Sambuc || ( zSig + roundIncrement < 0x80000000U );
3502fe8fb19SBen Gras shift32RightJamming( zSig, - zExp, &zSig );
3512fe8fb19SBen Gras zExp = 0;
3522fe8fb19SBen Gras roundBits = zSig & 0x7F;
3532fe8fb19SBen Gras if ( isTiny && roundBits ) float_raise( float_flag_underflow );
3542fe8fb19SBen Gras }
3552fe8fb19SBen Gras }
356*84d9c625SLionel Sambuc if ( roundBits ) set_float_exception_inexact_flag();
3572fe8fb19SBen Gras zSig = ( zSig + roundIncrement )>>7;
3582fe8fb19SBen Gras zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3592fe8fb19SBen Gras if ( zSig == 0 ) zExp = 0;
3602fe8fb19SBen Gras return packFloat32( zSign, zExp, zSig );
3612fe8fb19SBen Gras
3622fe8fb19SBen Gras }
3632fe8fb19SBen Gras
3642fe8fb19SBen Gras /*
3652fe8fb19SBen Gras -------------------------------------------------------------------------------
3662fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3672fe8fb19SBen Gras and significand `zSig', and returns the proper single-precision floating-
3682fe8fb19SBen Gras point value corresponding to the abstract input. This routine is just like
3692fe8fb19SBen Gras `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3702fe8fb19SBen Gras Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3712fe8fb19SBen Gras floating-point exponent.
3722fe8fb19SBen Gras -------------------------------------------------------------------------------
3732fe8fb19SBen Gras */
3742fe8fb19SBen Gras static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)3752fe8fb19SBen Gras normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
3762fe8fb19SBen Gras {
3772fe8fb19SBen Gras int8 shiftCount;
3782fe8fb19SBen Gras
3792fe8fb19SBen Gras shiftCount = countLeadingZeros32( zSig ) - 1;
3802fe8fb19SBen Gras return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
3812fe8fb19SBen Gras
3822fe8fb19SBen Gras }
3832fe8fb19SBen Gras
3842fe8fb19SBen Gras /*
3852fe8fb19SBen Gras -------------------------------------------------------------------------------
3862fe8fb19SBen Gras Returns the fraction bits of the double-precision floating-point value `a'.
3872fe8fb19SBen Gras -------------------------------------------------------------------------------
3882fe8fb19SBen Gras */
extractFloat64Frac(float64 a)3892fe8fb19SBen Gras INLINE bits64 extractFloat64Frac( float64 a )
3902fe8fb19SBen Gras {
3912fe8fb19SBen Gras
3922fe8fb19SBen Gras return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
3932fe8fb19SBen Gras
3942fe8fb19SBen Gras }
3952fe8fb19SBen Gras
3962fe8fb19SBen Gras /*
3972fe8fb19SBen Gras -------------------------------------------------------------------------------
3982fe8fb19SBen Gras Returns the exponent bits of the double-precision floating-point value `a'.
3992fe8fb19SBen Gras -------------------------------------------------------------------------------
4002fe8fb19SBen Gras */
extractFloat64Exp(float64 a)4012fe8fb19SBen Gras INLINE int16 extractFloat64Exp( float64 a )
4022fe8fb19SBen Gras {
4032fe8fb19SBen Gras
404f14fb602SLionel Sambuc return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
4052fe8fb19SBen Gras
4062fe8fb19SBen Gras }
4072fe8fb19SBen Gras
4082fe8fb19SBen Gras /*
4092fe8fb19SBen Gras -------------------------------------------------------------------------------
4102fe8fb19SBen Gras Returns the sign bit of the double-precision floating-point value `a'.
4112fe8fb19SBen Gras -------------------------------------------------------------------------------
4122fe8fb19SBen Gras */
extractFloat64Sign(float64 a)4132fe8fb19SBen Gras INLINE flag extractFloat64Sign( float64 a )
4142fe8fb19SBen Gras {
4152fe8fb19SBen Gras
416f14fb602SLionel Sambuc return (flag)(FLOAT64_DEMANGLE(a) >> 63);
4172fe8fb19SBen Gras
4182fe8fb19SBen Gras }
4192fe8fb19SBen Gras
4202fe8fb19SBen Gras /*
4212fe8fb19SBen Gras -------------------------------------------------------------------------------
4222fe8fb19SBen Gras Normalizes the subnormal double-precision floating-point value represented
4232fe8fb19SBen Gras by the denormalized significand `aSig'. The normalized exponent and
4242fe8fb19SBen Gras significand are stored at the locations pointed to by `zExpPtr' and
4252fe8fb19SBen Gras `zSigPtr', respectively.
4262fe8fb19SBen Gras -------------------------------------------------------------------------------
4272fe8fb19SBen Gras */
4282fe8fb19SBen Gras static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)4292fe8fb19SBen Gras normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
4302fe8fb19SBen Gras {
4312fe8fb19SBen Gras int8 shiftCount;
4322fe8fb19SBen Gras
4332fe8fb19SBen Gras shiftCount = countLeadingZeros64( aSig ) - 11;
4342fe8fb19SBen Gras *zSigPtr = aSig<<shiftCount;
4352fe8fb19SBen Gras *zExpPtr = 1 - shiftCount;
4362fe8fb19SBen Gras
4372fe8fb19SBen Gras }
4382fe8fb19SBen Gras
4392fe8fb19SBen Gras /*
4402fe8fb19SBen Gras -------------------------------------------------------------------------------
4412fe8fb19SBen Gras Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4422fe8fb19SBen Gras double-precision floating-point value, returning the result. After being
4432fe8fb19SBen Gras shifted into the proper positions, the three fields are simply added
4442fe8fb19SBen Gras together to form the result. This means that any integer portion of `zSig'
4452fe8fb19SBen Gras will be added into the exponent. Since a properly normalized significand
4462fe8fb19SBen Gras will have an integer portion equal to 1, the `zExp' input should be 1 less
4472fe8fb19SBen Gras than the desired result exponent whenever `zSig' is a complete, normalized
4482fe8fb19SBen Gras significand.
4492fe8fb19SBen Gras -------------------------------------------------------------------------------
4502fe8fb19SBen Gras */
packFloat64(flag zSign,int16 zExp,bits64 zSig)4512fe8fb19SBen Gras INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
4522fe8fb19SBen Gras {
4532fe8fb19SBen Gras
4542fe8fb19SBen Gras return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
4552fe8fb19SBen Gras ( ( (bits64) zExp )<<52 ) + zSig );
4562fe8fb19SBen Gras
4572fe8fb19SBen Gras }
4582fe8fb19SBen Gras
4592fe8fb19SBen Gras /*
4602fe8fb19SBen Gras -------------------------------------------------------------------------------
4612fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4622fe8fb19SBen Gras and significand `zSig', and returns the proper double-precision floating-
4632fe8fb19SBen Gras point value corresponding to the abstract input. Ordinarily, the abstract
4642fe8fb19SBen Gras value is simply rounded and packed into the double-precision format, with
4652fe8fb19SBen Gras the inexact exception raised if the abstract input cannot be represented
4662fe8fb19SBen Gras exactly. However, if the abstract value is too large, the overflow and
4672fe8fb19SBen Gras inexact exceptions are raised and an infinity or maximal finite value is
4682fe8fb19SBen Gras returned. If the abstract value is too small, the input value is rounded to
4692fe8fb19SBen Gras a subnormal number, and the underflow and inexact exceptions are raised if
4702fe8fb19SBen Gras the abstract input cannot be represented exactly as a subnormal double-
4712fe8fb19SBen Gras precision floating-point number.
4722fe8fb19SBen Gras The input significand `zSig' has its binary point between bits 62
4732fe8fb19SBen Gras and 61, which is 10 bits to the left of the usual location. This shifted
4742fe8fb19SBen Gras significand must be normalized or smaller. If `zSig' is not normalized,
4752fe8fb19SBen Gras `zExp' must be 0; in that case, the result returned is a subnormal number,
4762fe8fb19SBen Gras and it must not require rounding. In the usual case that `zSig' is
4772fe8fb19SBen Gras normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4782fe8fb19SBen Gras The handling of underflow and overflow follows the IEC/IEEE Standard for
4792fe8fb19SBen Gras Binary Floating-Point Arithmetic.
4802fe8fb19SBen Gras -------------------------------------------------------------------------------
4812fe8fb19SBen Gras */
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)4822fe8fb19SBen Gras static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
4832fe8fb19SBen Gras {
4842fe8fb19SBen Gras int8 roundingMode;
4852fe8fb19SBen Gras flag roundNearestEven;
4862fe8fb19SBen Gras int16 roundIncrement, roundBits;
4872fe8fb19SBen Gras flag isTiny;
4882fe8fb19SBen Gras
4892fe8fb19SBen Gras roundingMode = float_rounding_mode;
4902fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
4912fe8fb19SBen Gras roundIncrement = 0x200;
4922fe8fb19SBen Gras if ( ! roundNearestEven ) {
4932fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
4942fe8fb19SBen Gras roundIncrement = 0;
4952fe8fb19SBen Gras }
4962fe8fb19SBen Gras else {
4972fe8fb19SBen Gras roundIncrement = 0x3FF;
4982fe8fb19SBen Gras if ( zSign ) {
4992fe8fb19SBen Gras if ( roundingMode == float_round_up ) roundIncrement = 0;
5002fe8fb19SBen Gras }
5012fe8fb19SBen Gras else {
5022fe8fb19SBen Gras if ( roundingMode == float_round_down ) roundIncrement = 0;
5032fe8fb19SBen Gras }
5042fe8fb19SBen Gras }
5052fe8fb19SBen Gras }
506f14fb602SLionel Sambuc roundBits = (int16)(zSig & 0x3FF);
5072fe8fb19SBen Gras if ( 0x7FD <= (bits16) zExp ) {
5082fe8fb19SBen Gras if ( ( 0x7FD < zExp )
5092fe8fb19SBen Gras || ( ( zExp == 0x7FD )
5102fe8fb19SBen Gras && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
5112fe8fb19SBen Gras ) {
5122fe8fb19SBen Gras float_raise( float_flag_overflow | float_flag_inexact );
5132fe8fb19SBen Gras return FLOAT64_MANGLE(
5142fe8fb19SBen Gras FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
5152fe8fb19SBen Gras ( roundIncrement == 0 ));
5162fe8fb19SBen Gras }
5172fe8fb19SBen Gras if ( zExp < 0 ) {
5182fe8fb19SBen Gras isTiny =
5192fe8fb19SBen Gras ( float_detect_tininess == float_tininess_before_rounding )
5202fe8fb19SBen Gras || ( zExp < -1 )
521f14fb602SLionel Sambuc || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
5222fe8fb19SBen Gras shift64RightJamming( zSig, - zExp, &zSig );
5232fe8fb19SBen Gras zExp = 0;
524f14fb602SLionel Sambuc roundBits = (int16)(zSig & 0x3FF);
5252fe8fb19SBen Gras if ( isTiny && roundBits ) float_raise( float_flag_underflow );
5262fe8fb19SBen Gras }
5272fe8fb19SBen Gras }
528*84d9c625SLionel Sambuc if ( roundBits ) set_float_exception_inexact_flag();
5292fe8fb19SBen Gras zSig = ( zSig + roundIncrement )>>10;
5302fe8fb19SBen Gras zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
5312fe8fb19SBen Gras if ( zSig == 0 ) zExp = 0;
5322fe8fb19SBen Gras return packFloat64( zSign, zExp, zSig );
5332fe8fb19SBen Gras
5342fe8fb19SBen Gras }
5352fe8fb19SBen Gras
5362fe8fb19SBen Gras /*
5372fe8fb19SBen Gras -------------------------------------------------------------------------------
5382fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5392fe8fb19SBen Gras and significand `zSig', and returns the proper double-precision floating-
5402fe8fb19SBen Gras point value corresponding to the abstract input. This routine is just like
5412fe8fb19SBen Gras `roundAndPackFloat64' except that `zSig' does not have to be normalized.
5422fe8fb19SBen Gras Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
5432fe8fb19SBen Gras floating-point exponent.
5442fe8fb19SBen Gras -------------------------------------------------------------------------------
5452fe8fb19SBen Gras */
5462fe8fb19SBen Gras static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)5472fe8fb19SBen Gras normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
5482fe8fb19SBen Gras {
5492fe8fb19SBen Gras int8 shiftCount;
5502fe8fb19SBen Gras
5512fe8fb19SBen Gras shiftCount = countLeadingZeros64( zSig ) - 1;
5522fe8fb19SBen Gras return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
5532fe8fb19SBen Gras
5542fe8fb19SBen Gras }
5552fe8fb19SBen Gras
5562fe8fb19SBen Gras #ifdef FLOATX80
5572fe8fb19SBen Gras
5582fe8fb19SBen Gras /*
5592fe8fb19SBen Gras -------------------------------------------------------------------------------
5602fe8fb19SBen Gras Returns the fraction bits of the extended double-precision floating-point
5612fe8fb19SBen Gras value `a'.
5622fe8fb19SBen Gras -------------------------------------------------------------------------------
5632fe8fb19SBen Gras */
extractFloatx80Frac(floatx80 a)5642fe8fb19SBen Gras INLINE bits64 extractFloatx80Frac( floatx80 a )
5652fe8fb19SBen Gras {
5662fe8fb19SBen Gras
5672fe8fb19SBen Gras return a.low;
5682fe8fb19SBen Gras
5692fe8fb19SBen Gras }
5702fe8fb19SBen Gras
5712fe8fb19SBen Gras /*
5722fe8fb19SBen Gras -------------------------------------------------------------------------------
5732fe8fb19SBen Gras Returns the exponent bits of the extended double-precision floating-point
5742fe8fb19SBen Gras value `a'.
5752fe8fb19SBen Gras -------------------------------------------------------------------------------
5762fe8fb19SBen Gras */
extractFloatx80Exp(floatx80 a)5772fe8fb19SBen Gras INLINE int32 extractFloatx80Exp( floatx80 a )
5782fe8fb19SBen Gras {
5792fe8fb19SBen Gras
5802fe8fb19SBen Gras return a.high & 0x7FFF;
5812fe8fb19SBen Gras
5822fe8fb19SBen Gras }
5832fe8fb19SBen Gras
5842fe8fb19SBen Gras /*
5852fe8fb19SBen Gras -------------------------------------------------------------------------------
5862fe8fb19SBen Gras Returns the sign bit of the extended double-precision floating-point value
5872fe8fb19SBen Gras `a'.
5882fe8fb19SBen Gras -------------------------------------------------------------------------------
5892fe8fb19SBen Gras */
extractFloatx80Sign(floatx80 a)5902fe8fb19SBen Gras INLINE flag extractFloatx80Sign( floatx80 a )
5912fe8fb19SBen Gras {
5922fe8fb19SBen Gras
5932fe8fb19SBen Gras return a.high>>15;
5942fe8fb19SBen Gras
5952fe8fb19SBen Gras }
5962fe8fb19SBen Gras
5972fe8fb19SBen Gras /*
5982fe8fb19SBen Gras -------------------------------------------------------------------------------
5992fe8fb19SBen Gras Normalizes the subnormal extended double-precision floating-point value
6002fe8fb19SBen Gras represented by the denormalized significand `aSig'. The normalized exponent
6012fe8fb19SBen Gras and significand are stored at the locations pointed to by `zExpPtr' and
6022fe8fb19SBen Gras `zSigPtr', respectively.
6032fe8fb19SBen Gras -------------------------------------------------------------------------------
6042fe8fb19SBen Gras */
6052fe8fb19SBen Gras static void
normalizeFloatx80Subnormal(bits64 aSig,int32 * zExpPtr,bits64 * zSigPtr)6062fe8fb19SBen Gras normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
6072fe8fb19SBen Gras {
6082fe8fb19SBen Gras int8 shiftCount;
6092fe8fb19SBen Gras
6102fe8fb19SBen Gras shiftCount = countLeadingZeros64( aSig );
6112fe8fb19SBen Gras *zSigPtr = aSig<<shiftCount;
6122fe8fb19SBen Gras *zExpPtr = 1 - shiftCount;
6132fe8fb19SBen Gras
6142fe8fb19SBen Gras }
6152fe8fb19SBen Gras
6162fe8fb19SBen Gras /*
6172fe8fb19SBen Gras -------------------------------------------------------------------------------
6182fe8fb19SBen Gras Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
6192fe8fb19SBen Gras extended double-precision floating-point value, returning the result.
6202fe8fb19SBen Gras -------------------------------------------------------------------------------
6212fe8fb19SBen Gras */
packFloatx80(flag zSign,int32 zExp,bits64 zSig)6222fe8fb19SBen Gras INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
6232fe8fb19SBen Gras {
6242fe8fb19SBen Gras floatx80 z;
6252fe8fb19SBen Gras
6262fe8fb19SBen Gras z.low = zSig;
6272fe8fb19SBen Gras z.high = ( ( (bits16) zSign )<<15 ) + zExp;
6282fe8fb19SBen Gras return z;
6292fe8fb19SBen Gras
6302fe8fb19SBen Gras }
6312fe8fb19SBen Gras
6322fe8fb19SBen Gras /*
6332fe8fb19SBen Gras -------------------------------------------------------------------------------
6342fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
6352fe8fb19SBen Gras and extended significand formed by the concatenation of `zSig0' and `zSig1',
6362fe8fb19SBen Gras and returns the proper extended double-precision floating-point value
6372fe8fb19SBen Gras corresponding to the abstract input. Ordinarily, the abstract value is
6382fe8fb19SBen Gras rounded and packed into the extended double-precision format, with the
6392fe8fb19SBen Gras inexact exception raised if the abstract input cannot be represented
6402fe8fb19SBen Gras exactly. However, if the abstract value is too large, the overflow and
6412fe8fb19SBen Gras inexact exceptions are raised and an infinity or maximal finite value is
6422fe8fb19SBen Gras returned. If the abstract value is too small, the input value is rounded to
6432fe8fb19SBen Gras a subnormal number, and the underflow and inexact exceptions are raised if
6442fe8fb19SBen Gras the abstract input cannot be represented exactly as a subnormal extended
6452fe8fb19SBen Gras double-precision floating-point number.
6462fe8fb19SBen Gras If `roundingPrecision' is 32 or 64, the result is rounded to the same
6472fe8fb19SBen Gras number of bits as single or double precision, respectively. Otherwise, the
6482fe8fb19SBen Gras result is rounded to the full precision of the extended double-precision
6492fe8fb19SBen Gras format.
6502fe8fb19SBen Gras The input significand must be normalized or smaller. If the input
6512fe8fb19SBen Gras significand is not normalized, `zExp' must be 0; in that case, the result
6522fe8fb19SBen Gras returned is a subnormal number, and it must not require rounding. The
6532fe8fb19SBen Gras handling of underflow and overflow follows the IEC/IEEE Standard for Binary
6542fe8fb19SBen Gras Floating-Point Arithmetic.
6552fe8fb19SBen Gras -------------------------------------------------------------------------------
6562fe8fb19SBen Gras */
6572fe8fb19SBen Gras static floatx80
roundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)6582fe8fb19SBen Gras roundAndPackFloatx80(
6592fe8fb19SBen Gras int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
6602fe8fb19SBen Gras )
6612fe8fb19SBen Gras {
6622fe8fb19SBen Gras int8 roundingMode;
6632fe8fb19SBen Gras flag roundNearestEven, increment, isTiny;
6642fe8fb19SBen Gras int64 roundIncrement, roundMask, roundBits;
6652fe8fb19SBen Gras
6662fe8fb19SBen Gras roundingMode = float_rounding_mode;
6672fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
6682fe8fb19SBen Gras if ( roundingPrecision == 80 ) goto precision80;
6692fe8fb19SBen Gras if ( roundingPrecision == 64 ) {
6702fe8fb19SBen Gras roundIncrement = LIT64( 0x0000000000000400 );
6712fe8fb19SBen Gras roundMask = LIT64( 0x00000000000007FF );
6722fe8fb19SBen Gras }
6732fe8fb19SBen Gras else if ( roundingPrecision == 32 ) {
6742fe8fb19SBen Gras roundIncrement = LIT64( 0x0000008000000000 );
6752fe8fb19SBen Gras roundMask = LIT64( 0x000000FFFFFFFFFF );
6762fe8fb19SBen Gras }
6772fe8fb19SBen Gras else {
6782fe8fb19SBen Gras goto precision80;
6792fe8fb19SBen Gras }
6802fe8fb19SBen Gras zSig0 |= ( zSig1 != 0 );
6812fe8fb19SBen Gras if ( ! roundNearestEven ) {
6822fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
6832fe8fb19SBen Gras roundIncrement = 0;
6842fe8fb19SBen Gras }
6852fe8fb19SBen Gras else {
6862fe8fb19SBen Gras roundIncrement = roundMask;
6872fe8fb19SBen Gras if ( zSign ) {
6882fe8fb19SBen Gras if ( roundingMode == float_round_up ) roundIncrement = 0;
6892fe8fb19SBen Gras }
6902fe8fb19SBen Gras else {
6912fe8fb19SBen Gras if ( roundingMode == float_round_down ) roundIncrement = 0;
6922fe8fb19SBen Gras }
6932fe8fb19SBen Gras }
6942fe8fb19SBen Gras }
6952fe8fb19SBen Gras roundBits = zSig0 & roundMask;
6962fe8fb19SBen Gras if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
6972fe8fb19SBen Gras if ( ( 0x7FFE < zExp )
6982fe8fb19SBen Gras || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
6992fe8fb19SBen Gras ) {
7002fe8fb19SBen Gras goto overflow;
7012fe8fb19SBen Gras }
7022fe8fb19SBen Gras if ( zExp <= 0 ) {
7032fe8fb19SBen Gras isTiny =
7042fe8fb19SBen Gras ( float_detect_tininess == float_tininess_before_rounding )
7052fe8fb19SBen Gras || ( zExp < 0 )
7062fe8fb19SBen Gras || ( zSig0 <= zSig0 + roundIncrement );
7072fe8fb19SBen Gras shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
7082fe8fb19SBen Gras zExp = 0;
7092fe8fb19SBen Gras roundBits = zSig0 & roundMask;
7102fe8fb19SBen Gras if ( isTiny && roundBits ) float_raise( float_flag_underflow );
711*84d9c625SLionel Sambuc if ( roundBits ) set_float_exception_inexact_flag();
7122fe8fb19SBen Gras zSig0 += roundIncrement;
7132fe8fb19SBen Gras if ( (sbits64) zSig0 < 0 ) zExp = 1;
7142fe8fb19SBen Gras roundIncrement = roundMask + 1;
7152fe8fb19SBen Gras if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
7162fe8fb19SBen Gras roundMask |= roundIncrement;
7172fe8fb19SBen Gras }
7182fe8fb19SBen Gras zSig0 &= ~ roundMask;
7192fe8fb19SBen Gras return packFloatx80( zSign, zExp, zSig0 );
7202fe8fb19SBen Gras }
7212fe8fb19SBen Gras }
722*84d9c625SLionel Sambuc if ( roundBits ) set_float_exception_inexact_flag();
7232fe8fb19SBen Gras zSig0 += roundIncrement;
7242fe8fb19SBen Gras if ( zSig0 < roundIncrement ) {
7252fe8fb19SBen Gras ++zExp;
7262fe8fb19SBen Gras zSig0 = LIT64( 0x8000000000000000 );
7272fe8fb19SBen Gras }
7282fe8fb19SBen Gras roundIncrement = roundMask + 1;
7292fe8fb19SBen Gras if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
7302fe8fb19SBen Gras roundMask |= roundIncrement;
7312fe8fb19SBen Gras }
7322fe8fb19SBen Gras zSig0 &= ~ roundMask;
7332fe8fb19SBen Gras if ( zSig0 == 0 ) zExp = 0;
7342fe8fb19SBen Gras return packFloatx80( zSign, zExp, zSig0 );
7352fe8fb19SBen Gras precision80:
7362fe8fb19SBen Gras increment = ( (sbits64) zSig1 < 0 );
7372fe8fb19SBen Gras if ( ! roundNearestEven ) {
7382fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
7392fe8fb19SBen Gras increment = 0;
7402fe8fb19SBen Gras }
7412fe8fb19SBen Gras else {
7422fe8fb19SBen Gras if ( zSign ) {
7432fe8fb19SBen Gras increment = ( roundingMode == float_round_down ) && zSig1;
7442fe8fb19SBen Gras }
7452fe8fb19SBen Gras else {
7462fe8fb19SBen Gras increment = ( roundingMode == float_round_up ) && zSig1;
7472fe8fb19SBen Gras }
7482fe8fb19SBen Gras }
7492fe8fb19SBen Gras }
7502fe8fb19SBen Gras if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
7512fe8fb19SBen Gras if ( ( 0x7FFE < zExp )
7522fe8fb19SBen Gras || ( ( zExp == 0x7FFE )
7532fe8fb19SBen Gras && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
7542fe8fb19SBen Gras && increment
7552fe8fb19SBen Gras )
7562fe8fb19SBen Gras ) {
7572fe8fb19SBen Gras roundMask = 0;
7582fe8fb19SBen Gras overflow:
7592fe8fb19SBen Gras float_raise( float_flag_overflow | float_flag_inexact );
7602fe8fb19SBen Gras if ( ( roundingMode == float_round_to_zero )
7612fe8fb19SBen Gras || ( zSign && ( roundingMode == float_round_up ) )
7622fe8fb19SBen Gras || ( ! zSign && ( roundingMode == float_round_down ) )
7632fe8fb19SBen Gras ) {
7642fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFE, ~ roundMask );
7652fe8fb19SBen Gras }
7662fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
7672fe8fb19SBen Gras }
7682fe8fb19SBen Gras if ( zExp <= 0 ) {
7692fe8fb19SBen Gras isTiny =
7702fe8fb19SBen Gras ( float_detect_tininess == float_tininess_before_rounding )
7712fe8fb19SBen Gras || ( zExp < 0 )
7722fe8fb19SBen Gras || ! increment
7732fe8fb19SBen Gras || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
7742fe8fb19SBen Gras shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
7752fe8fb19SBen Gras zExp = 0;
7762fe8fb19SBen Gras if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
777*84d9c625SLionel Sambuc if ( zSig1 ) set_float_exception_inexact_flag();
7782fe8fb19SBen Gras if ( roundNearestEven ) {
7792fe8fb19SBen Gras increment = ( (sbits64) zSig1 < 0 );
7802fe8fb19SBen Gras }
7812fe8fb19SBen Gras else {
7822fe8fb19SBen Gras if ( zSign ) {
7832fe8fb19SBen Gras increment = ( roundingMode == float_round_down ) && zSig1;
7842fe8fb19SBen Gras }
7852fe8fb19SBen Gras else {
7862fe8fb19SBen Gras increment = ( roundingMode == float_round_up ) && zSig1;
7872fe8fb19SBen Gras }
7882fe8fb19SBen Gras }
7892fe8fb19SBen Gras if ( increment ) {
7902fe8fb19SBen Gras ++zSig0;
7912fe8fb19SBen Gras zSig0 &=
7922fe8fb19SBen Gras ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
7932fe8fb19SBen Gras if ( (sbits64) zSig0 < 0 ) zExp = 1;
7942fe8fb19SBen Gras }
7952fe8fb19SBen Gras return packFloatx80( zSign, zExp, zSig0 );
7962fe8fb19SBen Gras }
7972fe8fb19SBen Gras }
798*84d9c625SLionel Sambuc if ( zSig1 ) set_float_exception_inexact_flag();
7992fe8fb19SBen Gras if ( increment ) {
8002fe8fb19SBen Gras ++zSig0;
8012fe8fb19SBen Gras if ( zSig0 == 0 ) {
8022fe8fb19SBen Gras ++zExp;
8032fe8fb19SBen Gras zSig0 = LIT64( 0x8000000000000000 );
8042fe8fb19SBen Gras }
8052fe8fb19SBen Gras else {
8062fe8fb19SBen Gras zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
8072fe8fb19SBen Gras }
8082fe8fb19SBen Gras }
8092fe8fb19SBen Gras else {
8102fe8fb19SBen Gras if ( zSig0 == 0 ) zExp = 0;
8112fe8fb19SBen Gras }
8122fe8fb19SBen Gras return packFloatx80( zSign, zExp, zSig0 );
8132fe8fb19SBen Gras
8142fe8fb19SBen Gras }
8152fe8fb19SBen Gras
8162fe8fb19SBen Gras /*
8172fe8fb19SBen Gras -------------------------------------------------------------------------------
8182fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent
8192fe8fb19SBen Gras `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
8202fe8fb19SBen Gras and returns the proper extended double-precision floating-point value
8212fe8fb19SBen Gras corresponding to the abstract input. This routine is just like
8222fe8fb19SBen Gras `roundAndPackFloatx80' except that the input significand does not have to be
8232fe8fb19SBen Gras normalized.
8242fe8fb19SBen Gras -------------------------------------------------------------------------------
8252fe8fb19SBen Gras */
8262fe8fb19SBen Gras static floatx80
normalizeRoundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)8272fe8fb19SBen Gras normalizeRoundAndPackFloatx80(
8282fe8fb19SBen Gras int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
8292fe8fb19SBen Gras )
8302fe8fb19SBen Gras {
8312fe8fb19SBen Gras int8 shiftCount;
8322fe8fb19SBen Gras
8332fe8fb19SBen Gras if ( zSig0 == 0 ) {
8342fe8fb19SBen Gras zSig0 = zSig1;
8352fe8fb19SBen Gras zSig1 = 0;
8362fe8fb19SBen Gras zExp -= 64;
8372fe8fb19SBen Gras }
8382fe8fb19SBen Gras shiftCount = countLeadingZeros64( zSig0 );
8392fe8fb19SBen Gras shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
8402fe8fb19SBen Gras zExp -= shiftCount;
8412fe8fb19SBen Gras return
8422fe8fb19SBen Gras roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
8432fe8fb19SBen Gras
8442fe8fb19SBen Gras }
8452fe8fb19SBen Gras
8462fe8fb19SBen Gras #endif
8472fe8fb19SBen Gras
8482fe8fb19SBen Gras #ifdef FLOAT128
8492fe8fb19SBen Gras
8502fe8fb19SBen Gras /*
8512fe8fb19SBen Gras -------------------------------------------------------------------------------
8522fe8fb19SBen Gras Returns the least-significant 64 fraction bits of the quadruple-precision
8532fe8fb19SBen Gras floating-point value `a'.
8542fe8fb19SBen Gras -------------------------------------------------------------------------------
8552fe8fb19SBen Gras */
extractFloat128Frac1(float128 a)8562fe8fb19SBen Gras INLINE bits64 extractFloat128Frac1( float128 a )
8572fe8fb19SBen Gras {
8582fe8fb19SBen Gras
8592fe8fb19SBen Gras return a.low;
8602fe8fb19SBen Gras
8612fe8fb19SBen Gras }
8622fe8fb19SBen Gras
8632fe8fb19SBen Gras /*
8642fe8fb19SBen Gras -------------------------------------------------------------------------------
8652fe8fb19SBen Gras Returns the most-significant 48 fraction bits of the quadruple-precision
8662fe8fb19SBen Gras floating-point value `a'.
8672fe8fb19SBen Gras -------------------------------------------------------------------------------
8682fe8fb19SBen Gras */
extractFloat128Frac0(float128 a)8692fe8fb19SBen Gras INLINE bits64 extractFloat128Frac0( float128 a )
8702fe8fb19SBen Gras {
8712fe8fb19SBen Gras
8722fe8fb19SBen Gras return a.high & LIT64( 0x0000FFFFFFFFFFFF );
8732fe8fb19SBen Gras
8742fe8fb19SBen Gras }
8752fe8fb19SBen Gras
8762fe8fb19SBen Gras /*
8772fe8fb19SBen Gras -------------------------------------------------------------------------------
8782fe8fb19SBen Gras Returns the exponent bits of the quadruple-precision floating-point value
8792fe8fb19SBen Gras `a'.
8802fe8fb19SBen Gras -------------------------------------------------------------------------------
8812fe8fb19SBen Gras */
extractFloat128Exp(float128 a)8822fe8fb19SBen Gras INLINE int32 extractFloat128Exp( float128 a )
8832fe8fb19SBen Gras {
8842fe8fb19SBen Gras
885f14fb602SLionel Sambuc return (int32)((a.high >> 48) & 0x7FFF);
8862fe8fb19SBen Gras
8872fe8fb19SBen Gras }
8882fe8fb19SBen Gras
8892fe8fb19SBen Gras /*
8902fe8fb19SBen Gras -------------------------------------------------------------------------------
8912fe8fb19SBen Gras Returns the sign bit of the quadruple-precision floating-point value `a'.
8922fe8fb19SBen Gras -------------------------------------------------------------------------------
8932fe8fb19SBen Gras */
extractFloat128Sign(float128 a)8942fe8fb19SBen Gras INLINE flag extractFloat128Sign( float128 a )
8952fe8fb19SBen Gras {
8962fe8fb19SBen Gras
897f14fb602SLionel Sambuc return (flag)(a.high >> 63);
8982fe8fb19SBen Gras
8992fe8fb19SBen Gras }
9002fe8fb19SBen Gras
9012fe8fb19SBen Gras /*
9022fe8fb19SBen Gras -------------------------------------------------------------------------------
9032fe8fb19SBen Gras Normalizes the subnormal quadruple-precision floating-point value
9042fe8fb19SBen Gras represented by the denormalized significand formed by the concatenation of
9052fe8fb19SBen Gras `aSig0' and `aSig1'. The normalized exponent is stored at the location
9062fe8fb19SBen Gras pointed to by `zExpPtr'. The most significant 49 bits of the normalized
9072fe8fb19SBen Gras significand are stored at the location pointed to by `zSig0Ptr', and the
9082fe8fb19SBen Gras least significant 64 bits of the normalized significand are stored at the
9092fe8fb19SBen Gras location pointed to by `zSig1Ptr'.
9102fe8fb19SBen Gras -------------------------------------------------------------------------------
9112fe8fb19SBen Gras */
9122fe8fb19SBen Gras static void
normalizeFloat128Subnormal(bits64 aSig0,bits64 aSig1,int32 * zExpPtr,bits64 * zSig0Ptr,bits64 * zSig1Ptr)9132fe8fb19SBen Gras normalizeFloat128Subnormal(
9142fe8fb19SBen Gras bits64 aSig0,
9152fe8fb19SBen Gras bits64 aSig1,
9162fe8fb19SBen Gras int32 *zExpPtr,
9172fe8fb19SBen Gras bits64 *zSig0Ptr,
9182fe8fb19SBen Gras bits64 *zSig1Ptr
9192fe8fb19SBen Gras )
9202fe8fb19SBen Gras {
9212fe8fb19SBen Gras int8 shiftCount;
9222fe8fb19SBen Gras
9232fe8fb19SBen Gras if ( aSig0 == 0 ) {
9242fe8fb19SBen Gras shiftCount = countLeadingZeros64( aSig1 ) - 15;
9252fe8fb19SBen Gras if ( shiftCount < 0 ) {
9262fe8fb19SBen Gras *zSig0Ptr = aSig1>>( - shiftCount );
9272fe8fb19SBen Gras *zSig1Ptr = aSig1<<( shiftCount & 63 );
9282fe8fb19SBen Gras }
9292fe8fb19SBen Gras else {
9302fe8fb19SBen Gras *zSig0Ptr = aSig1<<shiftCount;
9312fe8fb19SBen Gras *zSig1Ptr = 0;
9322fe8fb19SBen Gras }
9332fe8fb19SBen Gras *zExpPtr = - shiftCount - 63;
9342fe8fb19SBen Gras }
9352fe8fb19SBen Gras else {
9362fe8fb19SBen Gras shiftCount = countLeadingZeros64( aSig0 ) - 15;
9372fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
9382fe8fb19SBen Gras *zExpPtr = 1 - shiftCount;
9392fe8fb19SBen Gras }
9402fe8fb19SBen Gras
9412fe8fb19SBen Gras }
9422fe8fb19SBen Gras
9432fe8fb19SBen Gras /*
9442fe8fb19SBen Gras -------------------------------------------------------------------------------
9452fe8fb19SBen Gras Packs the sign `zSign', the exponent `zExp', and the significand formed
9462fe8fb19SBen Gras by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
9472fe8fb19SBen Gras floating-point value, returning the result. After being shifted into the
9482fe8fb19SBen Gras proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
added together to form the most significant 64 bits of the result.  This
9502fe8fb19SBen Gras means that any integer portion of `zSig0' will be added into the exponent.
9512fe8fb19SBen Gras Since a properly normalized significand will have an integer portion equal
9522fe8fb19SBen Gras to 1, the `zExp' input should be 1 less than the desired result exponent
9532fe8fb19SBen Gras whenever `zSig0' and `zSig1' concatenated form a complete, normalized
9542fe8fb19SBen Gras significand.
9552fe8fb19SBen Gras -------------------------------------------------------------------------------
9562fe8fb19SBen Gras */
9572fe8fb19SBen Gras INLINE float128
packFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)9582fe8fb19SBen Gras packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
9592fe8fb19SBen Gras {
9602fe8fb19SBen Gras float128 z;
9612fe8fb19SBen Gras
9622fe8fb19SBen Gras z.low = zSig1;
9632fe8fb19SBen Gras z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
9642fe8fb19SBen Gras return z;
9652fe8fb19SBen Gras
9662fe8fb19SBen Gras }
9672fe8fb19SBen Gras
9682fe8fb19SBen Gras /*
9692fe8fb19SBen Gras -------------------------------------------------------------------------------
9702fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
9712fe8fb19SBen Gras and extended significand formed by the concatenation of `zSig0', `zSig1',
9722fe8fb19SBen Gras and `zSig2', and returns the proper quadruple-precision floating-point value
9732fe8fb19SBen Gras corresponding to the abstract input. Ordinarily, the abstract value is
9742fe8fb19SBen Gras simply rounded and packed into the quadruple-precision format, with the
9752fe8fb19SBen Gras inexact exception raised if the abstract input cannot be represented
9762fe8fb19SBen Gras exactly. However, if the abstract value is too large, the overflow and
9772fe8fb19SBen Gras inexact exceptions are raised and an infinity or maximal finite value is
9782fe8fb19SBen Gras returned. If the abstract value is too small, the input value is rounded to
9792fe8fb19SBen Gras a subnormal number, and the underflow and inexact exceptions are raised if
9802fe8fb19SBen Gras the abstract input cannot be represented exactly as a subnormal quadruple-
9812fe8fb19SBen Gras precision floating-point number.
9822fe8fb19SBen Gras The input significand must be normalized or smaller. If the input
9832fe8fb19SBen Gras significand is not normalized, `zExp' must be 0; in that case, the result
9842fe8fb19SBen Gras returned is a subnormal number, and it must not require rounding. In the
9852fe8fb19SBen Gras usual case that the input significand is normalized, `zExp' must be 1 less
9862fe8fb19SBen Gras than the ``true'' floating-point exponent. The handling of underflow and
9872fe8fb19SBen Gras overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
9882fe8fb19SBen Gras -------------------------------------------------------------------------------
9892fe8fb19SBen Gras */
9902fe8fb19SBen Gras static float128
roundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1,bits64 zSig2)9912fe8fb19SBen Gras roundAndPackFloat128(
9922fe8fb19SBen Gras flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
9932fe8fb19SBen Gras {
9942fe8fb19SBen Gras int8 roundingMode;
9952fe8fb19SBen Gras flag roundNearestEven, increment, isTiny;
9962fe8fb19SBen Gras
9972fe8fb19SBen Gras roundingMode = float_rounding_mode;
9982fe8fb19SBen Gras roundNearestEven = ( roundingMode == float_round_nearest_even );
9992fe8fb19SBen Gras increment = ( (sbits64) zSig2 < 0 );
10002fe8fb19SBen Gras if ( ! roundNearestEven ) {
10012fe8fb19SBen Gras if ( roundingMode == float_round_to_zero ) {
10022fe8fb19SBen Gras increment = 0;
10032fe8fb19SBen Gras }
10042fe8fb19SBen Gras else {
10052fe8fb19SBen Gras if ( zSign ) {
10062fe8fb19SBen Gras increment = ( roundingMode == float_round_down ) && zSig2;
10072fe8fb19SBen Gras }
10082fe8fb19SBen Gras else {
10092fe8fb19SBen Gras increment = ( roundingMode == float_round_up ) && zSig2;
10102fe8fb19SBen Gras }
10112fe8fb19SBen Gras }
10122fe8fb19SBen Gras }
10132fe8fb19SBen Gras if ( 0x7FFD <= (bits32) zExp ) {
10142fe8fb19SBen Gras if ( ( 0x7FFD < zExp )
10152fe8fb19SBen Gras || ( ( zExp == 0x7FFD )
10162fe8fb19SBen Gras && eq128(
10172fe8fb19SBen Gras LIT64( 0x0001FFFFFFFFFFFF ),
10182fe8fb19SBen Gras LIT64( 0xFFFFFFFFFFFFFFFF ),
10192fe8fb19SBen Gras zSig0,
10202fe8fb19SBen Gras zSig1
10212fe8fb19SBen Gras )
10222fe8fb19SBen Gras && increment
10232fe8fb19SBen Gras )
10242fe8fb19SBen Gras ) {
10252fe8fb19SBen Gras float_raise( float_flag_overflow | float_flag_inexact );
10262fe8fb19SBen Gras if ( ( roundingMode == float_round_to_zero )
10272fe8fb19SBen Gras || ( zSign && ( roundingMode == float_round_up ) )
10282fe8fb19SBen Gras || ( ! zSign && ( roundingMode == float_round_down ) )
10292fe8fb19SBen Gras ) {
10302fe8fb19SBen Gras return
10312fe8fb19SBen Gras packFloat128(
10322fe8fb19SBen Gras zSign,
10332fe8fb19SBen Gras 0x7FFE,
10342fe8fb19SBen Gras LIT64( 0x0000FFFFFFFFFFFF ),
10352fe8fb19SBen Gras LIT64( 0xFFFFFFFFFFFFFFFF )
10362fe8fb19SBen Gras );
10372fe8fb19SBen Gras }
10382fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
10392fe8fb19SBen Gras }
10402fe8fb19SBen Gras if ( zExp < 0 ) {
10412fe8fb19SBen Gras isTiny =
10422fe8fb19SBen Gras ( float_detect_tininess == float_tininess_before_rounding )
10432fe8fb19SBen Gras || ( zExp < -1 )
10442fe8fb19SBen Gras || ! increment
10452fe8fb19SBen Gras || lt128(
10462fe8fb19SBen Gras zSig0,
10472fe8fb19SBen Gras zSig1,
10482fe8fb19SBen Gras LIT64( 0x0001FFFFFFFFFFFF ),
10492fe8fb19SBen Gras LIT64( 0xFFFFFFFFFFFFFFFF )
10502fe8fb19SBen Gras );
10512fe8fb19SBen Gras shift128ExtraRightJamming(
10522fe8fb19SBen Gras zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
10532fe8fb19SBen Gras zExp = 0;
10542fe8fb19SBen Gras if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
10552fe8fb19SBen Gras if ( roundNearestEven ) {
10562fe8fb19SBen Gras increment = ( (sbits64) zSig2 < 0 );
10572fe8fb19SBen Gras }
10582fe8fb19SBen Gras else {
10592fe8fb19SBen Gras if ( zSign ) {
10602fe8fb19SBen Gras increment = ( roundingMode == float_round_down ) && zSig2;
10612fe8fb19SBen Gras }
10622fe8fb19SBen Gras else {
10632fe8fb19SBen Gras increment = ( roundingMode == float_round_up ) && zSig2;
10642fe8fb19SBen Gras }
10652fe8fb19SBen Gras }
10662fe8fb19SBen Gras }
10672fe8fb19SBen Gras }
1068*84d9c625SLionel Sambuc if ( zSig2 ) set_float_exception_inexact_flag();
10692fe8fb19SBen Gras if ( increment ) {
10702fe8fb19SBen Gras add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
10712fe8fb19SBen Gras zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
10722fe8fb19SBen Gras }
10732fe8fb19SBen Gras else {
10742fe8fb19SBen Gras if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
10752fe8fb19SBen Gras }
10762fe8fb19SBen Gras return packFloat128( zSign, zExp, zSig0, zSig1 );
10772fe8fb19SBen Gras
10782fe8fb19SBen Gras }
10792fe8fb19SBen Gras
10802fe8fb19SBen Gras /*
10812fe8fb19SBen Gras -------------------------------------------------------------------------------
10822fe8fb19SBen Gras Takes an abstract floating-point value having sign `zSign', exponent `zExp',
10832fe8fb19SBen Gras and significand formed by the concatenation of `zSig0' and `zSig1', and
10842fe8fb19SBen Gras returns the proper quadruple-precision floating-point value corresponding
10852fe8fb19SBen Gras to the abstract input. This routine is just like `roundAndPackFloat128'
10862fe8fb19SBen Gras except that the input significand has fewer bits and does not have to be
10872fe8fb19SBen Gras normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
10882fe8fb19SBen Gras point exponent.
10892fe8fb19SBen Gras -------------------------------------------------------------------------------
10902fe8fb19SBen Gras */
10912fe8fb19SBen Gras static float128
normalizeRoundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)10922fe8fb19SBen Gras normalizeRoundAndPackFloat128(
10932fe8fb19SBen Gras flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
10942fe8fb19SBen Gras {
10952fe8fb19SBen Gras int8 shiftCount;
10962fe8fb19SBen Gras bits64 zSig2;
10972fe8fb19SBen Gras
10982fe8fb19SBen Gras if ( zSig0 == 0 ) {
10992fe8fb19SBen Gras zSig0 = zSig1;
11002fe8fb19SBen Gras zSig1 = 0;
11012fe8fb19SBen Gras zExp -= 64;
11022fe8fb19SBen Gras }
11032fe8fb19SBen Gras shiftCount = countLeadingZeros64( zSig0 ) - 15;
11042fe8fb19SBen Gras if ( 0 <= shiftCount ) {
11052fe8fb19SBen Gras zSig2 = 0;
11062fe8fb19SBen Gras shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
11072fe8fb19SBen Gras }
11082fe8fb19SBen Gras else {
11092fe8fb19SBen Gras shift128ExtraRightJamming(
11102fe8fb19SBen Gras zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
11112fe8fb19SBen Gras }
11122fe8fb19SBen Gras zExp -= shiftCount;
11132fe8fb19SBen Gras return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
11142fe8fb19SBen Gras
11152fe8fb19SBen Gras }
11162fe8fb19SBen Gras
11172fe8fb19SBen Gras #endif
11182fe8fb19SBen Gras
11192fe8fb19SBen Gras /*
11202fe8fb19SBen Gras -------------------------------------------------------------------------------
11212fe8fb19SBen Gras Returns the result of converting the 32-bit two's complement integer `a'
11222fe8fb19SBen Gras to the single-precision floating-point format. The conversion is performed
11232fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
11242fe8fb19SBen Gras -------------------------------------------------------------------------------
11252fe8fb19SBen Gras */
int32_to_float32(int32 a)11262fe8fb19SBen Gras float32 int32_to_float32( int32 a )
11272fe8fb19SBen Gras {
11282fe8fb19SBen Gras flag zSign;
11292fe8fb19SBen Gras
11302fe8fb19SBen Gras if ( a == 0 ) return 0;
11312fe8fb19SBen Gras if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
11322fe8fb19SBen Gras zSign = ( a < 0 );
1133f14fb602SLionel Sambuc return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
11342fe8fb19SBen Gras
11352fe8fb19SBen Gras }
11362fe8fb19SBen Gras
uint32_to_float32(uint32 a)1137f14fb602SLionel Sambuc float32 uint32_to_float32( uint32 a )
1138f14fb602SLionel Sambuc {
1139f14fb602SLionel Sambuc if ( a == 0 ) return 0;
1140f14fb602SLionel Sambuc if ( a & (bits32) 0x80000000 )
1141f14fb602SLionel Sambuc return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1142f14fb602SLionel Sambuc return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1143f14fb602SLionel Sambuc }
1144f14fb602SLionel Sambuc
1145f14fb602SLionel Sambuc
11462fe8fb19SBen Gras /*
11472fe8fb19SBen Gras -------------------------------------------------------------------------------
11482fe8fb19SBen Gras Returns the result of converting the 32-bit two's complement integer `a'
11492fe8fb19SBen Gras to the double-precision floating-point format. The conversion is performed
11502fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
11512fe8fb19SBen Gras -------------------------------------------------------------------------------
11522fe8fb19SBen Gras */
int32_to_float64(int32 a)11532fe8fb19SBen Gras float64 int32_to_float64( int32 a )
11542fe8fb19SBen Gras {
11552fe8fb19SBen Gras flag zSign;
11562fe8fb19SBen Gras uint32 absA;
11572fe8fb19SBen Gras int8 shiftCount;
11582fe8fb19SBen Gras bits64 zSig;
11592fe8fb19SBen Gras
11602fe8fb19SBen Gras if ( a == 0 ) return 0;
11612fe8fb19SBen Gras zSign = ( a < 0 );
11622fe8fb19SBen Gras absA = zSign ? - a : a;
11632fe8fb19SBen Gras shiftCount = countLeadingZeros32( absA ) + 21;
11642fe8fb19SBen Gras zSig = absA;
11652fe8fb19SBen Gras return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
11662fe8fb19SBen Gras
11672fe8fb19SBen Gras }
11682fe8fb19SBen Gras
uint32_to_float64(uint32 a)1169f14fb602SLionel Sambuc float64 uint32_to_float64( uint32 a )
1170f14fb602SLionel Sambuc {
1171f14fb602SLionel Sambuc int8 shiftCount;
1172f14fb602SLionel Sambuc bits64 zSig = a;
1173f14fb602SLionel Sambuc
1174f14fb602SLionel Sambuc if ( a == 0 ) return 0;
1175f14fb602SLionel Sambuc shiftCount = countLeadingZeros32( a ) + 21;
1176f14fb602SLionel Sambuc return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1177f14fb602SLionel Sambuc
1178f14fb602SLionel Sambuc }
1179f14fb602SLionel Sambuc
11802fe8fb19SBen Gras #ifdef FLOATX80
11812fe8fb19SBen Gras
11822fe8fb19SBen Gras /*
11832fe8fb19SBen Gras -------------------------------------------------------------------------------
11842fe8fb19SBen Gras Returns the result of converting the 32-bit two's complement integer `a'
11852fe8fb19SBen Gras to the extended double-precision floating-point format. The conversion
11862fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
11872fe8fb19SBen Gras Arithmetic.
11882fe8fb19SBen Gras -------------------------------------------------------------------------------
11892fe8fb19SBen Gras */
int32_to_floatx80(int32 a)11902fe8fb19SBen Gras floatx80 int32_to_floatx80( int32 a )
11912fe8fb19SBen Gras {
11922fe8fb19SBen Gras flag zSign;
11932fe8fb19SBen Gras uint32 absA;
11942fe8fb19SBen Gras int8 shiftCount;
11952fe8fb19SBen Gras bits64 zSig;
11962fe8fb19SBen Gras
11972fe8fb19SBen Gras if ( a == 0 ) return packFloatx80( 0, 0, 0 );
11982fe8fb19SBen Gras zSign = ( a < 0 );
11992fe8fb19SBen Gras absA = zSign ? - a : a;
12002fe8fb19SBen Gras shiftCount = countLeadingZeros32( absA ) + 32;
12012fe8fb19SBen Gras zSig = absA;
12022fe8fb19SBen Gras return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
12032fe8fb19SBen Gras
12042fe8fb19SBen Gras }
12052fe8fb19SBen Gras
uint32_to_floatx80(uint32 a)1206f14fb602SLionel Sambuc floatx80 uint32_to_floatx80( uint32 a )
1207f14fb602SLionel Sambuc {
1208f14fb602SLionel Sambuc int8 shiftCount;
1209f14fb602SLionel Sambuc bits64 zSig = a;
1210f14fb602SLionel Sambuc
1211f14fb602SLionel Sambuc if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1212f14fb602SLionel Sambuc shiftCount = countLeadingZeros32( a ) + 32;
1213f14fb602SLionel Sambuc return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1214f14fb602SLionel Sambuc
1215f14fb602SLionel Sambuc }
1216f14fb602SLionel Sambuc
12172fe8fb19SBen Gras #endif
12182fe8fb19SBen Gras
12192fe8fb19SBen Gras #ifdef FLOAT128
12202fe8fb19SBen Gras
12212fe8fb19SBen Gras /*
12222fe8fb19SBen Gras -------------------------------------------------------------------------------
12232fe8fb19SBen Gras Returns the result of converting the 32-bit two's complement integer `a' to
12242fe8fb19SBen Gras the quadruple-precision floating-point format. The conversion is performed
12252fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
12262fe8fb19SBen Gras -------------------------------------------------------------------------------
12272fe8fb19SBen Gras */
int32_to_float128(int32 a)12282fe8fb19SBen Gras float128 int32_to_float128( int32 a )
12292fe8fb19SBen Gras {
12302fe8fb19SBen Gras flag zSign;
12312fe8fb19SBen Gras uint32 absA;
12322fe8fb19SBen Gras int8 shiftCount;
12332fe8fb19SBen Gras bits64 zSig0;
12342fe8fb19SBen Gras
12352fe8fb19SBen Gras if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
12362fe8fb19SBen Gras zSign = ( a < 0 );
12372fe8fb19SBen Gras absA = zSign ? - a : a;
12382fe8fb19SBen Gras shiftCount = countLeadingZeros32( absA ) + 17;
12392fe8fb19SBen Gras zSig0 = absA;
12402fe8fb19SBen Gras return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
12412fe8fb19SBen Gras
12422fe8fb19SBen Gras }
12432fe8fb19SBen Gras
uint32_to_float128(uint32 a)1244f14fb602SLionel Sambuc float128 uint32_to_float128( uint32 a )
1245f14fb602SLionel Sambuc {
1246f14fb602SLionel Sambuc int8 shiftCount;
1247f14fb602SLionel Sambuc bits64 zSig0 = a;
1248f14fb602SLionel Sambuc
1249f14fb602SLionel Sambuc if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1250f14fb602SLionel Sambuc shiftCount = countLeadingZeros32( a ) + 17;
1251f14fb602SLionel Sambuc return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1252f14fb602SLionel Sambuc
1253f14fb602SLionel Sambuc }
1254f14fb602SLionel Sambuc
12552fe8fb19SBen Gras #endif
12562fe8fb19SBen Gras
12572fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
12582fe8fb19SBen Gras /*
12592fe8fb19SBen Gras -------------------------------------------------------------------------------
12602fe8fb19SBen Gras Returns the result of converting the 64-bit two's complement integer `a'
12612fe8fb19SBen Gras to the single-precision floating-point format. The conversion is performed
12622fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
12632fe8fb19SBen Gras -------------------------------------------------------------------------------
12642fe8fb19SBen Gras */
int64_to_float32(int64 a)12652fe8fb19SBen Gras float32 int64_to_float32( int64 a )
12662fe8fb19SBen Gras {
12672fe8fb19SBen Gras flag zSign;
12682fe8fb19SBen Gras uint64 absA;
12692fe8fb19SBen Gras int8 shiftCount;
12702fe8fb19SBen Gras
12712fe8fb19SBen Gras if ( a == 0 ) return 0;
12722fe8fb19SBen Gras zSign = ( a < 0 );
12732fe8fb19SBen Gras absA = zSign ? - a : a;
12742fe8fb19SBen Gras shiftCount = countLeadingZeros64( absA ) - 40;
12752fe8fb19SBen Gras if ( 0 <= shiftCount ) {
12762fe8fb19SBen Gras return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
12772fe8fb19SBen Gras }
12782fe8fb19SBen Gras else {
12792fe8fb19SBen Gras shiftCount += 7;
12802fe8fb19SBen Gras if ( shiftCount < 0 ) {
12812fe8fb19SBen Gras shift64RightJamming( absA, - shiftCount, &absA );
12822fe8fb19SBen Gras }
12832fe8fb19SBen Gras else {
12842fe8fb19SBen Gras absA <<= shiftCount;
12852fe8fb19SBen Gras }
12862fe8fb19SBen Gras return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
12872fe8fb19SBen Gras }
12882fe8fb19SBen Gras
12892fe8fb19SBen Gras }
12902fe8fb19SBen Gras
12912fe8fb19SBen Gras /*
12922fe8fb19SBen Gras -------------------------------------------------------------------------------
12932fe8fb19SBen Gras Returns the result of converting the 64-bit two's complement integer `a'
12942fe8fb19SBen Gras to the double-precision floating-point format. The conversion is performed
12952fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
12962fe8fb19SBen Gras -------------------------------------------------------------------------------
12972fe8fb19SBen Gras */
int64_to_float64(int64 a)12982fe8fb19SBen Gras float64 int64_to_float64( int64 a )
12992fe8fb19SBen Gras {
13002fe8fb19SBen Gras flag zSign;
13012fe8fb19SBen Gras
13022fe8fb19SBen Gras if ( a == 0 ) return 0;
13032fe8fb19SBen Gras if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
13042fe8fb19SBen Gras return packFloat64( 1, 0x43E, 0 );
13052fe8fb19SBen Gras }
13062fe8fb19SBen Gras zSign = ( a < 0 );
13072fe8fb19SBen Gras return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
13082fe8fb19SBen Gras
13092fe8fb19SBen Gras }
13102fe8fb19SBen Gras
13112fe8fb19SBen Gras #ifdef FLOATX80
13122fe8fb19SBen Gras
13132fe8fb19SBen Gras /*
13142fe8fb19SBen Gras -------------------------------------------------------------------------------
13152fe8fb19SBen Gras Returns the result of converting the 64-bit two's complement integer `a'
13162fe8fb19SBen Gras to the extended double-precision floating-point format. The conversion
13172fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
13182fe8fb19SBen Gras Arithmetic.
13192fe8fb19SBen Gras -------------------------------------------------------------------------------
13202fe8fb19SBen Gras */
int64_to_floatx80(int64 a)13212fe8fb19SBen Gras floatx80 int64_to_floatx80( int64 a )
13222fe8fb19SBen Gras {
13232fe8fb19SBen Gras flag zSign;
13242fe8fb19SBen Gras uint64 absA;
13252fe8fb19SBen Gras int8 shiftCount;
13262fe8fb19SBen Gras
13272fe8fb19SBen Gras if ( a == 0 ) return packFloatx80( 0, 0, 0 );
13282fe8fb19SBen Gras zSign = ( a < 0 );
13292fe8fb19SBen Gras absA = zSign ? - a : a;
13302fe8fb19SBen Gras shiftCount = countLeadingZeros64( absA );
13312fe8fb19SBen Gras return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
13322fe8fb19SBen Gras
13332fe8fb19SBen Gras }
13342fe8fb19SBen Gras
13352fe8fb19SBen Gras #endif
13362fe8fb19SBen Gras
13372fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
13382fe8fb19SBen Gras
13392fe8fb19SBen Gras #ifdef FLOAT128
13402fe8fb19SBen Gras
13412fe8fb19SBen Gras /*
13422fe8fb19SBen Gras -------------------------------------------------------------------------------
13432fe8fb19SBen Gras Returns the result of converting the 64-bit two's complement integer `a' to
13442fe8fb19SBen Gras the quadruple-precision floating-point format. The conversion is performed
13452fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
13462fe8fb19SBen Gras -------------------------------------------------------------------------------
13472fe8fb19SBen Gras */
int64_to_float128(int64 a)13482fe8fb19SBen Gras float128 int64_to_float128( int64 a )
13492fe8fb19SBen Gras {
13502fe8fb19SBen Gras flag zSign;
13512fe8fb19SBen Gras uint64 absA;
13522fe8fb19SBen Gras int8 shiftCount;
13532fe8fb19SBen Gras int32 zExp;
13542fe8fb19SBen Gras bits64 zSig0, zSig1;
13552fe8fb19SBen Gras
13562fe8fb19SBen Gras if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
13572fe8fb19SBen Gras zSign = ( a < 0 );
13582fe8fb19SBen Gras absA = zSign ? - a : a;
13592fe8fb19SBen Gras shiftCount = countLeadingZeros64( absA ) + 49;
13602fe8fb19SBen Gras zExp = 0x406E - shiftCount;
13612fe8fb19SBen Gras if ( 64 <= shiftCount ) {
13622fe8fb19SBen Gras zSig1 = 0;
13632fe8fb19SBen Gras zSig0 = absA;
13642fe8fb19SBen Gras shiftCount -= 64;
13652fe8fb19SBen Gras }
13662fe8fb19SBen Gras else {
13672fe8fb19SBen Gras zSig1 = absA;
13682fe8fb19SBen Gras zSig0 = 0;
13692fe8fb19SBen Gras }
13702fe8fb19SBen Gras shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
13712fe8fb19SBen Gras return packFloat128( zSign, zExp, zSig0, zSig1 );
13722fe8fb19SBen Gras
13732fe8fb19SBen Gras }
13742fe8fb19SBen Gras
13752fe8fb19SBen Gras #endif
13762fe8fb19SBen Gras
13772fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
13782fe8fb19SBen Gras /*
13792fe8fb19SBen Gras -------------------------------------------------------------------------------
13802fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
13812fe8fb19SBen Gras `a' to the 32-bit two's complement integer format. The conversion is
13822fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
13832fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
13842fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
13852fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
13862fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
13872fe8fb19SBen Gras -------------------------------------------------------------------------------
13882fe8fb19SBen Gras */
float32_to_int32(float32 a)13892fe8fb19SBen Gras int32 float32_to_int32( float32 a )
13902fe8fb19SBen Gras {
13912fe8fb19SBen Gras flag aSign;
13922fe8fb19SBen Gras int16 aExp, shiftCount;
13932fe8fb19SBen Gras bits32 aSig;
13942fe8fb19SBen Gras bits64 aSig64;
13952fe8fb19SBen Gras
13962fe8fb19SBen Gras aSig = extractFloat32Frac( a );
13972fe8fb19SBen Gras aExp = extractFloat32Exp( a );
13982fe8fb19SBen Gras aSign = extractFloat32Sign( a );
13992fe8fb19SBen Gras if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
14002fe8fb19SBen Gras if ( aExp ) aSig |= 0x00800000;
14012fe8fb19SBen Gras shiftCount = 0xAF - aExp;
14022fe8fb19SBen Gras aSig64 = aSig;
14032fe8fb19SBen Gras aSig64 <<= 32;
14042fe8fb19SBen Gras if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
14052fe8fb19SBen Gras return roundAndPackInt32( aSign, aSig64 );
14062fe8fb19SBen Gras
14072fe8fb19SBen Gras }
14082fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
14092fe8fb19SBen Gras
14102fe8fb19SBen Gras /*
14112fe8fb19SBen Gras -------------------------------------------------------------------------------
14122fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
14132fe8fb19SBen Gras `a' to the 32-bit two's complement integer format. The conversion is
14142fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
14152fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero.
14162fe8fb19SBen Gras If `a' is a NaN, the largest positive integer is returned. Otherwise, if
14172fe8fb19SBen Gras the conversion overflows, the largest integer with the same sign as `a' is
14182fe8fb19SBen Gras returned.
14192fe8fb19SBen Gras -------------------------------------------------------------------------------
14202fe8fb19SBen Gras */
float32_to_int32_round_to_zero(float32 a)14212fe8fb19SBen Gras int32 float32_to_int32_round_to_zero( float32 a )
14222fe8fb19SBen Gras {
14232fe8fb19SBen Gras flag aSign;
14242fe8fb19SBen Gras int16 aExp, shiftCount;
14252fe8fb19SBen Gras bits32 aSig;
14262fe8fb19SBen Gras int32 z;
14272fe8fb19SBen Gras
14282fe8fb19SBen Gras aSig = extractFloat32Frac( a );
14292fe8fb19SBen Gras aExp = extractFloat32Exp( a );
14302fe8fb19SBen Gras aSign = extractFloat32Sign( a );
14312fe8fb19SBen Gras shiftCount = aExp - 0x9E;
14322fe8fb19SBen Gras if ( 0 <= shiftCount ) {
14332fe8fb19SBen Gras if ( a != 0xCF000000 ) {
14342fe8fb19SBen Gras float_raise( float_flag_invalid );
14352fe8fb19SBen Gras if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
14362fe8fb19SBen Gras }
14372fe8fb19SBen Gras return (sbits32) 0x80000000;
14382fe8fb19SBen Gras }
14392fe8fb19SBen Gras else if ( aExp <= 0x7E ) {
1440*84d9c625SLionel Sambuc if ( aExp | aSig ) set_float_exception_inexact_flag();
14412fe8fb19SBen Gras return 0;
14422fe8fb19SBen Gras }
14432fe8fb19SBen Gras aSig = ( aSig | 0x00800000 )<<8;
14442fe8fb19SBen Gras z = aSig>>( - shiftCount );
14452fe8fb19SBen Gras if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1446*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
14472fe8fb19SBen Gras }
14482fe8fb19SBen Gras if ( aSign ) z = - z;
14492fe8fb19SBen Gras return z;
14502fe8fb19SBen Gras
14512fe8fb19SBen Gras }
14522fe8fb19SBen Gras
14532fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
14542fe8fb19SBen Gras /*
14552fe8fb19SBen Gras -------------------------------------------------------------------------------
14562fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
14572fe8fb19SBen Gras `a' to the 64-bit two's complement integer format. The conversion is
14582fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
14592fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
14602fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
14612fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
14622fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
14632fe8fb19SBen Gras -------------------------------------------------------------------------------
14642fe8fb19SBen Gras */
float32_to_int64(float32 a)14652fe8fb19SBen Gras int64 float32_to_int64( float32 a )
14662fe8fb19SBen Gras {
14672fe8fb19SBen Gras flag aSign;
14682fe8fb19SBen Gras int16 aExp, shiftCount;
14692fe8fb19SBen Gras bits32 aSig;
14702fe8fb19SBen Gras bits64 aSig64, aSigExtra;
14712fe8fb19SBen Gras
14722fe8fb19SBen Gras aSig = extractFloat32Frac( a );
14732fe8fb19SBen Gras aExp = extractFloat32Exp( a );
14742fe8fb19SBen Gras aSign = extractFloat32Sign( a );
14752fe8fb19SBen Gras shiftCount = 0xBE - aExp;
14762fe8fb19SBen Gras if ( shiftCount < 0 ) {
14772fe8fb19SBen Gras float_raise( float_flag_invalid );
14782fe8fb19SBen Gras if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
14792fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
14802fe8fb19SBen Gras }
14812fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
14822fe8fb19SBen Gras }
14832fe8fb19SBen Gras if ( aExp ) aSig |= 0x00800000;
14842fe8fb19SBen Gras aSig64 = aSig;
14852fe8fb19SBen Gras aSig64 <<= 40;
14862fe8fb19SBen Gras shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
14872fe8fb19SBen Gras return roundAndPackInt64( aSign, aSig64, aSigExtra );
14882fe8fb19SBen Gras
14892fe8fb19SBen Gras }
14902fe8fb19SBen Gras
14912fe8fb19SBen Gras /*
14922fe8fb19SBen Gras -------------------------------------------------------------------------------
14932fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
14942fe8fb19SBen Gras `a' to the 64-bit two's complement integer format. The conversion is
14952fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
14962fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero. If
14972fe8fb19SBen Gras `a' is a NaN, the largest positive integer is returned. Otherwise, if the
14982fe8fb19SBen Gras conversion overflows, the largest integer with the same sign as `a' is
14992fe8fb19SBen Gras returned.
15002fe8fb19SBen Gras -------------------------------------------------------------------------------
15012fe8fb19SBen Gras */
float32_to_int64_round_to_zero(float32 a)15022fe8fb19SBen Gras int64 float32_to_int64_round_to_zero( float32 a )
15032fe8fb19SBen Gras {
15042fe8fb19SBen Gras flag aSign;
15052fe8fb19SBen Gras int16 aExp, shiftCount;
15062fe8fb19SBen Gras bits32 aSig;
15072fe8fb19SBen Gras bits64 aSig64;
15082fe8fb19SBen Gras int64 z;
15092fe8fb19SBen Gras
15102fe8fb19SBen Gras aSig = extractFloat32Frac( a );
15112fe8fb19SBen Gras aExp = extractFloat32Exp( a );
15122fe8fb19SBen Gras aSign = extractFloat32Sign( a );
15132fe8fb19SBen Gras shiftCount = aExp - 0xBE;
15142fe8fb19SBen Gras if ( 0 <= shiftCount ) {
15152fe8fb19SBen Gras if ( a != 0xDF000000 ) {
15162fe8fb19SBen Gras float_raise( float_flag_invalid );
15172fe8fb19SBen Gras if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
15182fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
15192fe8fb19SBen Gras }
15202fe8fb19SBen Gras }
15212fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
15222fe8fb19SBen Gras }
15232fe8fb19SBen Gras else if ( aExp <= 0x7E ) {
1524*84d9c625SLionel Sambuc if ( aExp | aSig ) set_float_exception_inexact_flag();
15252fe8fb19SBen Gras return 0;
15262fe8fb19SBen Gras }
15272fe8fb19SBen Gras aSig64 = aSig | 0x00800000;
15282fe8fb19SBen Gras aSig64 <<= 40;
15292fe8fb19SBen Gras z = aSig64>>( - shiftCount );
15302fe8fb19SBen Gras if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1531*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
15322fe8fb19SBen Gras }
15332fe8fb19SBen Gras if ( aSign ) z = - z;
15342fe8fb19SBen Gras return z;
15352fe8fb19SBen Gras
15362fe8fb19SBen Gras }
15372fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
15382fe8fb19SBen Gras
15392fe8fb19SBen Gras /*
15402fe8fb19SBen Gras -------------------------------------------------------------------------------
15412fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
15422fe8fb19SBen Gras `a' to the double-precision floating-point format. The conversion is
15432fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
15442fe8fb19SBen Gras Arithmetic.
15452fe8fb19SBen Gras -------------------------------------------------------------------------------
15462fe8fb19SBen Gras */
float32_to_float64(float32 a)15472fe8fb19SBen Gras float64 float32_to_float64( float32 a )
15482fe8fb19SBen Gras {
15492fe8fb19SBen Gras flag aSign;
15502fe8fb19SBen Gras int16 aExp;
15512fe8fb19SBen Gras bits32 aSig;
15522fe8fb19SBen Gras
15532fe8fb19SBen Gras aSig = extractFloat32Frac( a );
15542fe8fb19SBen Gras aExp = extractFloat32Exp( a );
15552fe8fb19SBen Gras aSign = extractFloat32Sign( a );
15562fe8fb19SBen Gras if ( aExp == 0xFF ) {
15572fe8fb19SBen Gras if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
15582fe8fb19SBen Gras return packFloat64( aSign, 0x7FF, 0 );
15592fe8fb19SBen Gras }
15602fe8fb19SBen Gras if ( aExp == 0 ) {
15612fe8fb19SBen Gras if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
15622fe8fb19SBen Gras normalizeFloat32Subnormal( aSig, &aExp, &aSig );
15632fe8fb19SBen Gras --aExp;
15642fe8fb19SBen Gras }
15652fe8fb19SBen Gras return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
15662fe8fb19SBen Gras
15672fe8fb19SBen Gras }
15682fe8fb19SBen Gras
15692fe8fb19SBen Gras #ifdef FLOATX80
15702fe8fb19SBen Gras
15712fe8fb19SBen Gras /*
15722fe8fb19SBen Gras -------------------------------------------------------------------------------
15732fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
15742fe8fb19SBen Gras `a' to the extended double-precision floating-point format. The conversion
15752fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
15762fe8fb19SBen Gras Arithmetic.
15772fe8fb19SBen Gras -------------------------------------------------------------------------------
15782fe8fb19SBen Gras */
float32_to_floatx80(float32 a)15792fe8fb19SBen Gras floatx80 float32_to_floatx80( float32 a )
15802fe8fb19SBen Gras {
15812fe8fb19SBen Gras flag aSign;
15822fe8fb19SBen Gras int16 aExp;
15832fe8fb19SBen Gras bits32 aSig;
15842fe8fb19SBen Gras
15852fe8fb19SBen Gras aSig = extractFloat32Frac( a );
15862fe8fb19SBen Gras aExp = extractFloat32Exp( a );
15872fe8fb19SBen Gras aSign = extractFloat32Sign( a );
15882fe8fb19SBen Gras if ( aExp == 0xFF ) {
15892fe8fb19SBen Gras if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
15902fe8fb19SBen Gras return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
15912fe8fb19SBen Gras }
15922fe8fb19SBen Gras if ( aExp == 0 ) {
15932fe8fb19SBen Gras if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
15942fe8fb19SBen Gras normalizeFloat32Subnormal( aSig, &aExp, &aSig );
15952fe8fb19SBen Gras }
15962fe8fb19SBen Gras aSig |= 0x00800000;
15972fe8fb19SBen Gras return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
15982fe8fb19SBen Gras
15992fe8fb19SBen Gras }
16002fe8fb19SBen Gras
16012fe8fb19SBen Gras #endif
16022fe8fb19SBen Gras
16032fe8fb19SBen Gras #ifdef FLOAT128
16042fe8fb19SBen Gras
16052fe8fb19SBen Gras /*
16062fe8fb19SBen Gras -------------------------------------------------------------------------------
16072fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
16082fe8fb19SBen Gras `a' to the double-precision floating-point format. The conversion is
16092fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
16102fe8fb19SBen Gras Arithmetic.
16112fe8fb19SBen Gras -------------------------------------------------------------------------------
16122fe8fb19SBen Gras */
float32_to_float128(float32 a)16132fe8fb19SBen Gras float128 float32_to_float128( float32 a )
16142fe8fb19SBen Gras {
16152fe8fb19SBen Gras flag aSign;
16162fe8fb19SBen Gras int16 aExp;
16172fe8fb19SBen Gras bits32 aSig;
16182fe8fb19SBen Gras
16192fe8fb19SBen Gras aSig = extractFloat32Frac( a );
16202fe8fb19SBen Gras aExp = extractFloat32Exp( a );
16212fe8fb19SBen Gras aSign = extractFloat32Sign( a );
16222fe8fb19SBen Gras if ( aExp == 0xFF ) {
16232fe8fb19SBen Gras if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
16242fe8fb19SBen Gras return packFloat128( aSign, 0x7FFF, 0, 0 );
16252fe8fb19SBen Gras }
16262fe8fb19SBen Gras if ( aExp == 0 ) {
16272fe8fb19SBen Gras if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
16282fe8fb19SBen Gras normalizeFloat32Subnormal( aSig, &aExp, &aSig );
16292fe8fb19SBen Gras --aExp;
16302fe8fb19SBen Gras }
16312fe8fb19SBen Gras return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
16322fe8fb19SBen Gras
16332fe8fb19SBen Gras }
16342fe8fb19SBen Gras
16352fe8fb19SBen Gras #endif
16362fe8fb19SBen Gras
16372fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
16382fe8fb19SBen Gras /*
16392fe8fb19SBen Gras -------------------------------------------------------------------------------
16402fe8fb19SBen Gras Rounds the single-precision floating-point value `a' to an integer, and
16412fe8fb19SBen Gras returns the result as a single-precision floating-point value. The
16422fe8fb19SBen Gras operation is performed according to the IEC/IEEE Standard for Binary
16432fe8fb19SBen Gras Floating-Point Arithmetic.
16442fe8fb19SBen Gras -------------------------------------------------------------------------------
16452fe8fb19SBen Gras */
float32_round_to_int(float32 a)16462fe8fb19SBen Gras float32 float32_round_to_int( float32 a )
16472fe8fb19SBen Gras {
16482fe8fb19SBen Gras flag aSign;
16492fe8fb19SBen Gras int16 aExp;
16502fe8fb19SBen Gras bits32 lastBitMask, roundBitsMask;
16512fe8fb19SBen Gras int8 roundingMode;
16522fe8fb19SBen Gras float32 z;
16532fe8fb19SBen Gras
16542fe8fb19SBen Gras aExp = extractFloat32Exp( a );
16552fe8fb19SBen Gras if ( 0x96 <= aExp ) {
16562fe8fb19SBen Gras if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
16572fe8fb19SBen Gras return propagateFloat32NaN( a, a );
16582fe8fb19SBen Gras }
16592fe8fb19SBen Gras return a;
16602fe8fb19SBen Gras }
16612fe8fb19SBen Gras if ( aExp <= 0x7E ) {
16622fe8fb19SBen Gras if ( (bits32) ( a<<1 ) == 0 ) return a;
1663*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
16642fe8fb19SBen Gras aSign = extractFloat32Sign( a );
16652fe8fb19SBen Gras switch ( float_rounding_mode ) {
16662fe8fb19SBen Gras case float_round_nearest_even:
16672fe8fb19SBen Gras if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
16682fe8fb19SBen Gras return packFloat32( aSign, 0x7F, 0 );
16692fe8fb19SBen Gras }
16702fe8fb19SBen Gras break;
16712fe8fb19SBen Gras case float_round_to_zero:
16722fe8fb19SBen Gras break;
16732fe8fb19SBen Gras case float_round_down:
16742fe8fb19SBen Gras return aSign ? 0xBF800000 : 0;
16752fe8fb19SBen Gras case float_round_up:
16762fe8fb19SBen Gras return aSign ? 0x80000000 : 0x3F800000;
16772fe8fb19SBen Gras }
16782fe8fb19SBen Gras return packFloat32( aSign, 0, 0 );
16792fe8fb19SBen Gras }
16802fe8fb19SBen Gras lastBitMask = 1;
16812fe8fb19SBen Gras lastBitMask <<= 0x96 - aExp;
16822fe8fb19SBen Gras roundBitsMask = lastBitMask - 1;
16832fe8fb19SBen Gras z = a;
16842fe8fb19SBen Gras roundingMode = float_rounding_mode;
16852fe8fb19SBen Gras if ( roundingMode == float_round_nearest_even ) {
16862fe8fb19SBen Gras z += lastBitMask>>1;
16872fe8fb19SBen Gras if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
16882fe8fb19SBen Gras }
16892fe8fb19SBen Gras else if ( roundingMode != float_round_to_zero ) {
16902fe8fb19SBen Gras if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
16912fe8fb19SBen Gras z += roundBitsMask;
16922fe8fb19SBen Gras }
16932fe8fb19SBen Gras }
16942fe8fb19SBen Gras z &= ~ roundBitsMask;
1695*84d9c625SLionel Sambuc if ( z != a ) set_float_exception_inexact_flag();
16962fe8fb19SBen Gras return z;
16972fe8fb19SBen Gras
16982fe8fb19SBen Gras }
16992fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
17002fe8fb19SBen Gras
17012fe8fb19SBen Gras /*
17022fe8fb19SBen Gras -------------------------------------------------------------------------------
17032fe8fb19SBen Gras Returns the result of adding the absolute values of the single-precision
17042fe8fb19SBen Gras floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
17052fe8fb19SBen Gras before being returned. `zSign' is ignored if the result is a NaN.
17062fe8fb19SBen Gras The addition is performed according to the IEC/IEEE Standard for Binary
17072fe8fb19SBen Gras Floating-Point Arithmetic.
17082fe8fb19SBen Gras -------------------------------------------------------------------------------
17092fe8fb19SBen Gras */
addFloat32Sigs(float32 a,float32 b,flag zSign)17102fe8fb19SBen Gras static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
17112fe8fb19SBen Gras {
17122fe8fb19SBen Gras int16 aExp, bExp, zExp;
17132fe8fb19SBen Gras bits32 aSig, bSig, zSig;
17142fe8fb19SBen Gras int16 expDiff;
17152fe8fb19SBen Gras
17162fe8fb19SBen Gras aSig = extractFloat32Frac( a );
17172fe8fb19SBen Gras aExp = extractFloat32Exp( a );
17182fe8fb19SBen Gras bSig = extractFloat32Frac( b );
17192fe8fb19SBen Gras bExp = extractFloat32Exp( b );
17202fe8fb19SBen Gras expDiff = aExp - bExp;
17212fe8fb19SBen Gras aSig <<= 6;
17222fe8fb19SBen Gras bSig <<= 6;
17232fe8fb19SBen Gras if ( 0 < expDiff ) {
17242fe8fb19SBen Gras if ( aExp == 0xFF ) {
17252fe8fb19SBen Gras if ( aSig ) return propagateFloat32NaN( a, b );
17262fe8fb19SBen Gras return a;
17272fe8fb19SBen Gras }
17282fe8fb19SBen Gras if ( bExp == 0 ) {
17292fe8fb19SBen Gras --expDiff;
17302fe8fb19SBen Gras }
17312fe8fb19SBen Gras else {
17322fe8fb19SBen Gras bSig |= 0x20000000;
17332fe8fb19SBen Gras }
17342fe8fb19SBen Gras shift32RightJamming( bSig, expDiff, &bSig );
17352fe8fb19SBen Gras zExp = aExp;
17362fe8fb19SBen Gras }
17372fe8fb19SBen Gras else if ( expDiff < 0 ) {
17382fe8fb19SBen Gras if ( bExp == 0xFF ) {
17392fe8fb19SBen Gras if ( bSig ) return propagateFloat32NaN( a, b );
17402fe8fb19SBen Gras return packFloat32( zSign, 0xFF, 0 );
17412fe8fb19SBen Gras }
17422fe8fb19SBen Gras if ( aExp == 0 ) {
17432fe8fb19SBen Gras ++expDiff;
17442fe8fb19SBen Gras }
17452fe8fb19SBen Gras else {
17462fe8fb19SBen Gras aSig |= 0x20000000;
17472fe8fb19SBen Gras }
17482fe8fb19SBen Gras shift32RightJamming( aSig, - expDiff, &aSig );
17492fe8fb19SBen Gras zExp = bExp;
17502fe8fb19SBen Gras }
17512fe8fb19SBen Gras else {
17522fe8fb19SBen Gras if ( aExp == 0xFF ) {
17532fe8fb19SBen Gras if ( aSig | bSig ) return propagateFloat32NaN( a, b );
17542fe8fb19SBen Gras return a;
17552fe8fb19SBen Gras }
17562fe8fb19SBen Gras if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
17572fe8fb19SBen Gras zSig = 0x40000000 + aSig + bSig;
17582fe8fb19SBen Gras zExp = aExp;
17592fe8fb19SBen Gras goto roundAndPack;
17602fe8fb19SBen Gras }
17612fe8fb19SBen Gras aSig |= 0x20000000;
17622fe8fb19SBen Gras zSig = ( aSig + bSig )<<1;
17632fe8fb19SBen Gras --zExp;
17642fe8fb19SBen Gras if ( (sbits32) zSig < 0 ) {
17652fe8fb19SBen Gras zSig = aSig + bSig;
17662fe8fb19SBen Gras ++zExp;
17672fe8fb19SBen Gras }
17682fe8fb19SBen Gras roundAndPack:
17692fe8fb19SBen Gras return roundAndPackFloat32( zSign, zExp, zSig );
17702fe8fb19SBen Gras
17712fe8fb19SBen Gras }
17722fe8fb19SBen Gras
17732fe8fb19SBen Gras /*
17742fe8fb19SBen Gras -------------------------------------------------------------------------------
17752fe8fb19SBen Gras Returns the result of subtracting the absolute values of the single-
17762fe8fb19SBen Gras precision floating-point values `a' and `b'. If `zSign' is 1, the
17772fe8fb19SBen Gras difference is negated before being returned. `zSign' is ignored if the
17782fe8fb19SBen Gras result is a NaN. The subtraction is performed according to the IEC/IEEE
17792fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
17802fe8fb19SBen Gras -------------------------------------------------------------------------------
17812fe8fb19SBen Gras */
subFloat32Sigs(float32 a,float32 b,flag zSign)17822fe8fb19SBen Gras static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
17832fe8fb19SBen Gras {
17842fe8fb19SBen Gras int16 aExp, bExp, zExp;
17852fe8fb19SBen Gras bits32 aSig, bSig, zSig;
17862fe8fb19SBen Gras int16 expDiff;
17872fe8fb19SBen Gras
17882fe8fb19SBen Gras aSig = extractFloat32Frac( a );
17892fe8fb19SBen Gras aExp = extractFloat32Exp( a );
17902fe8fb19SBen Gras bSig = extractFloat32Frac( b );
17912fe8fb19SBen Gras bExp = extractFloat32Exp( b );
17922fe8fb19SBen Gras expDiff = aExp - bExp;
17932fe8fb19SBen Gras aSig <<= 7;
17942fe8fb19SBen Gras bSig <<= 7;
17952fe8fb19SBen Gras if ( 0 < expDiff ) goto aExpBigger;
17962fe8fb19SBen Gras if ( expDiff < 0 ) goto bExpBigger;
17972fe8fb19SBen Gras if ( aExp == 0xFF ) {
17982fe8fb19SBen Gras if ( aSig | bSig ) return propagateFloat32NaN( a, b );
17992fe8fb19SBen Gras float_raise( float_flag_invalid );
18002fe8fb19SBen Gras return float32_default_nan;
18012fe8fb19SBen Gras }
18022fe8fb19SBen Gras if ( aExp == 0 ) {
18032fe8fb19SBen Gras aExp = 1;
18042fe8fb19SBen Gras bExp = 1;
18052fe8fb19SBen Gras }
18062fe8fb19SBen Gras if ( bSig < aSig ) goto aBigger;
18072fe8fb19SBen Gras if ( aSig < bSig ) goto bBigger;
18082fe8fb19SBen Gras return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
18092fe8fb19SBen Gras bExpBigger:
18102fe8fb19SBen Gras if ( bExp == 0xFF ) {
18112fe8fb19SBen Gras if ( bSig ) return propagateFloat32NaN( a, b );
18122fe8fb19SBen Gras return packFloat32( zSign ^ 1, 0xFF, 0 );
18132fe8fb19SBen Gras }
18142fe8fb19SBen Gras if ( aExp == 0 ) {
18152fe8fb19SBen Gras ++expDiff;
18162fe8fb19SBen Gras }
18172fe8fb19SBen Gras else {
18182fe8fb19SBen Gras aSig |= 0x40000000;
18192fe8fb19SBen Gras }
18202fe8fb19SBen Gras shift32RightJamming( aSig, - expDiff, &aSig );
18212fe8fb19SBen Gras bSig |= 0x40000000;
18222fe8fb19SBen Gras bBigger:
18232fe8fb19SBen Gras zSig = bSig - aSig;
18242fe8fb19SBen Gras zExp = bExp;
18252fe8fb19SBen Gras zSign ^= 1;
18262fe8fb19SBen Gras goto normalizeRoundAndPack;
18272fe8fb19SBen Gras aExpBigger:
18282fe8fb19SBen Gras if ( aExp == 0xFF ) {
18292fe8fb19SBen Gras if ( aSig ) return propagateFloat32NaN( a, b );
18302fe8fb19SBen Gras return a;
18312fe8fb19SBen Gras }
18322fe8fb19SBen Gras if ( bExp == 0 ) {
18332fe8fb19SBen Gras --expDiff;
18342fe8fb19SBen Gras }
18352fe8fb19SBen Gras else {
18362fe8fb19SBen Gras bSig |= 0x40000000;
18372fe8fb19SBen Gras }
18382fe8fb19SBen Gras shift32RightJamming( bSig, expDiff, &bSig );
18392fe8fb19SBen Gras aSig |= 0x40000000;
18402fe8fb19SBen Gras aBigger:
18412fe8fb19SBen Gras zSig = aSig - bSig;
18422fe8fb19SBen Gras zExp = aExp;
18432fe8fb19SBen Gras normalizeRoundAndPack:
18442fe8fb19SBen Gras --zExp;
18452fe8fb19SBen Gras return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
18462fe8fb19SBen Gras
18472fe8fb19SBen Gras }
18482fe8fb19SBen Gras
18492fe8fb19SBen Gras /*
18502fe8fb19SBen Gras -------------------------------------------------------------------------------
18512fe8fb19SBen Gras Returns the result of adding the single-precision floating-point values `a'
18522fe8fb19SBen Gras and `b'. The operation is performed according to the IEC/IEEE Standard for
18532fe8fb19SBen Gras Binary Floating-Point Arithmetic.
18542fe8fb19SBen Gras -------------------------------------------------------------------------------
18552fe8fb19SBen Gras */
float32 float32_add( float32 a, float32 b )
{
    /*
     * Operands with equal signs have their magnitudes added; operands with
     * opposite signs have their magnitudes subtracted.  In both cases the
     * sign of `a' is handed to the helper as the result sign.
     */
    flag signA = extractFloat32Sign( a );
    flag signB = extractFloat32Sign( b );

    if ( signA != signB ) {
        return subFloat32Sigs( a, b, signA );
    }
    return addFloat32Sigs( a, b, signA );

}
18702fe8fb19SBen Gras
18712fe8fb19SBen Gras /*
18722fe8fb19SBen Gras -------------------------------------------------------------------------------
18732fe8fb19SBen Gras Returns the result of subtracting the single-precision floating-point values
18742fe8fb19SBen Gras `a' and `b'. The operation is performed according to the IEC/IEEE Standard
18752fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
18762fe8fb19SBen Gras -------------------------------------------------------------------------------
18772fe8fb19SBen Gras */
float32 float32_sub( float32 a, float32 b )
{
    /*
     * Subtracting operands with opposite signs adds their magnitudes;
     * subtracting operands with equal signs subtracts their magnitudes.
     * In both cases the sign of `a' is handed to the helper.
     */
    flag signA = extractFloat32Sign( a );
    flag signB = extractFloat32Sign( b );

    if ( signA != signB ) {
        return addFloat32Sigs( a, b, signA );
    }
    return subFloat32Sigs( a, b, signA );

}
18922fe8fb19SBen Gras
18932fe8fb19SBen Gras /*
18942fe8fb19SBen Gras -------------------------------------------------------------------------------
18952fe8fb19SBen Gras Returns the result of multiplying the single-precision floating-point values
18962fe8fb19SBen Gras `a' and `b'. The operation is performed according to the IEC/IEEE Standard
18972fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
18982fe8fb19SBen Gras -------------------------------------------------------------------------------
18992fe8fb19SBen Gras */
float32 float32_mul( float32 a, float32 b )
{
    flag signA, signB, prodSign;
    int16 expA, expB, prodExp;
    bits32 fracA, fracB;
    bits64 wideProd;
    bits32 prodSig;

    fracA = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    signA = extractFloat32Sign( a );
    fracB = extractFloat32Frac( b );
    expB = extractFloat32Exp( b );
    signB = extractFloat32Sign( b );
    prodSign = signA ^ signB;

    /* At least one operand is an infinity or a NaN. */
    if ( ( expA == 0xFF ) || ( expB == 0xFF ) ) {
        if (    ( ( expA == 0xFF ) && fracA )
             || ( ( expB == 0xFF ) && fracB ) ) {
            return propagateFloat32NaN( a, b );
        }
        /* Infinity times zero is an invalid operation. */
        if ( ( ( expA | fracA ) == 0 ) || ( ( expB | fracB ) == 0 ) ) {
            float_raise( float_flag_invalid );
            return float32_default_nan;
        }
        return packFloat32( prodSign, 0xFF, 0 );
    }

    /* Zero operands give a signed zero; subnormals are normalized first. */
    if ( expA == 0 ) {
        if ( fracA == 0 ) return packFloat32( prodSign, 0, 0 );
        normalizeFloat32Subnormal( fracA, &expA, &fracA );
    }
    if ( expB == 0 ) {
        if ( fracB == 0 ) return packFloat32( prodSign, 0, 0 );
        normalizeFloat32Subnormal( fracB, &expB, &fracB );
    }

    prodExp = expA + expB - 0x7F;
    fracA = ( fracA | 0x00800000 )<<7;
    fracB = ( fracB | 0x00800000 )<<8;
    /* Keep the upper 32 bits of the 64-bit product, jamming the lost bits. */
    shift64RightJamming( ( (bits64) fracA ) * fracB, 32, &wideProd );
    prodSig = (bits32)wideProd;
    /* If bit 30 is clear, shift left by one so the leading bit sits there. */
    if ( ( prodSig & 0x40000000 ) == 0 ) {
        prodSig <<= 1;
        --prodExp;
    }
    return roundAndPackFloat32( prodSign, prodExp, prodSig );

}
19532fe8fb19SBen Gras
19542fe8fb19SBen Gras /*
19552fe8fb19SBen Gras -------------------------------------------------------------------------------
19562fe8fb19SBen Gras Returns the result of dividing the single-precision floating-point value `a'
19572fe8fb19SBen Gras by the corresponding value `b'. The operation is performed according to the
19582fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
19592fe8fb19SBen Gras -------------------------------------------------------------------------------
19602fe8fb19SBen Gras */
float32 float32_div( float32 a, float32 b )
{
    flag signA, signB, quotSign;
    int16 expA, expB, quotExp;
    bits32 fracA, fracB, quotSig;
    bits64 dividend;

    fracA = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    signA = extractFloat32Sign( a );
    fracB = extractFloat32Frac( b );
    expB = extractFloat32Exp( b );
    signB = extractFloat32Sign( b );
    quotSign = signA ^ signB;

    if ( expA == 0xFF ) {
        if ( fracA ) return propagateFloat32NaN( a, b );
        /* a is an infinity. */
        if ( expB != 0xFF ) return packFloat32( quotSign, 0xFF, 0 );
        if ( fracB ) return propagateFloat32NaN( a, b );
        /* Inf / Inf */
        float_raise( float_flag_invalid );
        return float32_default_nan;
    }
    if ( expB == 0xFF ) {
        if ( fracB ) return propagateFloat32NaN( a, b );
        /* finite / Inf */
        return packFloat32( quotSign, 0, 0 );
    }
    if ( expB == 0 ) {
        if ( fracB == 0 ) {
            /* Division by zero: 0/0 is invalid, anything else signals divbyzero. */
            if ( ( expA | fracA ) == 0 ) {
                float_raise( float_flag_invalid );
                return float32_default_nan;
            }
            float_raise( float_flag_divbyzero );
            return packFloat32( quotSign, 0xFF, 0 );
        }
        normalizeFloat32Subnormal( fracB, &expB, &fracB );
    }
    if ( expA == 0 ) {
        if ( fracA == 0 ) return packFloat32( quotSign, 0, 0 );
        normalizeFloat32Subnormal( fracA, &expA, &fracA );
    }

    quotExp = expA - expB + 0x7D;
    fracA = ( fracA | 0x00800000 )<<7;
    fracB = ( fracB | 0x00800000 )<<8;
    /* Halve the dividend (and bump the exponent) when 2*fracA >= fracB. */
    if ( fracB <= ( fracA + fracA ) ) {
        fracA >>= 1;
        ++quotExp;
    }
    dividend = ( (bits64) fracA )<<32;
    quotSig = (bits32)( dividend / fracB );
    /*
     * When the low six quotient bits are all zero, set the lowest bit if the
     * division left a remainder, so that inexactness stays visible to rounding.
     */
    if ( ( quotSig & 0x3F ) == 0 ) {
        if ( (bits64) fracB * quotSig != dividend ) quotSig |= 1;
    }
    return roundAndPackFloat32( quotSign, quotExp, quotSig );

}
20162fe8fb19SBen Gras
20172fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
20182fe8fb19SBen Gras /*
20192fe8fb19SBen Gras -------------------------------------------------------------------------------
20202fe8fb19SBen Gras Returns the remainder of the single-precision floating-point value `a'
20212fe8fb19SBen Gras with respect to the corresponding value `b'. The operation is performed
20222fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
20232fe8fb19SBen Gras -------------------------------------------------------------------------------
20242fe8fb19SBen Gras */
float32 float32_rem( float32 a, float32 b )
{
    flag signA, signB, remSign;
    int16 expA, expB, expGap;
    bits32 numSig, denSig;
    bits32 quot;
    bits64 numSig64, denSig64, quot64;
    bits32 prevNumSig;
    sbits32 pairSum;

    numSig = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    signA = extractFloat32Sign( a );
    denSig = extractFloat32Frac( b );
    expB = extractFloat32Exp( b );
    signB = extractFloat32Sign( b );

    if ( expA == 0xFF ) {
        if ( numSig || ( ( expB == 0xFF ) && denSig ) ) {
            return propagateFloat32NaN( a, b );
        }
        /* Remainder of an infinity is invalid. */
        float_raise( float_flag_invalid );
        return float32_default_nan;
    }
    if ( expB == 0xFF ) {
        if ( denSig ) return propagateFloat32NaN( a, b );
        /* Finite a, infinite b: the result is a itself. */
        return a;
    }
    if ( expB == 0 ) {
        if ( denSig == 0 ) {
            /* Remainder with respect to zero is invalid. */
            float_raise( float_flag_invalid );
            return float32_default_nan;
        }
        normalizeFloat32Subnormal( denSig, &expB, &denSig );
    }
    if ( expA == 0 ) {
        if ( numSig == 0 ) return a;
        normalizeFloat32Subnormal( numSig, &expA, &numSig );
    }

    expGap = expA - expB;
    numSig |= 0x00800000;
    denSig |= 0x00800000;

    if ( 32 <= expGap ) {
        /*
         * Large exponent gap: reduce it 62 bits per iteration using 64-bit
         * quotient estimates.  Each estimate is lowered by 2 and clamped at 0.
         */
        if ( denSig <= numSig ) numSig -= denSig;
        numSig64 = ( (bits64) numSig )<<40;
        denSig64 = ( (bits64) denSig )<<40;
        expGap -= 64;
        while ( 0 < expGap ) {
            quot64 = estimateDiv128To64( numSig64, 0, denSig64 );
            if ( 2 < quot64 ) {
                quot64 -= 2;
            }
            else {
                quot64 = 0;
            }
            numSig64 = - ( ( denSig * quot64 )<<38 );
            expGap -= 62;
        }
        expGap += 64;
        quot64 = estimateDiv128To64( numSig64, 0, denSig64 );
        if ( 2 < quot64 ) {
            quot64 -= 2;
        }
        else {
            quot64 = 0;
        }
        quot = quot64>>( 64 - expGap );
        denSig <<= 6;
        numSig = ( ( numSig64>>33 )<<( expGap - 1 ) ) - denSig * quot;
    }
    else {
        /* Small exponent gap: a single 64-by-32 division suffices. */
        numSig <<= 8;
        denSig <<= 8;
        if ( expGap < 0 ) {
            /* With a gap below -1 the result is a itself. */
            if ( expGap < -1 ) return a;
            numSig >>= 1;
        }
        quot = ( denSig <= numSig );
        if ( quot ) numSig -= denSig;
        if ( 0 < expGap ) {
            quot = ( ( (bits64) numSig )<<32 ) / denSig;
            quot >>= 32 - expGap;
            denSig >>= 2;
            numSig = ( ( numSig>>1 )<<( expGap - 1 ) ) - denSig * quot;
        }
        else {
            numSig >>= 2;
            denSig >>= 2;
        }
    }

    /*
     * Keep subtracting the divisor until the remainder turns negative,
     * remembering the last non-negative value.
     */
    do {
        prevNumSig = numSig;
        ++quot;
        numSig -= denSig;
    } while ( 0 <= (sbits32) numSig );

    /*
     * Pick the candidate with the smaller magnitude; on a tie, keep the
     * negative one only when its quotient is even.
     */
    pairSum = numSig + prevNumSig;
    if ( ( pairSum < 0 ) || ( ( pairSum == 0 ) && ( quot & 1 ) ) ) {
        numSig = prevNumSig;
    }
    remSign = ( (sbits32) numSig < 0 );
    if ( remSign ) numSig = - numSig;
    return normalizeRoundAndPackFloat32( signA ^ remSign, expB, numSig );

}
21182fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
21192fe8fb19SBen Gras
21202fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
21212fe8fb19SBen Gras /*
21222fe8fb19SBen Gras -------------------------------------------------------------------------------
21232fe8fb19SBen Gras Returns the square root of the single-precision floating-point value `a'.
21242fe8fb19SBen Gras The operation is performed according to the IEC/IEEE Standard for Binary
21252fe8fb19SBen Gras Floating-Point Arithmetic.
21262fe8fb19SBen Gras -------------------------------------------------------------------------------
21272fe8fb19SBen Gras */
float32 float32_sqrt( float32 a )
{
    flag signA;
    int16 expA, rootExp;
    bits32 fracA, rootSig;
    bits64 remainder, square;

    fracA = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    signA = extractFloat32Sign( a );

    if ( expA == 0xFF ) {
        if ( fracA ) return propagateFloat32NaN( a, 0 );
        /* +Inf is returned unchanged; -Inf is invalid. */
        if ( ! signA ) return a;
        float_raise( float_flag_invalid );
        return float32_default_nan;
    }
    if ( signA ) {
        /* Negative zero is returned unchanged; other negatives are invalid. */
        if ( ( expA | fracA ) == 0 ) return a;
        float_raise( float_flag_invalid );
        return float32_default_nan;
    }
    if ( expA == 0 ) {
        if ( fracA == 0 ) return 0;
        normalizeFloat32Subnormal( fracA, &expA, &fracA );
    }

    rootExp = ( ( expA - 0x7F )>>1 ) + 0x7E;
    fracA = ( fracA | 0x00800000 )<<8;
    rootSig = estimateSqrt32( expA, fracA ) + 2;

    /*
     * Only when the low seven bits of the estimate are 5 or less is the
     * estimate corrected exactly against the 64-bit remainder.
     */
    if ( ( rootSig & 0x7F ) <= 5 ) {
        if ( rootSig < 2 ) {
            /* Estimate below 2 after the +2: use the fixed significand. */
            return roundAndPackFloat32( 0, rootExp, 0x7FFFFFFF );
        }
        fracA >>= expA & 1;
        square = ( (bits64) rootSig ) * rootSig;
        remainder = ( ( (bits64) fracA )<<32 ) - square;
        /* Step the root down until its square no longer exceeds the input. */
        while ( (sbits64) remainder < 0 ) {
            --rootSig;
            remainder += ( ( (bits64) rootSig )<<1 ) | 1;
        }
        /* A nonzero remainder sets the lowest bit (inexact marker). */
        if ( remainder != 0 ) rootSig |= 1;
    }
    shift32RightJamming( rootSig, 1, &rootSig );
    return roundAndPackFloat32( 0, rootExp, rootSig );

}
21752fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
21762fe8fb19SBen Gras
21772fe8fb19SBen Gras /*
21782fe8fb19SBen Gras -------------------------------------------------------------------------------
21792fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is equal to
21802fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The comparison is performed
21812fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
21822fe8fb19SBen Gras -------------------------------------------------------------------------------
21832fe8fb19SBen Gras */
float32_eq(float32 a,float32 b)21842fe8fb19SBen Gras flag float32_eq( float32 a, float32 b )
21852fe8fb19SBen Gras {
21862fe8fb19SBen Gras
21872fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
21882fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
21892fe8fb19SBen Gras ) {
21902fe8fb19SBen Gras if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
21912fe8fb19SBen Gras float_raise( float_flag_invalid );
21922fe8fb19SBen Gras }
21932fe8fb19SBen Gras return 0;
21942fe8fb19SBen Gras }
21952fe8fb19SBen Gras return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
21962fe8fb19SBen Gras
21972fe8fb19SBen Gras }
21982fe8fb19SBen Gras
21992fe8fb19SBen Gras /*
22002fe8fb19SBen Gras -------------------------------------------------------------------------------
22012fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is less than
22022fe8fb19SBen Gras or equal to the corresponding value `b', and 0 otherwise. The comparison
22032fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
22042fe8fb19SBen Gras Arithmetic.
22052fe8fb19SBen Gras -------------------------------------------------------------------------------
22062fe8fb19SBen Gras */
float32_le(float32 a,float32 b)22072fe8fb19SBen Gras flag float32_le( float32 a, float32 b )
22082fe8fb19SBen Gras {
22092fe8fb19SBen Gras flag aSign, bSign;
22102fe8fb19SBen Gras
22112fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
22122fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
22132fe8fb19SBen Gras ) {
22142fe8fb19SBen Gras float_raise( float_flag_invalid );
22152fe8fb19SBen Gras return 0;
22162fe8fb19SBen Gras }
22172fe8fb19SBen Gras aSign = extractFloat32Sign( a );
22182fe8fb19SBen Gras bSign = extractFloat32Sign( b );
22192fe8fb19SBen Gras if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
22202fe8fb19SBen Gras return ( a == b ) || ( aSign ^ ( a < b ) );
22212fe8fb19SBen Gras
22222fe8fb19SBen Gras }
22232fe8fb19SBen Gras
22242fe8fb19SBen Gras /*
22252fe8fb19SBen Gras -------------------------------------------------------------------------------
22262fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is less than
22272fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The comparison is performed
22282fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
22292fe8fb19SBen Gras -------------------------------------------------------------------------------
22302fe8fb19SBen Gras */
float32_lt(float32 a,float32 b)22312fe8fb19SBen Gras flag float32_lt( float32 a, float32 b )
22322fe8fb19SBen Gras {
22332fe8fb19SBen Gras flag aSign, bSign;
22342fe8fb19SBen Gras
22352fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
22362fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
22372fe8fb19SBen Gras ) {
22382fe8fb19SBen Gras float_raise( float_flag_invalid );
22392fe8fb19SBen Gras return 0;
22402fe8fb19SBen Gras }
22412fe8fb19SBen Gras aSign = extractFloat32Sign( a );
22422fe8fb19SBen Gras bSign = extractFloat32Sign( b );
22432fe8fb19SBen Gras if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
22442fe8fb19SBen Gras return ( a != b ) && ( aSign ^ ( a < b ) );
22452fe8fb19SBen Gras
22462fe8fb19SBen Gras }
22472fe8fb19SBen Gras
22482fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
22492fe8fb19SBen Gras /*
22502fe8fb19SBen Gras -------------------------------------------------------------------------------
22512fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is equal to
22522fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The invalid exception is
22532fe8fb19SBen Gras raised if either operand is a NaN. Otherwise, the comparison is performed
22542fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
22552fe8fb19SBen Gras -------------------------------------------------------------------------------
22562fe8fb19SBen Gras */
float32_eq_signaling(float32 a,float32 b)22572fe8fb19SBen Gras flag float32_eq_signaling( float32 a, float32 b )
22582fe8fb19SBen Gras {
22592fe8fb19SBen Gras
22602fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
22612fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
22622fe8fb19SBen Gras ) {
22632fe8fb19SBen Gras float_raise( float_flag_invalid );
22642fe8fb19SBen Gras return 0;
22652fe8fb19SBen Gras }
22662fe8fb19SBen Gras return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
22672fe8fb19SBen Gras
22682fe8fb19SBen Gras }
22692fe8fb19SBen Gras
22702fe8fb19SBen Gras /*
22712fe8fb19SBen Gras -------------------------------------------------------------------------------
22722fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is less than or
22732fe8fb19SBen Gras equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
22742fe8fb19SBen Gras cause an exception. Otherwise, the comparison is performed according to the
22752fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
22762fe8fb19SBen Gras -------------------------------------------------------------------------------
22772fe8fb19SBen Gras */
float32_le_quiet(float32 a,float32 b)22782fe8fb19SBen Gras flag float32_le_quiet( float32 a, float32 b )
22792fe8fb19SBen Gras {
22802fe8fb19SBen Gras flag aSign, bSign;
22812fe8fb19SBen Gras
22822fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
22832fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
22842fe8fb19SBen Gras ) {
22852fe8fb19SBen Gras if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
22862fe8fb19SBen Gras float_raise( float_flag_invalid );
22872fe8fb19SBen Gras }
22882fe8fb19SBen Gras return 0;
22892fe8fb19SBen Gras }
22902fe8fb19SBen Gras aSign = extractFloat32Sign( a );
22912fe8fb19SBen Gras bSign = extractFloat32Sign( b );
22922fe8fb19SBen Gras if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
22932fe8fb19SBen Gras return ( a == b ) || ( aSign ^ ( a < b ) );
22942fe8fb19SBen Gras
22952fe8fb19SBen Gras }
22962fe8fb19SBen Gras
22972fe8fb19SBen Gras /*
22982fe8fb19SBen Gras -------------------------------------------------------------------------------
22992fe8fb19SBen Gras Returns 1 if the single-precision floating-point value `a' is less than
23002fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
23012fe8fb19SBen Gras exception. Otherwise, the comparison is performed according to the IEC/IEEE
23022fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
23032fe8fb19SBen Gras -------------------------------------------------------------------------------
23042fe8fb19SBen Gras */
float32_lt_quiet(float32 a,float32 b)23052fe8fb19SBen Gras flag float32_lt_quiet( float32 a, float32 b )
23062fe8fb19SBen Gras {
23072fe8fb19SBen Gras flag aSign, bSign;
23082fe8fb19SBen Gras
23092fe8fb19SBen Gras if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
23102fe8fb19SBen Gras || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
23112fe8fb19SBen Gras ) {
23122fe8fb19SBen Gras if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
23132fe8fb19SBen Gras float_raise( float_flag_invalid );
23142fe8fb19SBen Gras }
23152fe8fb19SBen Gras return 0;
23162fe8fb19SBen Gras }
23172fe8fb19SBen Gras aSign = extractFloat32Sign( a );
23182fe8fb19SBen Gras bSign = extractFloat32Sign( b );
23192fe8fb19SBen Gras if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
23202fe8fb19SBen Gras return ( a != b ) && ( aSign ^ ( a < b ) );
23212fe8fb19SBen Gras
23222fe8fb19SBen Gras }
23232fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
23242fe8fb19SBen Gras
23252fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
23262fe8fb19SBen Gras /*
23272fe8fb19SBen Gras -------------------------------------------------------------------------------
23282fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
23292fe8fb19SBen Gras `a' to the 32-bit two's complement integer format. The conversion is
23302fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
23312fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
23322fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
23332fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
23342fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
23352fe8fb19SBen Gras -------------------------------------------------------------------------------
23362fe8fb19SBen Gras */
float64_to_int32(float64 a)23372fe8fb19SBen Gras int32 float64_to_int32( float64 a )
23382fe8fb19SBen Gras {
23392fe8fb19SBen Gras flag aSign;
23402fe8fb19SBen Gras int16 aExp, shiftCount;
23412fe8fb19SBen Gras bits64 aSig;
23422fe8fb19SBen Gras
23432fe8fb19SBen Gras aSig = extractFloat64Frac( a );
23442fe8fb19SBen Gras aExp = extractFloat64Exp( a );
23452fe8fb19SBen Gras aSign = extractFloat64Sign( a );
23462fe8fb19SBen Gras if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
23472fe8fb19SBen Gras if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
23482fe8fb19SBen Gras shiftCount = 0x42C - aExp;
23492fe8fb19SBen Gras if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
23502fe8fb19SBen Gras return roundAndPackInt32( aSign, aSig );
23512fe8fb19SBen Gras
23522fe8fb19SBen Gras }
23532fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
23542fe8fb19SBen Gras
23552fe8fb19SBen Gras /*
23562fe8fb19SBen Gras -------------------------------------------------------------------------------
23572fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
23582fe8fb19SBen Gras `a' to the 32-bit two's complement integer format. The conversion is
23592fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
23602fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero.
23612fe8fb19SBen Gras If `a' is a NaN, the largest positive integer is returned. Otherwise, if
23622fe8fb19SBen Gras the conversion overflows, the largest integer with the same sign as `a' is
23632fe8fb19SBen Gras returned.
23642fe8fb19SBen Gras -------------------------------------------------------------------------------
23652fe8fb19SBen Gras */
float64_to_int32_round_to_zero(float64 a)23662fe8fb19SBen Gras int32 float64_to_int32_round_to_zero( float64 a )
23672fe8fb19SBen Gras {
23682fe8fb19SBen Gras flag aSign;
23692fe8fb19SBen Gras int16 aExp, shiftCount;
23702fe8fb19SBen Gras bits64 aSig, savedASig;
23712fe8fb19SBen Gras int32 z;
23722fe8fb19SBen Gras
23732fe8fb19SBen Gras aSig = extractFloat64Frac( a );
23742fe8fb19SBen Gras aExp = extractFloat64Exp( a );
23752fe8fb19SBen Gras aSign = extractFloat64Sign( a );
23762fe8fb19SBen Gras if ( 0x41E < aExp ) {
23772fe8fb19SBen Gras if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
23782fe8fb19SBen Gras goto invalid;
23792fe8fb19SBen Gras }
23802fe8fb19SBen Gras else if ( aExp < 0x3FF ) {
2381*84d9c625SLionel Sambuc if ( aExp || aSig ) set_float_exception_inexact_flag();
23822fe8fb19SBen Gras return 0;
23832fe8fb19SBen Gras }
23842fe8fb19SBen Gras aSig |= LIT64( 0x0010000000000000 );
23852fe8fb19SBen Gras shiftCount = 0x433 - aExp;
23862fe8fb19SBen Gras savedASig = aSig;
23872fe8fb19SBen Gras aSig >>= shiftCount;
2388f14fb602SLionel Sambuc z = (int32)aSig;
23892fe8fb19SBen Gras if ( aSign ) z = - z;
23902fe8fb19SBen Gras if ( ( z < 0 ) ^ aSign ) {
23912fe8fb19SBen Gras invalid:
23922fe8fb19SBen Gras float_raise( float_flag_invalid );
23932fe8fb19SBen Gras return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
23942fe8fb19SBen Gras }
23952fe8fb19SBen Gras if ( ( aSig<<shiftCount ) != savedASig ) {
2396*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
23972fe8fb19SBen Gras }
23982fe8fb19SBen Gras return z;
23992fe8fb19SBen Gras
24002fe8fb19SBen Gras }
24012fe8fb19SBen Gras
24022fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
24032fe8fb19SBen Gras /*
24042fe8fb19SBen Gras -------------------------------------------------------------------------------
24052fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
24062fe8fb19SBen Gras `a' to the 64-bit two's complement integer format. The conversion is
24072fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
24082fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
24092fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
24102fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
24112fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
24122fe8fb19SBen Gras -------------------------------------------------------------------------------
24132fe8fb19SBen Gras */
float64_to_int64(float64 a)24142fe8fb19SBen Gras int64 float64_to_int64( float64 a )
24152fe8fb19SBen Gras {
24162fe8fb19SBen Gras flag aSign;
24172fe8fb19SBen Gras int16 aExp, shiftCount;
24182fe8fb19SBen Gras bits64 aSig, aSigExtra;
24192fe8fb19SBen Gras
24202fe8fb19SBen Gras aSig = extractFloat64Frac( a );
24212fe8fb19SBen Gras aExp = extractFloat64Exp( a );
24222fe8fb19SBen Gras aSign = extractFloat64Sign( a );
24232fe8fb19SBen Gras if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
24242fe8fb19SBen Gras shiftCount = 0x433 - aExp;
24252fe8fb19SBen Gras if ( shiftCount <= 0 ) {
24262fe8fb19SBen Gras if ( 0x43E < aExp ) {
24272fe8fb19SBen Gras float_raise( float_flag_invalid );
24282fe8fb19SBen Gras if ( ! aSign
24292fe8fb19SBen Gras || ( ( aExp == 0x7FF )
24302fe8fb19SBen Gras && ( aSig != LIT64( 0x0010000000000000 ) ) )
24312fe8fb19SBen Gras ) {
24322fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
24332fe8fb19SBen Gras }
24342fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
24352fe8fb19SBen Gras }
24362fe8fb19SBen Gras aSigExtra = 0;
24372fe8fb19SBen Gras aSig <<= - shiftCount;
24382fe8fb19SBen Gras }
24392fe8fb19SBen Gras else {
24402fe8fb19SBen Gras shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
24412fe8fb19SBen Gras }
24422fe8fb19SBen Gras return roundAndPackInt64( aSign, aSig, aSigExtra );
24432fe8fb19SBen Gras
24442fe8fb19SBen Gras }
24452fe8fb19SBen Gras
24462fe8fb19SBen Gras /*
24472fe8fb19SBen Gras -------------------------------------------------------------------------------
24482fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
24492fe8fb19SBen Gras `a' to the 64-bit two's complement integer format. The conversion is
24502fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
24512fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero.
24522fe8fb19SBen Gras If `a' is a NaN, the largest positive integer is returned. Otherwise, if
24532fe8fb19SBen Gras the conversion overflows, the largest integer with the same sign as `a' is
24542fe8fb19SBen Gras returned.
24552fe8fb19SBen Gras -------------------------------------------------------------------------------
24562fe8fb19SBen Gras */
float64_to_int64_round_to_zero(float64 a)24572fe8fb19SBen Gras int64 float64_to_int64_round_to_zero( float64 a )
24582fe8fb19SBen Gras {
24592fe8fb19SBen Gras flag aSign;
24602fe8fb19SBen Gras int16 aExp, shiftCount;
24612fe8fb19SBen Gras bits64 aSig;
24622fe8fb19SBen Gras int64 z;
24632fe8fb19SBen Gras
24642fe8fb19SBen Gras aSig = extractFloat64Frac( a );
24652fe8fb19SBen Gras aExp = extractFloat64Exp( a );
24662fe8fb19SBen Gras aSign = extractFloat64Sign( a );
24672fe8fb19SBen Gras if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
24682fe8fb19SBen Gras shiftCount = aExp - 0x433;
24692fe8fb19SBen Gras if ( 0 <= shiftCount ) {
24702fe8fb19SBen Gras if ( 0x43E <= aExp ) {
24712fe8fb19SBen Gras if ( a != LIT64( 0xC3E0000000000000 ) ) {
24722fe8fb19SBen Gras float_raise( float_flag_invalid );
24732fe8fb19SBen Gras if ( ! aSign
24742fe8fb19SBen Gras || ( ( aExp == 0x7FF )
24752fe8fb19SBen Gras && ( aSig != LIT64( 0x0010000000000000 ) ) )
24762fe8fb19SBen Gras ) {
24772fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
24782fe8fb19SBen Gras }
24792fe8fb19SBen Gras }
24802fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
24812fe8fb19SBen Gras }
24822fe8fb19SBen Gras z = aSig<<shiftCount;
24832fe8fb19SBen Gras }
24842fe8fb19SBen Gras else {
24852fe8fb19SBen Gras if ( aExp < 0x3FE ) {
2486*84d9c625SLionel Sambuc if ( aExp | aSig ) set_float_exception_inexact_flag();
24872fe8fb19SBen Gras return 0;
24882fe8fb19SBen Gras }
24892fe8fb19SBen Gras z = aSig>>( - shiftCount );
24902fe8fb19SBen Gras if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2491*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
24922fe8fb19SBen Gras }
24932fe8fb19SBen Gras }
24942fe8fb19SBen Gras if ( aSign ) z = - z;
24952fe8fb19SBen Gras return z;
24962fe8fb19SBen Gras
24972fe8fb19SBen Gras }
24982fe8fb19SBen Gras #endif /* !SOFTFLOAT_FOR_GCC */
24992fe8fb19SBen Gras
25002fe8fb19SBen Gras /*
25012fe8fb19SBen Gras -------------------------------------------------------------------------------
25022fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
25032fe8fb19SBen Gras `a' to the single-precision floating-point format. The conversion is
25042fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
25052fe8fb19SBen Gras Arithmetic.
25062fe8fb19SBen Gras -------------------------------------------------------------------------------
25072fe8fb19SBen Gras */
float64_to_float32(float64 a)25082fe8fb19SBen Gras float32 float64_to_float32( float64 a )
25092fe8fb19SBen Gras {
25102fe8fb19SBen Gras flag aSign;
25112fe8fb19SBen Gras int16 aExp;
25122fe8fb19SBen Gras bits64 aSig;
25132fe8fb19SBen Gras bits32 zSig;
25142fe8fb19SBen Gras
25152fe8fb19SBen Gras aSig = extractFloat64Frac( a );
25162fe8fb19SBen Gras aExp = extractFloat64Exp( a );
25172fe8fb19SBen Gras aSign = extractFloat64Sign( a );
25182fe8fb19SBen Gras if ( aExp == 0x7FF ) {
25192fe8fb19SBen Gras if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
25202fe8fb19SBen Gras return packFloat32( aSign, 0xFF, 0 );
25212fe8fb19SBen Gras }
25222fe8fb19SBen Gras shift64RightJamming( aSig, 22, &aSig );
2523f14fb602SLionel Sambuc zSig = (bits32)aSig;
25242fe8fb19SBen Gras if ( aExp || zSig ) {
25252fe8fb19SBen Gras zSig |= 0x40000000;
25262fe8fb19SBen Gras aExp -= 0x381;
25272fe8fb19SBen Gras }
25282fe8fb19SBen Gras return roundAndPackFloat32( aSign, aExp, zSig );
25292fe8fb19SBen Gras
25302fe8fb19SBen Gras }
25312fe8fb19SBen Gras
25322fe8fb19SBen Gras #ifdef FLOATX80
25332fe8fb19SBen Gras
25342fe8fb19SBen Gras /*
25352fe8fb19SBen Gras -------------------------------------------------------------------------------
25362fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
25372fe8fb19SBen Gras `a' to the extended double-precision floating-point format. The conversion
25382fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
25392fe8fb19SBen Gras Arithmetic.
25402fe8fb19SBen Gras -------------------------------------------------------------------------------
25412fe8fb19SBen Gras */
float64_to_floatx80(float64 a)25422fe8fb19SBen Gras floatx80 float64_to_floatx80( float64 a )
25432fe8fb19SBen Gras {
25442fe8fb19SBen Gras flag aSign;
25452fe8fb19SBen Gras int16 aExp;
25462fe8fb19SBen Gras bits64 aSig;
25472fe8fb19SBen Gras
25482fe8fb19SBen Gras aSig = extractFloat64Frac( a );
25492fe8fb19SBen Gras aExp = extractFloat64Exp( a );
25502fe8fb19SBen Gras aSign = extractFloat64Sign( a );
25512fe8fb19SBen Gras if ( aExp == 0x7FF ) {
25522fe8fb19SBen Gras if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
25532fe8fb19SBen Gras return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
25542fe8fb19SBen Gras }
25552fe8fb19SBen Gras if ( aExp == 0 ) {
25562fe8fb19SBen Gras if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
25572fe8fb19SBen Gras normalizeFloat64Subnormal( aSig, &aExp, &aSig );
25582fe8fb19SBen Gras }
25592fe8fb19SBen Gras return
25602fe8fb19SBen Gras packFloatx80(
25612fe8fb19SBen Gras aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
25622fe8fb19SBen Gras
25632fe8fb19SBen Gras }
25642fe8fb19SBen Gras
25652fe8fb19SBen Gras #endif
25662fe8fb19SBen Gras
25672fe8fb19SBen Gras #ifdef FLOAT128
25682fe8fb19SBen Gras
25692fe8fb19SBen Gras /*
25702fe8fb19SBen Gras -------------------------------------------------------------------------------
25712fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
25722fe8fb19SBen Gras `a' to the quadruple-precision floating-point format. The conversion is
25732fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
25742fe8fb19SBen Gras Arithmetic.
25752fe8fb19SBen Gras -------------------------------------------------------------------------------
25762fe8fb19SBen Gras */
float64_to_float128(float64 a)25772fe8fb19SBen Gras float128 float64_to_float128( float64 a )
25782fe8fb19SBen Gras {
25792fe8fb19SBen Gras flag aSign;
25802fe8fb19SBen Gras int16 aExp;
25812fe8fb19SBen Gras bits64 aSig, zSig0, zSig1;
25822fe8fb19SBen Gras
25832fe8fb19SBen Gras aSig = extractFloat64Frac( a );
25842fe8fb19SBen Gras aExp = extractFloat64Exp( a );
25852fe8fb19SBen Gras aSign = extractFloat64Sign( a );
25862fe8fb19SBen Gras if ( aExp == 0x7FF ) {
25872fe8fb19SBen Gras if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
25882fe8fb19SBen Gras return packFloat128( aSign, 0x7FFF, 0, 0 );
25892fe8fb19SBen Gras }
25902fe8fb19SBen Gras if ( aExp == 0 ) {
25912fe8fb19SBen Gras if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
25922fe8fb19SBen Gras normalizeFloat64Subnormal( aSig, &aExp, &aSig );
25932fe8fb19SBen Gras --aExp;
25942fe8fb19SBen Gras }
25952fe8fb19SBen Gras shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
25962fe8fb19SBen Gras return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
25972fe8fb19SBen Gras
25982fe8fb19SBen Gras }
25992fe8fb19SBen Gras
26002fe8fb19SBen Gras #endif
26012fe8fb19SBen Gras
26022fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC
26032fe8fb19SBen Gras /*
26042fe8fb19SBen Gras -------------------------------------------------------------------------------
26052fe8fb19SBen Gras Rounds the double-precision floating-point value `a' to an integer, and
26062fe8fb19SBen Gras returns the result as a double-precision floating-point value. The
26072fe8fb19SBen Gras operation is performed according to the IEC/IEEE Standard for Binary
26082fe8fb19SBen Gras Floating-Point Arithmetic.
26092fe8fb19SBen Gras -------------------------------------------------------------------------------
26102fe8fb19SBen Gras */
float64 float64_round_to_int( float64 a )
{
    flag negative;
    int16 exponent;
    bits64 unitBit, fractionMask;
    int8 mode;
    float64 rounded;

    exponent = extractFloat64Exp( a );

    if ( 0x433 <= exponent ) {
        /* No fraction bits remain at this magnitude; only a NaN needs work. */
        if ( ( exponent == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a );
        }
        return a;
    }

    if ( exponent < 0x3FF ) {
        /* Magnitude below one: the result is a signed zero or a signed one. */
        if ( (bits64) ( a<<1 ) == 0 ) return a;
        set_float_exception_inexact_flag();
        negative = extractFloat64Sign( a );
        switch ( float_rounding_mode ) {
         case float_round_up:
            return
                negative ? LIT64( 0x8000000000000000 )
                    : LIT64( 0x3FF0000000000000 );
         case float_round_down:
            return negative ? LIT64( 0xBFF0000000000000 ) : 0;
         case float_round_nearest_even:
            /* Strictly more than one half rounds away from zero. */
            if ( ( exponent == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( negative, 0x3FF, 0 );
            }
            break;
         case float_round_to_zero:
            break;
        }
        return packFloat64( negative, 0, 0 );
    }

    /* `unitBit' is the encoding bit worth 1; bits below it are fraction. */
    unitBit = (bits64) 1<<( 0x433 - exponent );
    fractionMask = unitBit - 1;
    rounded = a;
    mode = float_rounding_mode;
    if ( mode == float_round_nearest_even ) {
        /* Add one half, then clear the unit bit on an exact tie. */
        rounded += unitBit>>1;
        if ( ( rounded & fractionMask ) == 0 ) rounded &= ~ unitBit;
    }
    else if ( mode != float_round_to_zero ) {
        /* Directed rounding bumps the magnitude only when moving away from zero. */
        if ( extractFloat64Sign( rounded ) ^ ( mode == float_round_up ) ) {
            rounded += fractionMask;
        }
    }
    rounded &= ~ fractionMask;
    if ( rounded != a ) set_float_exception_inexact_flag();
    return rounded;

}
26652fe8fb19SBen Gras #endif
26662fe8fb19SBen Gras
26672fe8fb19SBen Gras /*
26682fe8fb19SBen Gras -------------------------------------------------------------------------------
26692fe8fb19SBen Gras Returns the result of adding the absolute values of the double-precision
26702fe8fb19SBen Gras floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
26712fe8fb19SBen Gras before being returned. `zSign' is ignored if the result is a NaN.
26722fe8fb19SBen Gras The addition is performed according to the IEC/IEEE Standard for Binary
26732fe8fb19SBen Gras Floating-Point Arithmetic.
26742fe8fb19SBen Gras -------------------------------------------------------------------------------
26752fe8fb19SBen Gras */
addFloat64Sigs(float64 a,float64 b,flag zSign)26762fe8fb19SBen Gras static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
26772fe8fb19SBen Gras {
26782fe8fb19SBen Gras int16 aExp, bExp, zExp;
26792fe8fb19SBen Gras bits64 aSig, bSig, zSig;
26802fe8fb19SBen Gras int16 expDiff;
26812fe8fb19SBen Gras
26822fe8fb19SBen Gras aSig = extractFloat64Frac( a );
26832fe8fb19SBen Gras aExp = extractFloat64Exp( a );
26842fe8fb19SBen Gras bSig = extractFloat64Frac( b );
26852fe8fb19SBen Gras bExp = extractFloat64Exp( b );
26862fe8fb19SBen Gras expDiff = aExp - bExp;
26872fe8fb19SBen Gras aSig <<= 9;
26882fe8fb19SBen Gras bSig <<= 9;
26892fe8fb19SBen Gras if ( 0 < expDiff ) {
26902fe8fb19SBen Gras if ( aExp == 0x7FF ) {
26912fe8fb19SBen Gras if ( aSig ) return propagateFloat64NaN( a, b );
26922fe8fb19SBen Gras return a;
26932fe8fb19SBen Gras }
26942fe8fb19SBen Gras if ( bExp == 0 ) {
26952fe8fb19SBen Gras --expDiff;
26962fe8fb19SBen Gras }
26972fe8fb19SBen Gras else {
26982fe8fb19SBen Gras bSig |= LIT64( 0x2000000000000000 );
26992fe8fb19SBen Gras }
27002fe8fb19SBen Gras shift64RightJamming( bSig, expDiff, &bSig );
27012fe8fb19SBen Gras zExp = aExp;
27022fe8fb19SBen Gras }
27032fe8fb19SBen Gras else if ( expDiff < 0 ) {
27042fe8fb19SBen Gras if ( bExp == 0x7FF ) {
27052fe8fb19SBen Gras if ( bSig ) return propagateFloat64NaN( a, b );
27062fe8fb19SBen Gras return packFloat64( zSign, 0x7FF, 0 );
27072fe8fb19SBen Gras }
27082fe8fb19SBen Gras if ( aExp == 0 ) {
27092fe8fb19SBen Gras ++expDiff;
27102fe8fb19SBen Gras }
27112fe8fb19SBen Gras else {
27122fe8fb19SBen Gras aSig |= LIT64( 0x2000000000000000 );
27132fe8fb19SBen Gras }
27142fe8fb19SBen Gras shift64RightJamming( aSig, - expDiff, &aSig );
27152fe8fb19SBen Gras zExp = bExp;
27162fe8fb19SBen Gras }
27172fe8fb19SBen Gras else {
27182fe8fb19SBen Gras if ( aExp == 0x7FF ) {
27192fe8fb19SBen Gras if ( aSig | bSig ) return propagateFloat64NaN( a, b );
27202fe8fb19SBen Gras return a;
27212fe8fb19SBen Gras }
27222fe8fb19SBen Gras if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
27232fe8fb19SBen Gras zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
27242fe8fb19SBen Gras zExp = aExp;
27252fe8fb19SBen Gras goto roundAndPack;
27262fe8fb19SBen Gras }
27272fe8fb19SBen Gras aSig |= LIT64( 0x2000000000000000 );
27282fe8fb19SBen Gras zSig = ( aSig + bSig )<<1;
27292fe8fb19SBen Gras --zExp;
27302fe8fb19SBen Gras if ( (sbits64) zSig < 0 ) {
27312fe8fb19SBen Gras zSig = aSig + bSig;
27322fe8fb19SBen Gras ++zExp;
27332fe8fb19SBen Gras }
27342fe8fb19SBen Gras roundAndPack:
27352fe8fb19SBen Gras return roundAndPackFloat64( zSign, zExp, zSig );
27362fe8fb19SBen Gras
27372fe8fb19SBen Gras }
27382fe8fb19SBen Gras
27392fe8fb19SBen Gras /*
27402fe8fb19SBen Gras -------------------------------------------------------------------------------
27412fe8fb19SBen Gras Returns the result of subtracting the absolute values of the double-
27422fe8fb19SBen Gras precision floating-point values `a' and `b'. If `zSign' is 1, the
27432fe8fb19SBen Gras difference is negated before being returned. `zSign' is ignored if the
27442fe8fb19SBen Gras result is a NaN. The subtraction is performed according to the IEC/IEEE
27452fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
27462fe8fb19SBen Gras -------------------------------------------------------------------------------
27472fe8fb19SBen Gras */
subFloat64Sigs(float64 a,float64 b,flag zSign)27482fe8fb19SBen Gras static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
27492fe8fb19SBen Gras {
27502fe8fb19SBen Gras int16 aExp, bExp, zExp;
27512fe8fb19SBen Gras bits64 aSig, bSig, zSig;
27522fe8fb19SBen Gras int16 expDiff;
27532fe8fb19SBen Gras
27542fe8fb19SBen Gras aSig = extractFloat64Frac( a );
27552fe8fb19SBen Gras aExp = extractFloat64Exp( a );
27562fe8fb19SBen Gras bSig = extractFloat64Frac( b );
27572fe8fb19SBen Gras bExp = extractFloat64Exp( b );
27582fe8fb19SBen Gras expDiff = aExp - bExp;
27592fe8fb19SBen Gras aSig <<= 10;
27602fe8fb19SBen Gras bSig <<= 10;
27612fe8fb19SBen Gras if ( 0 < expDiff ) goto aExpBigger;
27622fe8fb19SBen Gras if ( expDiff < 0 ) goto bExpBigger;
27632fe8fb19SBen Gras if ( aExp == 0x7FF ) {
27642fe8fb19SBen Gras if ( aSig | bSig ) return propagateFloat64NaN( a, b );
27652fe8fb19SBen Gras float_raise( float_flag_invalid );
27662fe8fb19SBen Gras return float64_default_nan;
27672fe8fb19SBen Gras }
27682fe8fb19SBen Gras if ( aExp == 0 ) {
27692fe8fb19SBen Gras aExp = 1;
27702fe8fb19SBen Gras bExp = 1;
27712fe8fb19SBen Gras }
27722fe8fb19SBen Gras if ( bSig < aSig ) goto aBigger;
27732fe8fb19SBen Gras if ( aSig < bSig ) goto bBigger;
27742fe8fb19SBen Gras return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
27752fe8fb19SBen Gras bExpBigger:
27762fe8fb19SBen Gras if ( bExp == 0x7FF ) {
27772fe8fb19SBen Gras if ( bSig ) return propagateFloat64NaN( a, b );
27782fe8fb19SBen Gras return packFloat64( zSign ^ 1, 0x7FF, 0 );
27792fe8fb19SBen Gras }
27802fe8fb19SBen Gras if ( aExp == 0 ) {
27812fe8fb19SBen Gras ++expDiff;
27822fe8fb19SBen Gras }
27832fe8fb19SBen Gras else {
27842fe8fb19SBen Gras aSig |= LIT64( 0x4000000000000000 );
27852fe8fb19SBen Gras }
27862fe8fb19SBen Gras shift64RightJamming( aSig, - expDiff, &aSig );
27872fe8fb19SBen Gras bSig |= LIT64( 0x4000000000000000 );
27882fe8fb19SBen Gras bBigger:
27892fe8fb19SBen Gras zSig = bSig - aSig;
27902fe8fb19SBen Gras zExp = bExp;
27912fe8fb19SBen Gras zSign ^= 1;
27922fe8fb19SBen Gras goto normalizeRoundAndPack;
27932fe8fb19SBen Gras aExpBigger:
27942fe8fb19SBen Gras if ( aExp == 0x7FF ) {
27952fe8fb19SBen Gras if ( aSig ) return propagateFloat64NaN( a, b );
27962fe8fb19SBen Gras return a;
27972fe8fb19SBen Gras }
27982fe8fb19SBen Gras if ( bExp == 0 ) {
27992fe8fb19SBen Gras --expDiff;
28002fe8fb19SBen Gras }
28012fe8fb19SBen Gras else {
28022fe8fb19SBen Gras bSig |= LIT64( 0x4000000000000000 );
28032fe8fb19SBen Gras }
28042fe8fb19SBen Gras shift64RightJamming( bSig, expDiff, &bSig );
28052fe8fb19SBen Gras aSig |= LIT64( 0x4000000000000000 );
28062fe8fb19SBen Gras aBigger:
28072fe8fb19SBen Gras zSig = aSig - bSig;
28082fe8fb19SBen Gras zExp = aExp;
28092fe8fb19SBen Gras normalizeRoundAndPack:
28102fe8fb19SBen Gras --zExp;
28112fe8fb19SBen Gras return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
28122fe8fb19SBen Gras
28132fe8fb19SBen Gras }
28142fe8fb19SBen Gras
28152fe8fb19SBen Gras /*
28162fe8fb19SBen Gras -------------------------------------------------------------------------------
28172fe8fb19SBen Gras Returns the result of adding the double-precision floating-point values `a'
28182fe8fb19SBen Gras and `b'. The operation is performed according to the IEC/IEEE Standard for
28192fe8fb19SBen Gras Binary Floating-Point Arithmetic.
28202fe8fb19SBen Gras -------------------------------------------------------------------------------
28212fe8fb19SBen Gras */
float64 float64_add( float64 a, float64 b )
{
    flag aSign = extractFloat64Sign( a );
    flag bSign = extractFloat64Sign( b );

    /* Same signs add magnitudes; opposite signs subtract them. */
    return
        ( aSign == bSign ) ? addFloat64Sigs( a, b, aSign )
            : subFloat64Sigs( a, b, aSign );

}
28362fe8fb19SBen Gras
28372fe8fb19SBen Gras /*
28382fe8fb19SBen Gras -------------------------------------------------------------------------------
28392fe8fb19SBen Gras Returns the result of subtracting the double-precision floating-point values
28402fe8fb19SBen Gras `a' and `b'. The operation is performed according to the IEC/IEEE Standard
28412fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
28422fe8fb19SBen Gras -------------------------------------------------------------------------------
28432fe8fb19SBen Gras */
float64 float64_sub( float64 a, float64 b )
{
    flag aSign = extractFloat64Sign( a );
    flag bSign = extractFloat64Sign( b );

    /* Same signs subtract magnitudes; opposite signs add them. */
    return
        ( aSign == bSign ) ? subFloat64Sigs( a, b, aSign )
            : addFloat64Sigs( a, b, aSign );

}
28582fe8fb19SBen Gras
28592fe8fb19SBen Gras /*
28602fe8fb19SBen Gras -------------------------------------------------------------------------------
28612fe8fb19SBen Gras Returns the result of multiplying the double-precision floating-point values
28622fe8fb19SBen Gras `a' and `b'. The operation is performed according to the IEC/IEEE Standard
28632fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
28642fe8fb19SBen Gras -------------------------------------------------------------------------------
28652fe8fb19SBen Gras */
float64 float64_mul( float64 a, float64 b )
{
    flag aSign, bSign, prodSign;
    int16 aExp, bExp, prodExp;
    bits64 aSig, bSig, prodHi, prodLo;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );
    prodSign = aSign ^ bSign;

    if ( aExp == 0x7FF ) {
        /* `a' is NaN or infinity. */
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN( a, b );
        }
        /* Infinity times zero is invalid. */
        if ( ( bExp | bSig ) == 0 ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return packFloat64( prodSign, 0x7FF, 0 );
    }
    if ( bExp == 0x7FF ) {
        /* `b' is NaN or infinity, `a' is finite. */
        if ( bSig ) return propagateFloat64NaN( a, b );
        if ( ( aExp | aSig ) == 0 ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return packFloat64( prodSign, 0x7FF, 0 );
    }

    /* A zero operand gives a signed zero; subnormals are normalized. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( prodSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloat64( prodSign, 0, 0 );
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }

    prodExp = aExp + bExp - 0x3FF;
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    mul64To128( aSig, bSig, &prodHi, &prodLo );
    /* Fold the low half of the product into a sticky bit. */
    prodHi |= ( prodLo != 0 );
    /* If bit 62 is clear, shift left one place and adjust the exponent. */
    if ( ( prodHi & LIT64( 0x4000000000000000 ) ) == 0 ) {
        prodHi <<= 1;
        --prodExp;
    }
    return roundAndPackFloat64( prodSign, prodExp, prodHi );

}
29172fe8fb19SBen Gras
29182fe8fb19SBen Gras /*
29192fe8fb19SBen Gras -------------------------------------------------------------------------------
29202fe8fb19SBen Gras Returns the result of dividing the double-precision floating-point value `a'
29212fe8fb19SBen Gras by the corresponding value `b'. The operation is performed according to
29222fe8fb19SBen Gras the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
29232fe8fb19SBen Gras -------------------------------------------------------------------------------
29242fe8fb19SBen Gras */
float64 float64_div( float64 a, float64 b )
{
    flag aSign, bSign, quotSign;
    int16 aExp, bExp, quotExp;
    bits64 aSig, bSig, quotSig;
    bits64 remHi, remLo;
    bits64 termHi, termLo;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );
    quotSign = aSign ^ bSign;

    if ( aExp == 0x7FF ) {
        /* `a' is NaN or infinity. */
        if ( aSig ) return propagateFloat64NaN( a, b );
        if ( bExp == 0x7FF ) {
            if ( bSig ) return propagateFloat64NaN( a, b );
            /* Infinity divided by infinity is invalid. */
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return packFloat64( quotSign, 0x7FF, 0 );
    }
    if ( bExp == 0x7FF ) {
        /* Finite divided by infinity is a signed zero. */
        if ( bSig ) return propagateFloat64NaN( a, b );
        return packFloat64( quotSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Zero over zero is invalid; anything else over zero divides by zero. */
            if ( ( aExp | aSig ) == 0 ) {
                float_raise( float_flag_invalid );
                return float64_default_nan;
            }
            float_raise( float_flag_divbyzero );
            return packFloat64( quotSign, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( quotSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }

    quotExp = aExp - bExp + 0x3FD;
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    /* Halve the dividend when needed so it stays below the divisor. */
    if ( bSig <= ( aSig + aSig ) ) {
        aSig >>= 1;
        ++quotExp;
    }
    quotSig = estimateDiv128To64( aSig, 0, bSig );
    /*
     * Only when the low 9 bits of the estimate are this small is the exact
     * remainder computed; the estimate is then corrected downward and a
     * nonzero remainder is recorded as a sticky bit.
     */
    if ( ( quotSig & 0x1FF ) <= 2 ) {
        mul64To128( bSig, quotSig, &termHi, &termLo );
        sub128( aSig, 0, termHi, termLo, &remHi, &remLo );
        while ( (sbits64) remHi < 0 ) {
            --quotSig;
            add128( remHi, remLo, 0, bSig, &remHi, &remLo );
        }
        quotSig |= ( remLo != 0 );
    }
    return roundAndPackFloat64( quotSign, quotExp, quotSig );

}
29882fe8fb19SBen Gras
29892fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC
29902fe8fb19SBen Gras /*
29912fe8fb19SBen Gras -------------------------------------------------------------------------------
29922fe8fb19SBen Gras Returns the remainder of the double-precision floating-point value `a'
29932fe8fb19SBen Gras with respect to the corresponding value `b'. The operation is performed
29942fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
29952fe8fb19SBen Gras -------------------------------------------------------------------------------
29962fe8fb19SBen Gras */
float64 float64_rem( float64 a, float64 b )
{
    flag aSign, bSign, flipSign;
    int16 aExp, bExp, expGap;
    bits64 aSig, bSig;
    bits64 quot, prevASig;
    sbits64 sigSum;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );

    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN( a, b );
        }
        /* Remainder of an infinity is invalid. */
        float_raise( float_flag_invalid );
        return float64_default_nan;
    }
    if ( bExp == 0x7FF ) {
        /* Remainder with respect to infinity is `a' itself. */
        if ( bSig ) return propagateFloat64NaN( a, b );
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero is invalid. */
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }

    expGap = aExp - bExp;
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    /* `a' is less than half of `b' in magnitude: it is its own remainder. */
    if ( expGap < -1 ) return a;
    if ( expGap < 0 ) aSig >>= 1;

    quot = ( bSig <= aSig );
    if ( quot ) aSig -= bSig;

    /* Reduce the exponent gap 62 bits at a time while more than 64 remain. */
    for ( expGap -= 64; 0 < expGap; expGap -= 62 ) {
        quot = estimateDiv128To64( aSig, 0, bSig );
        quot = ( 2 < quot ) ? quot - 2 : 0;
        aSig = - ( ( bSig>>2 ) * quot );
    }
    expGap += 64;

    if ( 0 < expGap ) {
        /* Final partial step covering the remaining `expGap' bits. */
        quot = estimateDiv128To64( aSig, 0, bSig );
        quot = ( 2 < quot ) ? quot - 2 : 0;
        quot >>= 64 - expGap;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expGap - 1 ) ) - bSig * quot;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }

    /* Subtract `bSig' until the remainder goes negative, keeping the prior value. */
    do {
        prevASig = aSig;
        ++quot;
        aSig -= bSig;
    } while ( 0 <= (sbits64) aSig );

    /*
     * Choose between the last non-negative remainder and the first negative
     * one by the sign of their sum; on a zero sum the parity of `quot'
     * decides.
     */
    sigSum = aSig + prevASig;
    if ( ( sigSum < 0 ) || ( ( sigSum == 0 ) && ( quot & 1 ) ) ) {
        aSig = prevASig;
    }
    flipSign = ( (sbits64) aSig < 0 );
    if ( flipSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64( aSign ^ flipSign, bExp, aSig );

}
30752fe8fb19SBen Gras
30762fe8fb19SBen Gras /*
30772fe8fb19SBen Gras -------------------------------------------------------------------------------
30782fe8fb19SBen Gras Returns the square root of the double-precision floating-point value `a'.
30792fe8fb19SBen Gras The operation is performed according to the IEC/IEEE Standard for Binary
30802fe8fb19SBen Gras Floating-Point Arithmetic.
30812fe8fb19SBen Gras -------------------------------------------------------------------------------
30822fe8fb19SBen Gras */
float64 float64_sqrt( float64 a )
{
    flag signA;
    int16 expA, rootExp;
    bits64 sigA, rootSig, twiceRootSig;
    bits64 remHi, remLo, squareHi, squareLo;

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );

    /* Maximum exponent: NaN (non-zero fraction) or infinity. */
    if ( expA == 0x7FF ) {
        if ( sigA != 0 ) return propagateFloat64NaN( a, a );
        if ( signA ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return a;
    }

    /* Sign set: a negative zero is returned as is, anything else is invalid. */
    if ( signA ) {
        if ( ( expA | sigA ) != 0 ) {
            float_raise( float_flag_invalid );
            return float64_default_nan;
        }
        return a;
    }

    if ( expA == 0 ) {
        if ( sigA == 0 ) return 0;
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }

    rootExp = ( ( expA - 0x3FF )>>1 ) + 0x3FE;
    sigA |= LIT64( 0x0010000000000000 );

    /* First a 32-bit estimate of the root, then refine it with one division. */
    rootSig = estimateSqrt32( expA, sigA>>21 );
    sigA <<= 9 - ( expA & 1 );
    rootSig = estimateDiv128To64( sigA, 0, rootSig<<32 ) + ( rootSig<<30 );

    /*
     * Only when the low nine bits of the estimate are small is the exact
     * remainder (sigA:0 - rootSig^2) computed.  The estimate is stepped
     * down while that remainder is negative, and a non-zero final
     * remainder is recorded in the least significant (sticky) bit.
     */
    if ( ( rootSig & 0x1FF ) <= 5 ) {
        twiceRootSig = rootSig<<1;
        mul64To128( rootSig, rootSig, &squareHi, &squareLo );
        sub128( sigA, 0, squareHi, squareLo, &remHi, &remLo );
        while ( (sbits64) remHi < 0 ) {
            --rootSig;
            twiceRootSig -= 2;
            add128(
                remHi, remLo, rootSig>>63, twiceRootSig | 1, &remHi, &remLo );
        }
        if ( ( remHi | remLo ) != 0 ) rootSig |= 1;
    }
    return roundAndPackFloat64( 0, rootExp, rootSig );

}
31272fe8fb19SBen Gras #endif
31282fe8fb19SBen Gras
31292fe8fb19SBen Gras /*
31302fe8fb19SBen Gras -------------------------------------------------------------------------------
31312fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is equal to the
31322fe8fb19SBen Gras corresponding value `b', and 0 otherwise. The comparison is performed
31332fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
31342fe8fb19SBen Gras -------------------------------------------------------------------------------
31352fe8fb19SBen Gras */
float64_eq(float64 a,float64 b)31362fe8fb19SBen Gras flag float64_eq( float64 a, float64 b )
31372fe8fb19SBen Gras {
31382fe8fb19SBen Gras
31392fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
31402fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
31412fe8fb19SBen Gras ) {
31422fe8fb19SBen Gras if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
31432fe8fb19SBen Gras float_raise( float_flag_invalid );
31442fe8fb19SBen Gras }
31452fe8fb19SBen Gras return 0;
31462fe8fb19SBen Gras }
31472fe8fb19SBen Gras return ( a == b ) ||
31482fe8fb19SBen Gras ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
31492fe8fb19SBen Gras
31502fe8fb19SBen Gras }
31512fe8fb19SBen Gras
31522fe8fb19SBen Gras /*
31532fe8fb19SBen Gras -------------------------------------------------------------------------------
31542fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is less than or
31552fe8fb19SBen Gras equal to the corresponding value `b', and 0 otherwise. The comparison is
31562fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
31572fe8fb19SBen Gras Arithmetic.
31582fe8fb19SBen Gras -------------------------------------------------------------------------------
31592fe8fb19SBen Gras */
float64_le(float64 a,float64 b)31602fe8fb19SBen Gras flag float64_le( float64 a, float64 b )
31612fe8fb19SBen Gras {
31622fe8fb19SBen Gras flag aSign, bSign;
31632fe8fb19SBen Gras
31642fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
31652fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
31662fe8fb19SBen Gras ) {
31672fe8fb19SBen Gras float_raise( float_flag_invalid );
31682fe8fb19SBen Gras return 0;
31692fe8fb19SBen Gras }
31702fe8fb19SBen Gras aSign = extractFloat64Sign( a );
31712fe8fb19SBen Gras bSign = extractFloat64Sign( b );
31722fe8fb19SBen Gras if ( aSign != bSign )
31732fe8fb19SBen Gras return aSign ||
31742fe8fb19SBen Gras ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
31752fe8fb19SBen Gras 0 );
31762fe8fb19SBen Gras return ( a == b ) ||
31772fe8fb19SBen Gras ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
31782fe8fb19SBen Gras
31792fe8fb19SBen Gras }
31802fe8fb19SBen Gras
31812fe8fb19SBen Gras /*
31822fe8fb19SBen Gras -------------------------------------------------------------------------------
31832fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is less than
31842fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The comparison is performed
31852fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
31862fe8fb19SBen Gras -------------------------------------------------------------------------------
31872fe8fb19SBen Gras */
float64_lt(float64 a,float64 b)31882fe8fb19SBen Gras flag float64_lt( float64 a, float64 b )
31892fe8fb19SBen Gras {
31902fe8fb19SBen Gras flag aSign, bSign;
31912fe8fb19SBen Gras
31922fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
31932fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
31942fe8fb19SBen Gras ) {
31952fe8fb19SBen Gras float_raise( float_flag_invalid );
31962fe8fb19SBen Gras return 0;
31972fe8fb19SBen Gras }
31982fe8fb19SBen Gras aSign = extractFloat64Sign( a );
31992fe8fb19SBen Gras bSign = extractFloat64Sign( b );
32002fe8fb19SBen Gras if ( aSign != bSign )
32012fe8fb19SBen Gras return aSign &&
32022fe8fb19SBen Gras ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
32032fe8fb19SBen Gras 0 );
32042fe8fb19SBen Gras return ( a != b ) &&
32052fe8fb19SBen Gras ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
32062fe8fb19SBen Gras
32072fe8fb19SBen Gras }
32082fe8fb19SBen Gras
32092fe8fb19SBen Gras #ifndef SOFTFLOAT_FOR_GCC
32102fe8fb19SBen Gras /*
32112fe8fb19SBen Gras -------------------------------------------------------------------------------
32122fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is equal to the
32132fe8fb19SBen Gras corresponding value `b', and 0 otherwise. The invalid exception is raised
32142fe8fb19SBen Gras if either operand is a NaN. Otherwise, the comparison is performed
32152fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
32162fe8fb19SBen Gras -------------------------------------------------------------------------------
32172fe8fb19SBen Gras */
float64_eq_signaling(float64 a,float64 b)32182fe8fb19SBen Gras flag float64_eq_signaling( float64 a, float64 b )
32192fe8fb19SBen Gras {
32202fe8fb19SBen Gras
32212fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
32222fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
32232fe8fb19SBen Gras ) {
32242fe8fb19SBen Gras float_raise( float_flag_invalid );
32252fe8fb19SBen Gras return 0;
32262fe8fb19SBen Gras }
32272fe8fb19SBen Gras return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
32282fe8fb19SBen Gras
32292fe8fb19SBen Gras }
32302fe8fb19SBen Gras
32312fe8fb19SBen Gras /*
32322fe8fb19SBen Gras -------------------------------------------------------------------------------
32332fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is less than or
32342fe8fb19SBen Gras equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
32352fe8fb19SBen Gras cause an exception. Otherwise, the comparison is performed according to the
32362fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
32372fe8fb19SBen Gras -------------------------------------------------------------------------------
32382fe8fb19SBen Gras */
float64_le_quiet(float64 a,float64 b)32392fe8fb19SBen Gras flag float64_le_quiet( float64 a, float64 b )
32402fe8fb19SBen Gras {
32412fe8fb19SBen Gras flag aSign, bSign;
32422fe8fb19SBen Gras
32432fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
32442fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
32452fe8fb19SBen Gras ) {
32462fe8fb19SBen Gras if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
32472fe8fb19SBen Gras float_raise( float_flag_invalid );
32482fe8fb19SBen Gras }
32492fe8fb19SBen Gras return 0;
32502fe8fb19SBen Gras }
32512fe8fb19SBen Gras aSign = extractFloat64Sign( a );
32522fe8fb19SBen Gras bSign = extractFloat64Sign( b );
32532fe8fb19SBen Gras if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
32542fe8fb19SBen Gras return ( a == b ) || ( aSign ^ ( a < b ) );
32552fe8fb19SBen Gras
32562fe8fb19SBen Gras }
32572fe8fb19SBen Gras
32582fe8fb19SBen Gras /*
32592fe8fb19SBen Gras -------------------------------------------------------------------------------
32602fe8fb19SBen Gras Returns 1 if the double-precision floating-point value `a' is less than
32612fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
32622fe8fb19SBen Gras exception. Otherwise, the comparison is performed according to the IEC/IEEE
32632fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
32642fe8fb19SBen Gras -------------------------------------------------------------------------------
32652fe8fb19SBen Gras */
float64_lt_quiet(float64 a,float64 b)32662fe8fb19SBen Gras flag float64_lt_quiet( float64 a, float64 b )
32672fe8fb19SBen Gras {
32682fe8fb19SBen Gras flag aSign, bSign;
32692fe8fb19SBen Gras
32702fe8fb19SBen Gras if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
32712fe8fb19SBen Gras || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
32722fe8fb19SBen Gras ) {
32732fe8fb19SBen Gras if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
32742fe8fb19SBen Gras float_raise( float_flag_invalid );
32752fe8fb19SBen Gras }
32762fe8fb19SBen Gras return 0;
32772fe8fb19SBen Gras }
32782fe8fb19SBen Gras aSign = extractFloat64Sign( a );
32792fe8fb19SBen Gras bSign = extractFloat64Sign( b );
32802fe8fb19SBen Gras if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
32812fe8fb19SBen Gras return ( a != b ) && ( aSign ^ ( a < b ) );
32822fe8fb19SBen Gras
32832fe8fb19SBen Gras }
32842fe8fb19SBen Gras #endif
32852fe8fb19SBen Gras
32862fe8fb19SBen Gras #ifdef FLOATX80
32872fe8fb19SBen Gras
32882fe8fb19SBen Gras /*
32892fe8fb19SBen Gras -------------------------------------------------------------------------------
32902fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
32912fe8fb19SBen Gras point value `a' to the 32-bit two's complement integer format. The
32922fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
32932fe8fb19SBen Gras Floating-Point Arithmetic---which means in particular that the conversion
32942fe8fb19SBen Gras is rounded according to the current rounding mode. If `a' is a NaN, the
32952fe8fb19SBen Gras largest positive integer is returned. Otherwise, if the conversion
32962fe8fb19SBen Gras overflows, the largest integer with the same sign as `a' is returned.
32972fe8fb19SBen Gras -------------------------------------------------------------------------------
32982fe8fb19SBen Gras */
floatx80_to_int32(floatx80 a)32992fe8fb19SBen Gras int32 floatx80_to_int32( floatx80 a )
33002fe8fb19SBen Gras {
33012fe8fb19SBen Gras flag aSign;
33022fe8fb19SBen Gras int32 aExp, shiftCount;
33032fe8fb19SBen Gras bits64 aSig;
33042fe8fb19SBen Gras
33052fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
33062fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
33072fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
33082fe8fb19SBen Gras if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
33092fe8fb19SBen Gras shiftCount = 0x4037 - aExp;
33102fe8fb19SBen Gras if ( shiftCount <= 0 ) shiftCount = 1;
33112fe8fb19SBen Gras shift64RightJamming( aSig, shiftCount, &aSig );
33122fe8fb19SBen Gras return roundAndPackInt32( aSign, aSig );
33132fe8fb19SBen Gras
33142fe8fb19SBen Gras }
33152fe8fb19SBen Gras
33162fe8fb19SBen Gras /*
33172fe8fb19SBen Gras -------------------------------------------------------------------------------
33182fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
33192fe8fb19SBen Gras point value `a' to the 32-bit two's complement integer format. The
33202fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
33212fe8fb19SBen Gras Floating-Point Arithmetic, except that the conversion is always rounded
33222fe8fb19SBen Gras toward zero. If `a' is a NaN, the largest positive integer is returned.
33232fe8fb19SBen Gras Otherwise, if the conversion overflows, the largest integer with the same
33242fe8fb19SBen Gras sign as `a' is returned.
33252fe8fb19SBen Gras -------------------------------------------------------------------------------
33262fe8fb19SBen Gras */
floatx80_to_int32_round_to_zero(floatx80 a)33272fe8fb19SBen Gras int32 floatx80_to_int32_round_to_zero( floatx80 a )
33282fe8fb19SBen Gras {
33292fe8fb19SBen Gras flag aSign;
33302fe8fb19SBen Gras int32 aExp, shiftCount;
33312fe8fb19SBen Gras bits64 aSig, savedASig;
33322fe8fb19SBen Gras int32 z;
33332fe8fb19SBen Gras
33342fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
33352fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
33362fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
33372fe8fb19SBen Gras if ( 0x401E < aExp ) {
33382fe8fb19SBen Gras if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
33392fe8fb19SBen Gras goto invalid;
33402fe8fb19SBen Gras }
33412fe8fb19SBen Gras else if ( aExp < 0x3FFF ) {
3342*84d9c625SLionel Sambuc if ( aExp || aSig ) set_float_exception_inexact_flag();
33432fe8fb19SBen Gras return 0;
33442fe8fb19SBen Gras }
33452fe8fb19SBen Gras shiftCount = 0x403E - aExp;
33462fe8fb19SBen Gras savedASig = aSig;
33472fe8fb19SBen Gras aSig >>= shiftCount;
33482fe8fb19SBen Gras z = aSig;
33492fe8fb19SBen Gras if ( aSign ) z = - z;
33502fe8fb19SBen Gras if ( ( z < 0 ) ^ aSign ) {
33512fe8fb19SBen Gras invalid:
33522fe8fb19SBen Gras float_raise( float_flag_invalid );
33532fe8fb19SBen Gras return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
33542fe8fb19SBen Gras }
33552fe8fb19SBen Gras if ( ( aSig<<shiftCount ) != savedASig ) {
3356*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
33572fe8fb19SBen Gras }
33582fe8fb19SBen Gras return z;
33592fe8fb19SBen Gras
33602fe8fb19SBen Gras }
33612fe8fb19SBen Gras
33622fe8fb19SBen Gras /*
33632fe8fb19SBen Gras -------------------------------------------------------------------------------
33642fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
33652fe8fb19SBen Gras point value `a' to the 64-bit two's complement integer format. The
33662fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
33672fe8fb19SBen Gras Floating-Point Arithmetic---which means in particular that the conversion
33682fe8fb19SBen Gras is rounded according to the current rounding mode. If `a' is a NaN,
33692fe8fb19SBen Gras the largest positive integer is returned. Otherwise, if the conversion
33702fe8fb19SBen Gras overflows, the largest integer with the same sign as `a' is returned.
33712fe8fb19SBen Gras -------------------------------------------------------------------------------
33722fe8fb19SBen Gras */
floatx80_to_int64(floatx80 a)33732fe8fb19SBen Gras int64 floatx80_to_int64( floatx80 a )
33742fe8fb19SBen Gras {
33752fe8fb19SBen Gras flag aSign;
33762fe8fb19SBen Gras int32 aExp, shiftCount;
33772fe8fb19SBen Gras bits64 aSig, aSigExtra;
33782fe8fb19SBen Gras
33792fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
33802fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
33812fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
33822fe8fb19SBen Gras shiftCount = 0x403E - aExp;
33832fe8fb19SBen Gras if ( shiftCount <= 0 ) {
33842fe8fb19SBen Gras if ( shiftCount ) {
33852fe8fb19SBen Gras float_raise( float_flag_invalid );
33862fe8fb19SBen Gras if ( ! aSign
33872fe8fb19SBen Gras || ( ( aExp == 0x7FFF )
33882fe8fb19SBen Gras && ( aSig != LIT64( 0x8000000000000000 ) ) )
33892fe8fb19SBen Gras ) {
33902fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
33912fe8fb19SBen Gras }
33922fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
33932fe8fb19SBen Gras }
33942fe8fb19SBen Gras aSigExtra = 0;
33952fe8fb19SBen Gras }
33962fe8fb19SBen Gras else {
33972fe8fb19SBen Gras shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
33982fe8fb19SBen Gras }
33992fe8fb19SBen Gras return roundAndPackInt64( aSign, aSig, aSigExtra );
34002fe8fb19SBen Gras
34012fe8fb19SBen Gras }
34022fe8fb19SBen Gras
34032fe8fb19SBen Gras /*
34042fe8fb19SBen Gras -------------------------------------------------------------------------------
34052fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
34062fe8fb19SBen Gras point value `a' to the 64-bit two's complement integer format. The
34072fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
34082fe8fb19SBen Gras Floating-Point Arithmetic, except that the conversion is always rounded
34092fe8fb19SBen Gras toward zero. If `a' is a NaN, the largest positive integer is returned.
34102fe8fb19SBen Gras Otherwise, if the conversion overflows, the largest integer with the same
34112fe8fb19SBen Gras sign as `a' is returned.
34122fe8fb19SBen Gras -------------------------------------------------------------------------------
34132fe8fb19SBen Gras */
floatx80_to_int64_round_to_zero(floatx80 a)34142fe8fb19SBen Gras int64 floatx80_to_int64_round_to_zero( floatx80 a )
34152fe8fb19SBen Gras {
34162fe8fb19SBen Gras flag aSign;
34172fe8fb19SBen Gras int32 aExp, shiftCount;
34182fe8fb19SBen Gras bits64 aSig;
34192fe8fb19SBen Gras int64 z;
34202fe8fb19SBen Gras
34212fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
34222fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
34232fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
34242fe8fb19SBen Gras shiftCount = aExp - 0x403E;
34252fe8fb19SBen Gras if ( 0 <= shiftCount ) {
34262fe8fb19SBen Gras aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
34272fe8fb19SBen Gras if ( ( a.high != 0xC03E ) || aSig ) {
34282fe8fb19SBen Gras float_raise( float_flag_invalid );
34292fe8fb19SBen Gras if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
34302fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
34312fe8fb19SBen Gras }
34322fe8fb19SBen Gras }
34332fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
34342fe8fb19SBen Gras }
34352fe8fb19SBen Gras else if ( aExp < 0x3FFF ) {
3436*84d9c625SLionel Sambuc if ( aExp | aSig ) set_float_exception_inexact_flag();
34372fe8fb19SBen Gras return 0;
34382fe8fb19SBen Gras }
34392fe8fb19SBen Gras z = aSig>>( - shiftCount );
34402fe8fb19SBen Gras if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3441*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
34422fe8fb19SBen Gras }
34432fe8fb19SBen Gras if ( aSign ) z = - z;
34442fe8fb19SBen Gras return z;
34452fe8fb19SBen Gras
34462fe8fb19SBen Gras }
34472fe8fb19SBen Gras
34482fe8fb19SBen Gras /*
34492fe8fb19SBen Gras -------------------------------------------------------------------------------
34502fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
34512fe8fb19SBen Gras point value `a' to the single-precision floating-point format. The
34522fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
34532fe8fb19SBen Gras Floating-Point Arithmetic.
34542fe8fb19SBen Gras -------------------------------------------------------------------------------
34552fe8fb19SBen Gras */
floatx80_to_float32(floatx80 a)34562fe8fb19SBen Gras float32 floatx80_to_float32( floatx80 a )
34572fe8fb19SBen Gras {
34582fe8fb19SBen Gras flag aSign;
34592fe8fb19SBen Gras int32 aExp;
34602fe8fb19SBen Gras bits64 aSig;
34612fe8fb19SBen Gras
34622fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
34632fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
34642fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
34652fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
34662fe8fb19SBen Gras if ( (bits64) ( aSig<<1 ) ) {
34672fe8fb19SBen Gras return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
34682fe8fb19SBen Gras }
34692fe8fb19SBen Gras return packFloat32( aSign, 0xFF, 0 );
34702fe8fb19SBen Gras }
34712fe8fb19SBen Gras shift64RightJamming( aSig, 33, &aSig );
34722fe8fb19SBen Gras if ( aExp || aSig ) aExp -= 0x3F81;
34732fe8fb19SBen Gras return roundAndPackFloat32( aSign, aExp, aSig );
34742fe8fb19SBen Gras
34752fe8fb19SBen Gras }
34762fe8fb19SBen Gras
34772fe8fb19SBen Gras /*
34782fe8fb19SBen Gras -------------------------------------------------------------------------------
34792fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
34802fe8fb19SBen Gras point value `a' to the double-precision floating-point format. The
34812fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
34822fe8fb19SBen Gras Floating-Point Arithmetic.
34832fe8fb19SBen Gras -------------------------------------------------------------------------------
34842fe8fb19SBen Gras */
floatx80_to_float64(floatx80 a)34852fe8fb19SBen Gras float64 floatx80_to_float64( floatx80 a )
34862fe8fb19SBen Gras {
34872fe8fb19SBen Gras flag aSign;
34882fe8fb19SBen Gras int32 aExp;
34892fe8fb19SBen Gras bits64 aSig, zSig;
34902fe8fb19SBen Gras
34912fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
34922fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
34932fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
34942fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
34952fe8fb19SBen Gras if ( (bits64) ( aSig<<1 ) ) {
34962fe8fb19SBen Gras return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
34972fe8fb19SBen Gras }
34982fe8fb19SBen Gras return packFloat64( aSign, 0x7FF, 0 );
34992fe8fb19SBen Gras }
35002fe8fb19SBen Gras shift64RightJamming( aSig, 1, &zSig );
35012fe8fb19SBen Gras if ( aExp || aSig ) aExp -= 0x3C01;
35022fe8fb19SBen Gras return roundAndPackFloat64( aSign, aExp, zSig );
35032fe8fb19SBen Gras
35042fe8fb19SBen Gras }
35052fe8fb19SBen Gras
35062fe8fb19SBen Gras #ifdef FLOAT128
35072fe8fb19SBen Gras
35082fe8fb19SBen Gras /*
35092fe8fb19SBen Gras -------------------------------------------------------------------------------
35102fe8fb19SBen Gras Returns the result of converting the extended double-precision floating-
35112fe8fb19SBen Gras point value `a' to the quadruple-precision floating-point format. The
35122fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
35132fe8fb19SBen Gras Floating-Point Arithmetic.
35142fe8fb19SBen Gras -------------------------------------------------------------------------------
35152fe8fb19SBen Gras */
floatx80_to_float128(floatx80 a)35162fe8fb19SBen Gras float128 floatx80_to_float128( floatx80 a )
35172fe8fb19SBen Gras {
35182fe8fb19SBen Gras flag aSign;
35192fe8fb19SBen Gras int16 aExp;
35202fe8fb19SBen Gras bits64 aSig, zSig0, zSig1;
35212fe8fb19SBen Gras
35222fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
35232fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
35242fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
35252fe8fb19SBen Gras if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
35262fe8fb19SBen Gras return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
35272fe8fb19SBen Gras }
35282fe8fb19SBen Gras shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
35292fe8fb19SBen Gras return packFloat128( aSign, aExp, zSig0, zSig1 );
35302fe8fb19SBen Gras
35312fe8fb19SBen Gras }
35322fe8fb19SBen Gras
35332fe8fb19SBen Gras #endif
35342fe8fb19SBen Gras
/*
-------------------------------------------------------------------------------
Rounds the extended double-precision floating-point value `a' to an integer,
and returns the result as an extended double-precision floating-point
value.  The operation is performed according to the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_round_to_int( floatx80 a )
{
    flag negative;
    int32 biasedExp;
    bits64 unitBit, fractionMask;
    int8 mode;
    floatx80 z;

    biasedExp = extractFloatx80Exp( a );

    /* Exponent 0x403E or above: returned unchanged unless `a' is a NaN. */
    if ( 0x403E <= biasedExp ) {
        if (    ( biasedExp == 0x7FFF )
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 ) ) {
            return propagateFloatx80NaN( a, a );
        }
        return a;
    }

    /* Exponent below the bias: the result is a zero or a one, by mode. */
    if ( biasedExp < 0x3FFF ) {
        if (    ( biasedExp == 0 )
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
            return a;
        }
        set_float_exception_inexact_flag();
        negative = extractFloatx80Sign( a );
        switch ( float_rounding_mode ) {
        case float_round_to_zero:
            break;
        case float_round_nearest_even:
            /* Exponent 0x3FFE with fraction bits set rounds to one. */
            if (    ( biasedExp == 0x3FFE )
                 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 ) ) {
                return
                    packFloatx80(
                        negative, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
        case float_round_up:
            if ( negative ) return packFloatx80( 1, 0, 0 );
            return packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        case float_round_down:
            if ( negative ) {
                return packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            return packFloatx80( 0, 0, 0 );
        }
        return packFloatx80( negative, 0, 0 );
    }

    /*
     * General case: unitBit is a single-bit mask (1 shifted left by
     * 0x403E - exponent) and fractionMask covers every bit below it; the
     * fractionMask bits of the significand are rounded away below.
     */
    unitBit = (bits64) 1<<( 0x403E - biasedExp );
    fractionMask = unitBit - 1;
    z = a;
    mode = float_rounding_mode;
    if ( mode == float_round_nearest_even ) {
        /* Add one half; on an exact tie, clear the unit bit to land on even. */
        z.low += unitBit>>1;
        if ( ( z.low & fractionMask ) == 0 ) z.low &= ~ unitBit;
    }
    else if ( mode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude when rounding away from 0. */
        if ( extractFloatx80Sign( z ) ^ ( mode == float_round_up ) ) {
            z.low += fractionMask;
        }
    }
    z.low &= ~ fractionMask;
    if ( z.low == 0 ) {
        /* The increment carried out of the significand. */
        z.high += 1;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) set_float_exception_inexact_flag();
    return z;

}
36102fe8fb19SBen Gras
36112fe8fb19SBen Gras /*
36122fe8fb19SBen Gras -------------------------------------------------------------------------------
36132fe8fb19SBen Gras Returns the result of adding the absolute values of the extended double-
36142fe8fb19SBen Gras precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
36152fe8fb19SBen Gras negated before being returned. `zSign' is ignored if the result is a NaN.
36162fe8fb19SBen Gras The addition is performed according to the IEC/IEEE Standard for Binary
36172fe8fb19SBen Gras Floating-Point Arithmetic.
36182fe8fb19SBen Gras -------------------------------------------------------------------------------
36192fe8fb19SBen Gras */
addFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)36202fe8fb19SBen Gras static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
36212fe8fb19SBen Gras {
36222fe8fb19SBen Gras int32 aExp, bExp, zExp;
36232fe8fb19SBen Gras bits64 aSig, bSig, zSig0, zSig1;
36242fe8fb19SBen Gras int32 expDiff;
36252fe8fb19SBen Gras
36262fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
36272fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
36282fe8fb19SBen Gras bSig = extractFloatx80Frac( b );
36292fe8fb19SBen Gras bExp = extractFloatx80Exp( b );
36302fe8fb19SBen Gras expDiff = aExp - bExp;
36312fe8fb19SBen Gras if ( 0 < expDiff ) {
36322fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
36332fe8fb19SBen Gras if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
36342fe8fb19SBen Gras return a;
36352fe8fb19SBen Gras }
36362fe8fb19SBen Gras if ( bExp == 0 ) --expDiff;
36372fe8fb19SBen Gras shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
36382fe8fb19SBen Gras zExp = aExp;
36392fe8fb19SBen Gras }
36402fe8fb19SBen Gras else if ( expDiff < 0 ) {
36412fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
36422fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
36432fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
36442fe8fb19SBen Gras }
36452fe8fb19SBen Gras if ( aExp == 0 ) ++expDiff;
36462fe8fb19SBen Gras shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
36472fe8fb19SBen Gras zExp = bExp;
36482fe8fb19SBen Gras }
36492fe8fb19SBen Gras else {
36502fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
36512fe8fb19SBen Gras if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
36522fe8fb19SBen Gras return propagateFloatx80NaN( a, b );
36532fe8fb19SBen Gras }
36542fe8fb19SBen Gras return a;
36552fe8fb19SBen Gras }
36562fe8fb19SBen Gras zSig1 = 0;
36572fe8fb19SBen Gras zSig0 = aSig + bSig;
36582fe8fb19SBen Gras if ( aExp == 0 ) {
36592fe8fb19SBen Gras normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
36602fe8fb19SBen Gras goto roundAndPack;
36612fe8fb19SBen Gras }
36622fe8fb19SBen Gras zExp = aExp;
36632fe8fb19SBen Gras goto shiftRight1;
36642fe8fb19SBen Gras }
36652fe8fb19SBen Gras zSig0 = aSig + bSig;
36662fe8fb19SBen Gras if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
36672fe8fb19SBen Gras shiftRight1:
36682fe8fb19SBen Gras shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
36692fe8fb19SBen Gras zSig0 |= LIT64( 0x8000000000000000 );
36702fe8fb19SBen Gras ++zExp;
36712fe8fb19SBen Gras roundAndPack:
36722fe8fb19SBen Gras return
36732fe8fb19SBen Gras roundAndPackFloatx80(
36742fe8fb19SBen Gras floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
36752fe8fb19SBen Gras
36762fe8fb19SBen Gras }
36772fe8fb19SBen Gras
36782fe8fb19SBen Gras /*
36792fe8fb19SBen Gras -------------------------------------------------------------------------------
36802fe8fb19SBen Gras Returns the result of subtracting the absolute values of the extended
36812fe8fb19SBen Gras double-precision floating-point values `a' and `b'. If `zSign' is 1, the
36822fe8fb19SBen Gras difference is negated before being returned. `zSign' is ignored if the
36832fe8fb19SBen Gras result is a NaN. The subtraction is performed according to the IEC/IEEE
36842fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
36852fe8fb19SBen Gras -------------------------------------------------------------------------------
36862fe8fb19SBen Gras */
subFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)36872fe8fb19SBen Gras static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
36882fe8fb19SBen Gras {
36892fe8fb19SBen Gras int32 aExp, bExp, zExp;
36902fe8fb19SBen Gras bits64 aSig, bSig, zSig0, zSig1;
36912fe8fb19SBen Gras int32 expDiff;
36922fe8fb19SBen Gras floatx80 z;
36932fe8fb19SBen Gras
36942fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
36952fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
36962fe8fb19SBen Gras bSig = extractFloatx80Frac( b );
36972fe8fb19SBen Gras bExp = extractFloatx80Exp( b );
36982fe8fb19SBen Gras expDiff = aExp - bExp;
36992fe8fb19SBen Gras if ( 0 < expDiff ) goto aExpBigger;
37002fe8fb19SBen Gras if ( expDiff < 0 ) goto bExpBigger;
37012fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
37022fe8fb19SBen Gras if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
37032fe8fb19SBen Gras return propagateFloatx80NaN( a, b );
37042fe8fb19SBen Gras }
37052fe8fb19SBen Gras float_raise( float_flag_invalid );
37062fe8fb19SBen Gras z.low = floatx80_default_nan_low;
37072fe8fb19SBen Gras z.high = floatx80_default_nan_high;
37082fe8fb19SBen Gras return z;
37092fe8fb19SBen Gras }
37102fe8fb19SBen Gras if ( aExp == 0 ) {
37112fe8fb19SBen Gras aExp = 1;
37122fe8fb19SBen Gras bExp = 1;
37132fe8fb19SBen Gras }
37142fe8fb19SBen Gras zSig1 = 0;
37152fe8fb19SBen Gras if ( bSig < aSig ) goto aBigger;
37162fe8fb19SBen Gras if ( aSig < bSig ) goto bBigger;
37172fe8fb19SBen Gras return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
37182fe8fb19SBen Gras bExpBigger:
37192fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
37202fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
37212fe8fb19SBen Gras return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
37222fe8fb19SBen Gras }
37232fe8fb19SBen Gras if ( aExp == 0 ) ++expDiff;
37242fe8fb19SBen Gras shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
37252fe8fb19SBen Gras bBigger:
37262fe8fb19SBen Gras sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
37272fe8fb19SBen Gras zExp = bExp;
37282fe8fb19SBen Gras zSign ^= 1;
37292fe8fb19SBen Gras goto normalizeRoundAndPack;
37302fe8fb19SBen Gras aExpBigger:
37312fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
37322fe8fb19SBen Gras if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
37332fe8fb19SBen Gras return a;
37342fe8fb19SBen Gras }
37352fe8fb19SBen Gras if ( bExp == 0 ) --expDiff;
37362fe8fb19SBen Gras shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
37372fe8fb19SBen Gras aBigger:
37382fe8fb19SBen Gras sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
37392fe8fb19SBen Gras zExp = aExp;
37402fe8fb19SBen Gras normalizeRoundAndPack:
37412fe8fb19SBen Gras return
37422fe8fb19SBen Gras normalizeRoundAndPackFloatx80(
37432fe8fb19SBen Gras floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
37442fe8fb19SBen Gras
37452fe8fb19SBen Gras }
37462fe8fb19SBen Gras
37472fe8fb19SBen Gras /*
37482fe8fb19SBen Gras -------------------------------------------------------------------------------
37492fe8fb19SBen Gras Returns the result of adding the extended double-precision floating-point
37502fe8fb19SBen Gras values `a' and `b'. The operation is performed according to the IEC/IEEE
37512fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
37522fe8fb19SBen Gras -------------------------------------------------------------------------------
37532fe8fb19SBen Gras */
floatx80_add(floatx80 a,floatx80 b)37542fe8fb19SBen Gras floatx80 floatx80_add( floatx80 a, floatx80 b )
37552fe8fb19SBen Gras {
37562fe8fb19SBen Gras flag aSign, bSign;
37572fe8fb19SBen Gras
37582fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
37592fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
37602fe8fb19SBen Gras if ( aSign == bSign ) {
37612fe8fb19SBen Gras return addFloatx80Sigs( a, b, aSign );
37622fe8fb19SBen Gras }
37632fe8fb19SBen Gras else {
37642fe8fb19SBen Gras return subFloatx80Sigs( a, b, aSign );
37652fe8fb19SBen Gras }
37662fe8fb19SBen Gras
37672fe8fb19SBen Gras }
37682fe8fb19SBen Gras
37692fe8fb19SBen Gras /*
37702fe8fb19SBen Gras -------------------------------------------------------------------------------
37712fe8fb19SBen Gras Returns the result of subtracting the extended double-precision floating-
37722fe8fb19SBen Gras point values `a' and `b'. The operation is performed according to the
37732fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
37742fe8fb19SBen Gras -------------------------------------------------------------------------------
37752fe8fb19SBen Gras */
floatx80_sub(floatx80 a,floatx80 b)37762fe8fb19SBen Gras floatx80 floatx80_sub( floatx80 a, floatx80 b )
37772fe8fb19SBen Gras {
37782fe8fb19SBen Gras flag aSign, bSign;
37792fe8fb19SBen Gras
37802fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
37812fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
37822fe8fb19SBen Gras if ( aSign == bSign ) {
37832fe8fb19SBen Gras return subFloatx80Sigs( a, b, aSign );
37842fe8fb19SBen Gras }
37852fe8fb19SBen Gras else {
37862fe8fb19SBen Gras return addFloatx80Sigs( a, b, aSign );
37872fe8fb19SBen Gras }
37882fe8fb19SBen Gras
37892fe8fb19SBen Gras }
37902fe8fb19SBen Gras
37912fe8fb19SBen Gras /*
37922fe8fb19SBen Gras -------------------------------------------------------------------------------
37932fe8fb19SBen Gras Returns the result of multiplying the extended double-precision floating-
37942fe8fb19SBen Gras point values `a' and `b'. The operation is performed according to the
37952fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
37962fe8fb19SBen Gras -------------------------------------------------------------------------------
37972fe8fb19SBen Gras */
floatx80_mul(floatx80 a,floatx80 b)37982fe8fb19SBen Gras floatx80 floatx80_mul( floatx80 a, floatx80 b )
37992fe8fb19SBen Gras {
38002fe8fb19SBen Gras flag aSign, bSign, zSign;
38012fe8fb19SBen Gras int32 aExp, bExp, zExp;
38022fe8fb19SBen Gras bits64 aSig, bSig, zSig0, zSig1;
38032fe8fb19SBen Gras floatx80 z;
38042fe8fb19SBen Gras
38052fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
38062fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
38072fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
38082fe8fb19SBen Gras bSig = extractFloatx80Frac( b );
38092fe8fb19SBen Gras bExp = extractFloatx80Exp( b );
38102fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
38112fe8fb19SBen Gras zSign = aSign ^ bSign;
38122fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
38132fe8fb19SBen Gras if ( (bits64) ( aSig<<1 )
38142fe8fb19SBen Gras || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
38152fe8fb19SBen Gras return propagateFloatx80NaN( a, b );
38162fe8fb19SBen Gras }
38172fe8fb19SBen Gras if ( ( bExp | bSig ) == 0 ) goto invalid;
38182fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
38192fe8fb19SBen Gras }
38202fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
38212fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
38222fe8fb19SBen Gras if ( ( aExp | aSig ) == 0 ) {
38232fe8fb19SBen Gras invalid:
38242fe8fb19SBen Gras float_raise( float_flag_invalid );
38252fe8fb19SBen Gras z.low = floatx80_default_nan_low;
38262fe8fb19SBen Gras z.high = floatx80_default_nan_high;
38272fe8fb19SBen Gras return z;
38282fe8fb19SBen Gras }
38292fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
38302fe8fb19SBen Gras }
38312fe8fb19SBen Gras if ( aExp == 0 ) {
38322fe8fb19SBen Gras if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
38332fe8fb19SBen Gras normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
38342fe8fb19SBen Gras }
38352fe8fb19SBen Gras if ( bExp == 0 ) {
38362fe8fb19SBen Gras if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
38372fe8fb19SBen Gras normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
38382fe8fb19SBen Gras }
38392fe8fb19SBen Gras zExp = aExp + bExp - 0x3FFE;
38402fe8fb19SBen Gras mul64To128( aSig, bSig, &zSig0, &zSig1 );
38412fe8fb19SBen Gras if ( 0 < (sbits64) zSig0 ) {
38422fe8fb19SBen Gras shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
38432fe8fb19SBen Gras --zExp;
38442fe8fb19SBen Gras }
38452fe8fb19SBen Gras return
38462fe8fb19SBen Gras roundAndPackFloatx80(
38472fe8fb19SBen Gras floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
38482fe8fb19SBen Gras
38492fe8fb19SBen Gras }
38502fe8fb19SBen Gras
38512fe8fb19SBen Gras /*
38522fe8fb19SBen Gras -------------------------------------------------------------------------------
38532fe8fb19SBen Gras Returns the result of dividing the extended double-precision floating-point
38542fe8fb19SBen Gras value `a' by the corresponding value `b'. The operation is performed
38552fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
38562fe8fb19SBen Gras -------------------------------------------------------------------------------
38572fe8fb19SBen Gras */
floatx80_div(floatx80 a,floatx80 b)38582fe8fb19SBen Gras floatx80 floatx80_div( floatx80 a, floatx80 b )
38592fe8fb19SBen Gras {
38602fe8fb19SBen Gras flag aSign, bSign, zSign;
38612fe8fb19SBen Gras int32 aExp, bExp, zExp;
38622fe8fb19SBen Gras bits64 aSig, bSig, zSig0, zSig1;
38632fe8fb19SBen Gras bits64 rem0, rem1, rem2, term0, term1, term2;
38642fe8fb19SBen Gras floatx80 z;
38652fe8fb19SBen Gras
38662fe8fb19SBen Gras aSig = extractFloatx80Frac( a );
38672fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
38682fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
38692fe8fb19SBen Gras bSig = extractFloatx80Frac( b );
38702fe8fb19SBen Gras bExp = extractFloatx80Exp( b );
38712fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
38722fe8fb19SBen Gras zSign = aSign ^ bSign;
38732fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
38742fe8fb19SBen Gras if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
38752fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
38762fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
38772fe8fb19SBen Gras goto invalid;
38782fe8fb19SBen Gras }
38792fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
38802fe8fb19SBen Gras }
38812fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
38822fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
38832fe8fb19SBen Gras return packFloatx80( zSign, 0, 0 );
38842fe8fb19SBen Gras }
38852fe8fb19SBen Gras if ( bExp == 0 ) {
38862fe8fb19SBen Gras if ( bSig == 0 ) {
38872fe8fb19SBen Gras if ( ( aExp | aSig ) == 0 ) {
38882fe8fb19SBen Gras invalid:
38892fe8fb19SBen Gras float_raise( float_flag_invalid );
38902fe8fb19SBen Gras z.low = floatx80_default_nan_low;
38912fe8fb19SBen Gras z.high = floatx80_default_nan_high;
38922fe8fb19SBen Gras return z;
38932fe8fb19SBen Gras }
38942fe8fb19SBen Gras float_raise( float_flag_divbyzero );
38952fe8fb19SBen Gras return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
38962fe8fb19SBen Gras }
38972fe8fb19SBen Gras normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
38982fe8fb19SBen Gras }
38992fe8fb19SBen Gras if ( aExp == 0 ) {
39002fe8fb19SBen Gras if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
39012fe8fb19SBen Gras normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
39022fe8fb19SBen Gras }
39032fe8fb19SBen Gras zExp = aExp - bExp + 0x3FFE;
39042fe8fb19SBen Gras rem1 = 0;
39052fe8fb19SBen Gras if ( bSig <= aSig ) {
39062fe8fb19SBen Gras shift128Right( aSig, 0, 1, &aSig, &rem1 );
39072fe8fb19SBen Gras ++zExp;
39082fe8fb19SBen Gras }
39092fe8fb19SBen Gras zSig0 = estimateDiv128To64( aSig, rem1, bSig );
39102fe8fb19SBen Gras mul64To128( bSig, zSig0, &term0, &term1 );
39112fe8fb19SBen Gras sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
39122fe8fb19SBen Gras while ( (sbits64) rem0 < 0 ) {
39132fe8fb19SBen Gras --zSig0;
39142fe8fb19SBen Gras add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
39152fe8fb19SBen Gras }
39162fe8fb19SBen Gras zSig1 = estimateDiv128To64( rem1, 0, bSig );
39172fe8fb19SBen Gras if ( (bits64) ( zSig1<<1 ) <= 8 ) {
39182fe8fb19SBen Gras mul64To128( bSig, zSig1, &term1, &term2 );
39192fe8fb19SBen Gras sub128( rem1, 0, term1, term2, &rem1, &rem2 );
39202fe8fb19SBen Gras while ( (sbits64) rem1 < 0 ) {
39212fe8fb19SBen Gras --zSig1;
39222fe8fb19SBen Gras add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
39232fe8fb19SBen Gras }
39242fe8fb19SBen Gras zSig1 |= ( ( rem1 | rem2 ) != 0 );
39252fe8fb19SBen Gras }
39262fe8fb19SBen Gras return
39272fe8fb19SBen Gras roundAndPackFloatx80(
39282fe8fb19SBen Gras floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
39292fe8fb19SBen Gras
39302fe8fb19SBen Gras }
39312fe8fb19SBen Gras
39322fe8fb19SBen Gras /*
39332fe8fb19SBen Gras -------------------------------------------------------------------------------
39342fe8fb19SBen Gras Returns the remainder of the extended double-precision floating-point value
39352fe8fb19SBen Gras `a' with respect to the corresponding value `b'. The operation is performed
39362fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
39372fe8fb19SBen Gras -------------------------------------------------------------------------------
39382fe8fb19SBen Gras */
floatx80_rem(floatx80 a,floatx80 b)39392fe8fb19SBen Gras floatx80 floatx80_rem( floatx80 a, floatx80 b )
39402fe8fb19SBen Gras {
39412fe8fb19SBen Gras flag aSign, bSign, zSign;
39422fe8fb19SBen Gras int32 aExp, bExp, expDiff;
39432fe8fb19SBen Gras bits64 aSig0, aSig1, bSig;
39442fe8fb19SBen Gras bits64 q, term0, term1, alternateASig0, alternateASig1;
39452fe8fb19SBen Gras floatx80 z;
39462fe8fb19SBen Gras
39472fe8fb19SBen Gras aSig0 = extractFloatx80Frac( a );
39482fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
39492fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
39502fe8fb19SBen Gras bSig = extractFloatx80Frac( b );
39512fe8fb19SBen Gras bExp = extractFloatx80Exp( b );
39522fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
39532fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
39542fe8fb19SBen Gras if ( (bits64) ( aSig0<<1 )
39552fe8fb19SBen Gras || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
39562fe8fb19SBen Gras return propagateFloatx80NaN( a, b );
39572fe8fb19SBen Gras }
39582fe8fb19SBen Gras goto invalid;
39592fe8fb19SBen Gras }
39602fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
39612fe8fb19SBen Gras if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
39622fe8fb19SBen Gras return a;
39632fe8fb19SBen Gras }
39642fe8fb19SBen Gras if ( bExp == 0 ) {
39652fe8fb19SBen Gras if ( bSig == 0 ) {
39662fe8fb19SBen Gras invalid:
39672fe8fb19SBen Gras float_raise( float_flag_invalid );
39682fe8fb19SBen Gras z.low = floatx80_default_nan_low;
39692fe8fb19SBen Gras z.high = floatx80_default_nan_high;
39702fe8fb19SBen Gras return z;
39712fe8fb19SBen Gras }
39722fe8fb19SBen Gras normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
39732fe8fb19SBen Gras }
39742fe8fb19SBen Gras if ( aExp == 0 ) {
39752fe8fb19SBen Gras if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
39762fe8fb19SBen Gras normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
39772fe8fb19SBen Gras }
39782fe8fb19SBen Gras bSig |= LIT64( 0x8000000000000000 );
39792fe8fb19SBen Gras zSign = aSign;
39802fe8fb19SBen Gras expDiff = aExp - bExp;
39812fe8fb19SBen Gras aSig1 = 0;
39822fe8fb19SBen Gras if ( expDiff < 0 ) {
39832fe8fb19SBen Gras if ( expDiff < -1 ) return a;
39842fe8fb19SBen Gras shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
39852fe8fb19SBen Gras expDiff = 0;
39862fe8fb19SBen Gras }
39872fe8fb19SBen Gras q = ( bSig <= aSig0 );
39882fe8fb19SBen Gras if ( q ) aSig0 -= bSig;
39892fe8fb19SBen Gras expDiff -= 64;
39902fe8fb19SBen Gras while ( 0 < expDiff ) {
39912fe8fb19SBen Gras q = estimateDiv128To64( aSig0, aSig1, bSig );
39922fe8fb19SBen Gras q = ( 2 < q ) ? q - 2 : 0;
39932fe8fb19SBen Gras mul64To128( bSig, q, &term0, &term1 );
39942fe8fb19SBen Gras sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
39952fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
39962fe8fb19SBen Gras expDiff -= 62;
39972fe8fb19SBen Gras }
39982fe8fb19SBen Gras expDiff += 64;
39992fe8fb19SBen Gras if ( 0 < expDiff ) {
40002fe8fb19SBen Gras q = estimateDiv128To64( aSig0, aSig1, bSig );
40012fe8fb19SBen Gras q = ( 2 < q ) ? q - 2 : 0;
40022fe8fb19SBen Gras q >>= 64 - expDiff;
40032fe8fb19SBen Gras mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
40042fe8fb19SBen Gras sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
40052fe8fb19SBen Gras shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
40062fe8fb19SBen Gras while ( le128( term0, term1, aSig0, aSig1 ) ) {
40072fe8fb19SBen Gras ++q;
40082fe8fb19SBen Gras sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
40092fe8fb19SBen Gras }
40102fe8fb19SBen Gras }
40112fe8fb19SBen Gras else {
40122fe8fb19SBen Gras term1 = 0;
40132fe8fb19SBen Gras term0 = bSig;
40142fe8fb19SBen Gras }
40152fe8fb19SBen Gras sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
40162fe8fb19SBen Gras if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
40172fe8fb19SBen Gras || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
40182fe8fb19SBen Gras && ( q & 1 ) )
40192fe8fb19SBen Gras ) {
40202fe8fb19SBen Gras aSig0 = alternateASig0;
40212fe8fb19SBen Gras aSig1 = alternateASig1;
40222fe8fb19SBen Gras zSign = ! zSign;
40232fe8fb19SBen Gras }
40242fe8fb19SBen Gras return
40252fe8fb19SBen Gras normalizeRoundAndPackFloatx80(
40262fe8fb19SBen Gras 80, zSign, bExp + expDiff, aSig0, aSig1 );
40272fe8fb19SBen Gras
40282fe8fb19SBen Gras }
40292fe8fb19SBen Gras
40302fe8fb19SBen Gras /*
40312fe8fb19SBen Gras -------------------------------------------------------------------------------
40322fe8fb19SBen Gras Returns the square root of the extended double-precision floating-point
40332fe8fb19SBen Gras value `a'. The operation is performed according to the IEC/IEEE Standard
40342fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
40352fe8fb19SBen Gras -------------------------------------------------------------------------------
40362fe8fb19SBen Gras */
floatx80_sqrt(floatx80 a)40372fe8fb19SBen Gras floatx80 floatx80_sqrt( floatx80 a )
40382fe8fb19SBen Gras {
40392fe8fb19SBen Gras flag aSign;
40402fe8fb19SBen Gras int32 aExp, zExp;
40412fe8fb19SBen Gras bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
40422fe8fb19SBen Gras bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
40432fe8fb19SBen Gras floatx80 z;
40442fe8fb19SBen Gras
40452fe8fb19SBen Gras aSig0 = extractFloatx80Frac( a );
40462fe8fb19SBen Gras aExp = extractFloatx80Exp( a );
40472fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
40482fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
40492fe8fb19SBen Gras if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
40502fe8fb19SBen Gras if ( ! aSign ) return a;
40512fe8fb19SBen Gras goto invalid;
40522fe8fb19SBen Gras }
40532fe8fb19SBen Gras if ( aSign ) {
40542fe8fb19SBen Gras if ( ( aExp | aSig0 ) == 0 ) return a;
40552fe8fb19SBen Gras invalid:
40562fe8fb19SBen Gras float_raise( float_flag_invalid );
40572fe8fb19SBen Gras z.low = floatx80_default_nan_low;
40582fe8fb19SBen Gras z.high = floatx80_default_nan_high;
40592fe8fb19SBen Gras return z;
40602fe8fb19SBen Gras }
40612fe8fb19SBen Gras if ( aExp == 0 ) {
40622fe8fb19SBen Gras if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
40632fe8fb19SBen Gras normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
40642fe8fb19SBen Gras }
40652fe8fb19SBen Gras zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
40662fe8fb19SBen Gras zSig0 = estimateSqrt32( aExp, aSig0>>32 );
40672fe8fb19SBen Gras shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
40682fe8fb19SBen Gras zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
40692fe8fb19SBen Gras doubleZSig0 = zSig0<<1;
40702fe8fb19SBen Gras mul64To128( zSig0, zSig0, &term0, &term1 );
40712fe8fb19SBen Gras sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
40722fe8fb19SBen Gras while ( (sbits64) rem0 < 0 ) {
40732fe8fb19SBen Gras --zSig0;
40742fe8fb19SBen Gras doubleZSig0 -= 2;
40752fe8fb19SBen Gras add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
40762fe8fb19SBen Gras }
40772fe8fb19SBen Gras zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
40782fe8fb19SBen Gras if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
40792fe8fb19SBen Gras if ( zSig1 == 0 ) zSig1 = 1;
40802fe8fb19SBen Gras mul64To128( doubleZSig0, zSig1, &term1, &term2 );
40812fe8fb19SBen Gras sub128( rem1, 0, term1, term2, &rem1, &rem2 );
40822fe8fb19SBen Gras mul64To128( zSig1, zSig1, &term2, &term3 );
40832fe8fb19SBen Gras sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
40842fe8fb19SBen Gras while ( (sbits64) rem1 < 0 ) {
40852fe8fb19SBen Gras --zSig1;
40862fe8fb19SBen Gras shortShift128Left( 0, zSig1, 1, &term2, &term3 );
40872fe8fb19SBen Gras term3 |= 1;
40882fe8fb19SBen Gras term2 |= doubleZSig0;
40892fe8fb19SBen Gras add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
40902fe8fb19SBen Gras }
40912fe8fb19SBen Gras zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
40922fe8fb19SBen Gras }
40932fe8fb19SBen Gras shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
40942fe8fb19SBen Gras zSig0 |= doubleZSig0;
40952fe8fb19SBen Gras return
40962fe8fb19SBen Gras roundAndPackFloatx80(
40972fe8fb19SBen Gras floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
40982fe8fb19SBen Gras
40992fe8fb19SBen Gras }
41002fe8fb19SBen Gras
41012fe8fb19SBen Gras /*
41022fe8fb19SBen Gras -------------------------------------------------------------------------------
41032fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is
41042fe8fb19SBen Gras equal to the corresponding value `b', and 0 otherwise. The comparison is
41052fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-Point
41062fe8fb19SBen Gras Arithmetic.
41072fe8fb19SBen Gras -------------------------------------------------------------------------------
41082fe8fb19SBen Gras */
floatx80_eq(floatx80 a,floatx80 b)41092fe8fb19SBen Gras flag floatx80_eq( floatx80 a, floatx80 b )
41102fe8fb19SBen Gras {
41112fe8fb19SBen Gras
41122fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
41132fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
41142fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
41152fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
41162fe8fb19SBen Gras ) {
41172fe8fb19SBen Gras if ( floatx80_is_signaling_nan( a )
41182fe8fb19SBen Gras || floatx80_is_signaling_nan( b ) ) {
41192fe8fb19SBen Gras float_raise( float_flag_invalid );
41202fe8fb19SBen Gras }
41212fe8fb19SBen Gras return 0;
41222fe8fb19SBen Gras }
41232fe8fb19SBen Gras return
41242fe8fb19SBen Gras ( a.low == b.low )
41252fe8fb19SBen Gras && ( ( a.high == b.high )
41262fe8fb19SBen Gras || ( ( a.low == 0 )
41272fe8fb19SBen Gras && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
41282fe8fb19SBen Gras );
41292fe8fb19SBen Gras
41302fe8fb19SBen Gras }
41312fe8fb19SBen Gras
41322fe8fb19SBen Gras /*
41332fe8fb19SBen Gras -------------------------------------------------------------------------------
41342fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is
41352fe8fb19SBen Gras less than or equal to the corresponding value `b', and 0 otherwise. The
41362fe8fb19SBen Gras comparison is performed according to the IEC/IEEE Standard for Binary
41372fe8fb19SBen Gras Floating-Point Arithmetic.
41382fe8fb19SBen Gras -------------------------------------------------------------------------------
41392fe8fb19SBen Gras */
floatx80_le(floatx80 a,floatx80 b)41402fe8fb19SBen Gras flag floatx80_le( floatx80 a, floatx80 b )
41412fe8fb19SBen Gras {
41422fe8fb19SBen Gras flag aSign, bSign;
41432fe8fb19SBen Gras
41442fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
41452fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
41462fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
41472fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
41482fe8fb19SBen Gras ) {
41492fe8fb19SBen Gras float_raise( float_flag_invalid );
41502fe8fb19SBen Gras return 0;
41512fe8fb19SBen Gras }
41522fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
41532fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
41542fe8fb19SBen Gras if ( aSign != bSign ) {
41552fe8fb19SBen Gras return
41562fe8fb19SBen Gras aSign
41572fe8fb19SBen Gras || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
41582fe8fb19SBen Gras == 0 );
41592fe8fb19SBen Gras }
41602fe8fb19SBen Gras return
41612fe8fb19SBen Gras aSign ? le128( b.high, b.low, a.high, a.low )
41622fe8fb19SBen Gras : le128( a.high, a.low, b.high, b.low );
41632fe8fb19SBen Gras
41642fe8fb19SBen Gras }
41652fe8fb19SBen Gras
41662fe8fb19SBen Gras /*
41672fe8fb19SBen Gras -------------------------------------------------------------------------------
41682fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is
41692fe8fb19SBen Gras less than the corresponding value `b', and 0 otherwise. The comparison
41702fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
41712fe8fb19SBen Gras Arithmetic.
41722fe8fb19SBen Gras -------------------------------------------------------------------------------
41732fe8fb19SBen Gras */
floatx80_lt(floatx80 a,floatx80 b)41742fe8fb19SBen Gras flag floatx80_lt( floatx80 a, floatx80 b )
41752fe8fb19SBen Gras {
41762fe8fb19SBen Gras flag aSign, bSign;
41772fe8fb19SBen Gras
41782fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
41792fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
41802fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
41812fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
41822fe8fb19SBen Gras ) {
41832fe8fb19SBen Gras float_raise( float_flag_invalid );
41842fe8fb19SBen Gras return 0;
41852fe8fb19SBen Gras }
41862fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
41872fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
41882fe8fb19SBen Gras if ( aSign != bSign ) {
41892fe8fb19SBen Gras return
41902fe8fb19SBen Gras aSign
41912fe8fb19SBen Gras && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
41922fe8fb19SBen Gras != 0 );
41932fe8fb19SBen Gras }
41942fe8fb19SBen Gras return
41952fe8fb19SBen Gras aSign ? lt128( b.high, b.low, a.high, a.low )
41962fe8fb19SBen Gras : lt128( a.high, a.low, b.high, b.low );
41972fe8fb19SBen Gras
41982fe8fb19SBen Gras }
41992fe8fb19SBen Gras
42002fe8fb19SBen Gras /*
42012fe8fb19SBen Gras -------------------------------------------------------------------------------
42022fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is equal
42032fe8fb19SBen Gras to the corresponding value `b', and 0 otherwise. The invalid exception is
42042fe8fb19SBen Gras raised if either operand is a NaN. Otherwise, the comparison is performed
42052fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
42062fe8fb19SBen Gras -------------------------------------------------------------------------------
42072fe8fb19SBen Gras */
floatx80_eq_signaling(floatx80 a,floatx80 b)42082fe8fb19SBen Gras flag floatx80_eq_signaling( floatx80 a, floatx80 b )
42092fe8fb19SBen Gras {
42102fe8fb19SBen Gras
42112fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
42122fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
42132fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
42142fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
42152fe8fb19SBen Gras ) {
42162fe8fb19SBen Gras float_raise( float_flag_invalid );
42172fe8fb19SBen Gras return 0;
42182fe8fb19SBen Gras }
42192fe8fb19SBen Gras return
42202fe8fb19SBen Gras ( a.low == b.low )
42212fe8fb19SBen Gras && ( ( a.high == b.high )
42222fe8fb19SBen Gras || ( ( a.low == 0 )
42232fe8fb19SBen Gras && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
42242fe8fb19SBen Gras );
42252fe8fb19SBen Gras
42262fe8fb19SBen Gras }
42272fe8fb19SBen Gras
42282fe8fb19SBen Gras /*
42292fe8fb19SBen Gras -------------------------------------------------------------------------------
42302fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is less
42312fe8fb19SBen Gras than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
42322fe8fb19SBen Gras do not cause an exception. Otherwise, the comparison is performed according
42332fe8fb19SBen Gras to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
42342fe8fb19SBen Gras -------------------------------------------------------------------------------
42352fe8fb19SBen Gras */
floatx80_le_quiet(floatx80 a,floatx80 b)42362fe8fb19SBen Gras flag floatx80_le_quiet( floatx80 a, floatx80 b )
42372fe8fb19SBen Gras {
42382fe8fb19SBen Gras flag aSign, bSign;
42392fe8fb19SBen Gras
42402fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
42412fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
42422fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
42432fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
42442fe8fb19SBen Gras ) {
42452fe8fb19SBen Gras if ( floatx80_is_signaling_nan( a )
42462fe8fb19SBen Gras || floatx80_is_signaling_nan( b ) ) {
42472fe8fb19SBen Gras float_raise( float_flag_invalid );
42482fe8fb19SBen Gras }
42492fe8fb19SBen Gras return 0;
42502fe8fb19SBen Gras }
42512fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
42522fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
42532fe8fb19SBen Gras if ( aSign != bSign ) {
42542fe8fb19SBen Gras return
42552fe8fb19SBen Gras aSign
42562fe8fb19SBen Gras || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
42572fe8fb19SBen Gras == 0 );
42582fe8fb19SBen Gras }
42592fe8fb19SBen Gras return
42602fe8fb19SBen Gras aSign ? le128( b.high, b.low, a.high, a.low )
42612fe8fb19SBen Gras : le128( a.high, a.low, b.high, b.low );
42622fe8fb19SBen Gras
42632fe8fb19SBen Gras }
42642fe8fb19SBen Gras
42652fe8fb19SBen Gras /*
42662fe8fb19SBen Gras -------------------------------------------------------------------------------
42672fe8fb19SBen Gras Returns 1 if the extended double-precision floating-point value `a' is less
42682fe8fb19SBen Gras than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
42692fe8fb19SBen Gras an exception. Otherwise, the comparison is performed according to the
42702fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
42712fe8fb19SBen Gras -------------------------------------------------------------------------------
42722fe8fb19SBen Gras */
floatx80_lt_quiet(floatx80 a,floatx80 b)42732fe8fb19SBen Gras flag floatx80_lt_quiet( floatx80 a, floatx80 b )
42742fe8fb19SBen Gras {
42752fe8fb19SBen Gras flag aSign, bSign;
42762fe8fb19SBen Gras
42772fe8fb19SBen Gras if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
42782fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( a )<<1 ) )
42792fe8fb19SBen Gras || ( ( extractFloatx80Exp( b ) == 0x7FFF )
42802fe8fb19SBen Gras && (bits64) ( extractFloatx80Frac( b )<<1 ) )
42812fe8fb19SBen Gras ) {
42822fe8fb19SBen Gras if ( floatx80_is_signaling_nan( a )
42832fe8fb19SBen Gras || floatx80_is_signaling_nan( b ) ) {
42842fe8fb19SBen Gras float_raise( float_flag_invalid );
42852fe8fb19SBen Gras }
42862fe8fb19SBen Gras return 0;
42872fe8fb19SBen Gras }
42882fe8fb19SBen Gras aSign = extractFloatx80Sign( a );
42892fe8fb19SBen Gras bSign = extractFloatx80Sign( b );
42902fe8fb19SBen Gras if ( aSign != bSign ) {
42912fe8fb19SBen Gras return
42922fe8fb19SBen Gras aSign
42932fe8fb19SBen Gras && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
42942fe8fb19SBen Gras != 0 );
42952fe8fb19SBen Gras }
42962fe8fb19SBen Gras return
42972fe8fb19SBen Gras aSign ? lt128( b.high, b.low, a.high, a.low )
42982fe8fb19SBen Gras : lt128( a.high, a.low, b.high, b.low );
42992fe8fb19SBen Gras
43002fe8fb19SBen Gras }
43012fe8fb19SBen Gras
43022fe8fb19SBen Gras #endif
43032fe8fb19SBen Gras
43042fe8fb19SBen Gras #ifdef FLOAT128
43052fe8fb19SBen Gras
43062fe8fb19SBen Gras /*
43072fe8fb19SBen Gras -------------------------------------------------------------------------------
43082fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
43092fe8fb19SBen Gras value `a' to the 32-bit two's complement integer format. The conversion
43102fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
43112fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
43122fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
43132fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
43142fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
43152fe8fb19SBen Gras -------------------------------------------------------------------------------
43162fe8fb19SBen Gras */
float128_to_int32(float128 a)43172fe8fb19SBen Gras int32 float128_to_int32( float128 a )
43182fe8fb19SBen Gras {
43192fe8fb19SBen Gras flag aSign;
43202fe8fb19SBen Gras int32 aExp, shiftCount;
43212fe8fb19SBen Gras bits64 aSig0, aSig1;
43222fe8fb19SBen Gras
43232fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
43242fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
43252fe8fb19SBen Gras aExp = extractFloat128Exp( a );
43262fe8fb19SBen Gras aSign = extractFloat128Sign( a );
43272fe8fb19SBen Gras if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
43282fe8fb19SBen Gras if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
43292fe8fb19SBen Gras aSig0 |= ( aSig1 != 0 );
43302fe8fb19SBen Gras shiftCount = 0x4028 - aExp;
43312fe8fb19SBen Gras if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
43322fe8fb19SBen Gras return roundAndPackInt32( aSign, aSig0 );
43332fe8fb19SBen Gras
43342fe8fb19SBen Gras }
43352fe8fb19SBen Gras
43362fe8fb19SBen Gras /*
43372fe8fb19SBen Gras -------------------------------------------------------------------------------
43382fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
43392fe8fb19SBen Gras value `a' to the 32-bit two's complement integer format. The conversion
43402fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
43412fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero. If
43422fe8fb19SBen Gras `a' is a NaN, the largest positive integer is returned. Otherwise, if the
43432fe8fb19SBen Gras conversion overflows, the largest integer with the same sign as `a' is
43442fe8fb19SBen Gras returned.
43452fe8fb19SBen Gras -------------------------------------------------------------------------------
43462fe8fb19SBen Gras */
float128_to_int32_round_to_zero(float128 a)43472fe8fb19SBen Gras int32 float128_to_int32_round_to_zero( float128 a )
43482fe8fb19SBen Gras {
43492fe8fb19SBen Gras flag aSign;
43502fe8fb19SBen Gras int32 aExp, shiftCount;
43512fe8fb19SBen Gras bits64 aSig0, aSig1, savedASig;
43522fe8fb19SBen Gras int32 z;
43532fe8fb19SBen Gras
43542fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
43552fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
43562fe8fb19SBen Gras aExp = extractFloat128Exp( a );
43572fe8fb19SBen Gras aSign = extractFloat128Sign( a );
43582fe8fb19SBen Gras aSig0 |= ( aSig1 != 0 );
43592fe8fb19SBen Gras if ( 0x401E < aExp ) {
43602fe8fb19SBen Gras if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
43612fe8fb19SBen Gras goto invalid;
43622fe8fb19SBen Gras }
43632fe8fb19SBen Gras else if ( aExp < 0x3FFF ) {
4364*84d9c625SLionel Sambuc if ( aExp || aSig0 ) set_float_exception_inexact_flag();
43652fe8fb19SBen Gras return 0;
43662fe8fb19SBen Gras }
43672fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
43682fe8fb19SBen Gras shiftCount = 0x402F - aExp;
43692fe8fb19SBen Gras savedASig = aSig0;
43702fe8fb19SBen Gras aSig0 >>= shiftCount;
4371f14fb602SLionel Sambuc z = (int32)aSig0;
43722fe8fb19SBen Gras if ( aSign ) z = - z;
43732fe8fb19SBen Gras if ( ( z < 0 ) ^ aSign ) {
43742fe8fb19SBen Gras invalid:
43752fe8fb19SBen Gras float_raise( float_flag_invalid );
43762fe8fb19SBen Gras return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
43772fe8fb19SBen Gras }
43782fe8fb19SBen Gras if ( ( aSig0<<shiftCount ) != savedASig ) {
4379*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
43802fe8fb19SBen Gras }
43812fe8fb19SBen Gras return z;
43822fe8fb19SBen Gras
43832fe8fb19SBen Gras }
43842fe8fb19SBen Gras
43852fe8fb19SBen Gras /*
43862fe8fb19SBen Gras -------------------------------------------------------------------------------
43872fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
43882fe8fb19SBen Gras value `a' to the 64-bit two's complement integer format. The conversion
43892fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
43902fe8fb19SBen Gras Arithmetic---which means in particular that the conversion is rounded
43912fe8fb19SBen Gras according to the current rounding mode. If `a' is a NaN, the largest
43922fe8fb19SBen Gras positive integer is returned. Otherwise, if the conversion overflows, the
43932fe8fb19SBen Gras largest integer with the same sign as `a' is returned.
43942fe8fb19SBen Gras -------------------------------------------------------------------------------
43952fe8fb19SBen Gras */
float128_to_int64(float128 a)43962fe8fb19SBen Gras int64 float128_to_int64( float128 a )
43972fe8fb19SBen Gras {
43982fe8fb19SBen Gras flag aSign;
43992fe8fb19SBen Gras int32 aExp, shiftCount;
44002fe8fb19SBen Gras bits64 aSig0, aSig1;
44012fe8fb19SBen Gras
44022fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
44032fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
44042fe8fb19SBen Gras aExp = extractFloat128Exp( a );
44052fe8fb19SBen Gras aSign = extractFloat128Sign( a );
44062fe8fb19SBen Gras if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
44072fe8fb19SBen Gras shiftCount = 0x402F - aExp;
44082fe8fb19SBen Gras if ( shiftCount <= 0 ) {
44092fe8fb19SBen Gras if ( 0x403E < aExp ) {
44102fe8fb19SBen Gras float_raise( float_flag_invalid );
44112fe8fb19SBen Gras if ( ! aSign
44122fe8fb19SBen Gras || ( ( aExp == 0x7FFF )
44132fe8fb19SBen Gras && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
44142fe8fb19SBen Gras )
44152fe8fb19SBen Gras ) {
44162fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
44172fe8fb19SBen Gras }
44182fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
44192fe8fb19SBen Gras }
44202fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
44212fe8fb19SBen Gras }
44222fe8fb19SBen Gras else {
44232fe8fb19SBen Gras shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
44242fe8fb19SBen Gras }
44252fe8fb19SBen Gras return roundAndPackInt64( aSign, aSig0, aSig1 );
44262fe8fb19SBen Gras
44272fe8fb19SBen Gras }
44282fe8fb19SBen Gras
44292fe8fb19SBen Gras /*
44302fe8fb19SBen Gras -------------------------------------------------------------------------------
44312fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
44322fe8fb19SBen Gras value `a' to the 64-bit two's complement integer format. The conversion
44332fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
44342fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero.
44352fe8fb19SBen Gras If `a' is a NaN, the largest positive integer is returned. Otherwise, if
44362fe8fb19SBen Gras the conversion overflows, the largest integer with the same sign as `a' is
44372fe8fb19SBen Gras returned.
44382fe8fb19SBen Gras -------------------------------------------------------------------------------
44392fe8fb19SBen Gras */
float128_to_int64_round_to_zero(float128 a)44402fe8fb19SBen Gras int64 float128_to_int64_round_to_zero( float128 a )
44412fe8fb19SBen Gras {
44422fe8fb19SBen Gras flag aSign;
44432fe8fb19SBen Gras int32 aExp, shiftCount;
44442fe8fb19SBen Gras bits64 aSig0, aSig1;
44452fe8fb19SBen Gras int64 z;
44462fe8fb19SBen Gras
44472fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
44482fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
44492fe8fb19SBen Gras aExp = extractFloat128Exp( a );
44502fe8fb19SBen Gras aSign = extractFloat128Sign( a );
44512fe8fb19SBen Gras if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
44522fe8fb19SBen Gras shiftCount = aExp - 0x402F;
44532fe8fb19SBen Gras if ( 0 < shiftCount ) {
44542fe8fb19SBen Gras if ( 0x403E <= aExp ) {
44552fe8fb19SBen Gras aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
44562fe8fb19SBen Gras if ( ( a.high == LIT64( 0xC03E000000000000 ) )
44572fe8fb19SBen Gras && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4458*84d9c625SLionel Sambuc if ( aSig1 ) set_float_exception_inexact_flag();
44592fe8fb19SBen Gras }
44602fe8fb19SBen Gras else {
44612fe8fb19SBen Gras float_raise( float_flag_invalid );
44622fe8fb19SBen Gras if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
44632fe8fb19SBen Gras return LIT64( 0x7FFFFFFFFFFFFFFF );
44642fe8fb19SBen Gras }
44652fe8fb19SBen Gras }
44662fe8fb19SBen Gras return (sbits64) LIT64( 0x8000000000000000 );
44672fe8fb19SBen Gras }
44682fe8fb19SBen Gras z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
44692fe8fb19SBen Gras if ( (bits64) ( aSig1<<shiftCount ) ) {
4470*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
44712fe8fb19SBen Gras }
44722fe8fb19SBen Gras }
44732fe8fb19SBen Gras else {
44742fe8fb19SBen Gras if ( aExp < 0x3FFF ) {
44752fe8fb19SBen Gras if ( aExp | aSig0 | aSig1 ) {
4476*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
44772fe8fb19SBen Gras }
44782fe8fb19SBen Gras return 0;
44792fe8fb19SBen Gras }
44802fe8fb19SBen Gras z = aSig0>>( - shiftCount );
44812fe8fb19SBen Gras if ( aSig1
44822fe8fb19SBen Gras || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4483*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
44842fe8fb19SBen Gras }
44852fe8fb19SBen Gras }
44862fe8fb19SBen Gras if ( aSign ) z = - z;
44872fe8fb19SBen Gras return z;
44882fe8fb19SBen Gras
44892fe8fb19SBen Gras }
44902fe8fb19SBen Gras
4491f14fb602SLionel Sambuc #if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4492f14fb602SLionel Sambuc && defined(SOFTFLOAT_NEED_FIXUNS)
44932fe8fb19SBen Gras /*
44942fe8fb19SBen Gras * just like above - but do not care for overflow of signed results
44952fe8fb19SBen Gras */
float128_to_uint64_round_to_zero(float128 a)44962fe8fb19SBen Gras uint64 float128_to_uint64_round_to_zero( float128 a )
44972fe8fb19SBen Gras {
44982fe8fb19SBen Gras flag aSign;
44992fe8fb19SBen Gras int32 aExp, shiftCount;
45002fe8fb19SBen Gras bits64 aSig0, aSig1;
45012fe8fb19SBen Gras uint64 z;
45022fe8fb19SBen Gras
45032fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
45042fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
45052fe8fb19SBen Gras aExp = extractFloat128Exp( a );
45062fe8fb19SBen Gras aSign = extractFloat128Sign( a );
45072fe8fb19SBen Gras if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
45082fe8fb19SBen Gras shiftCount = aExp - 0x402F;
45092fe8fb19SBen Gras if ( 0 < shiftCount ) {
45102fe8fb19SBen Gras if ( 0x403F <= aExp ) {
45112fe8fb19SBen Gras aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
45122fe8fb19SBen Gras if ( ( a.high == LIT64( 0xC03E000000000000 ) )
45132fe8fb19SBen Gras && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4514*84d9c625SLionel Sambuc if ( aSig1 ) set_float_exception_inexact_flag();
45152fe8fb19SBen Gras }
45162fe8fb19SBen Gras else {
45172fe8fb19SBen Gras float_raise( float_flag_invalid );
45182fe8fb19SBen Gras }
45192fe8fb19SBen Gras return LIT64( 0xFFFFFFFFFFFFFFFF );
45202fe8fb19SBen Gras }
45212fe8fb19SBen Gras z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
45222fe8fb19SBen Gras if ( (bits64) ( aSig1<<shiftCount ) ) {
4523*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
45242fe8fb19SBen Gras }
45252fe8fb19SBen Gras }
45262fe8fb19SBen Gras else {
45272fe8fb19SBen Gras if ( aExp < 0x3FFF ) {
45282fe8fb19SBen Gras if ( aExp | aSig0 | aSig1 ) {
4529*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
45302fe8fb19SBen Gras }
45312fe8fb19SBen Gras return 0;
45322fe8fb19SBen Gras }
45332fe8fb19SBen Gras z = aSig0>>( - shiftCount );
45342fe8fb19SBen Gras if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4535*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
45362fe8fb19SBen Gras }
45372fe8fb19SBen Gras }
45382fe8fb19SBen Gras if ( aSign ) z = - z;
45392fe8fb19SBen Gras return z;
45402fe8fb19SBen Gras
45412fe8fb19SBen Gras }
4542f14fb602SLionel Sambuc #endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
45432fe8fb19SBen Gras
45442fe8fb19SBen Gras /*
45452fe8fb19SBen Gras -------------------------------------------------------------------------------
45462fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
45472fe8fb19SBen Gras value `a' to the single-precision floating-point format. The conversion
45482fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
45492fe8fb19SBen Gras Arithmetic.
45502fe8fb19SBen Gras -------------------------------------------------------------------------------
45512fe8fb19SBen Gras */
float128_to_float32(float128 a)45522fe8fb19SBen Gras float32 float128_to_float32( float128 a )
45532fe8fb19SBen Gras {
45542fe8fb19SBen Gras flag aSign;
45552fe8fb19SBen Gras int32 aExp;
45562fe8fb19SBen Gras bits64 aSig0, aSig1;
45572fe8fb19SBen Gras bits32 zSig;
45582fe8fb19SBen Gras
45592fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
45602fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
45612fe8fb19SBen Gras aExp = extractFloat128Exp( a );
45622fe8fb19SBen Gras aSign = extractFloat128Sign( a );
45632fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
45642fe8fb19SBen Gras if ( aSig0 | aSig1 ) {
45652fe8fb19SBen Gras return commonNaNToFloat32( float128ToCommonNaN( a ) );
45662fe8fb19SBen Gras }
45672fe8fb19SBen Gras return packFloat32( aSign, 0xFF, 0 );
45682fe8fb19SBen Gras }
45692fe8fb19SBen Gras aSig0 |= ( aSig1 != 0 );
45702fe8fb19SBen Gras shift64RightJamming( aSig0, 18, &aSig0 );
4571f14fb602SLionel Sambuc zSig = (bits32)aSig0;
45722fe8fb19SBen Gras if ( aExp || zSig ) {
45732fe8fb19SBen Gras zSig |= 0x40000000;
45742fe8fb19SBen Gras aExp -= 0x3F81;
45752fe8fb19SBen Gras }
45762fe8fb19SBen Gras return roundAndPackFloat32( aSign, aExp, zSig );
45772fe8fb19SBen Gras
45782fe8fb19SBen Gras }
45792fe8fb19SBen Gras
45802fe8fb19SBen Gras /*
45812fe8fb19SBen Gras -------------------------------------------------------------------------------
45822fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
45832fe8fb19SBen Gras value `a' to the double-precision floating-point format. The conversion
45842fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
45852fe8fb19SBen Gras Arithmetic.
45862fe8fb19SBen Gras -------------------------------------------------------------------------------
45872fe8fb19SBen Gras */
float128_to_float64(float128 a)45882fe8fb19SBen Gras float64 float128_to_float64( float128 a )
45892fe8fb19SBen Gras {
45902fe8fb19SBen Gras flag aSign;
45912fe8fb19SBen Gras int32 aExp;
45922fe8fb19SBen Gras bits64 aSig0, aSig1;
45932fe8fb19SBen Gras
45942fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
45952fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
45962fe8fb19SBen Gras aExp = extractFloat128Exp( a );
45972fe8fb19SBen Gras aSign = extractFloat128Sign( a );
45982fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
45992fe8fb19SBen Gras if ( aSig0 | aSig1 ) {
46002fe8fb19SBen Gras return commonNaNToFloat64( float128ToCommonNaN( a ) );
46012fe8fb19SBen Gras }
46022fe8fb19SBen Gras return packFloat64( aSign, 0x7FF, 0 );
46032fe8fb19SBen Gras }
46042fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
46052fe8fb19SBen Gras aSig0 |= ( aSig1 != 0 );
46062fe8fb19SBen Gras if ( aExp || aSig0 ) {
46072fe8fb19SBen Gras aSig0 |= LIT64( 0x4000000000000000 );
46082fe8fb19SBen Gras aExp -= 0x3C01;
46092fe8fb19SBen Gras }
46102fe8fb19SBen Gras return roundAndPackFloat64( aSign, aExp, aSig0 );
46112fe8fb19SBen Gras
46122fe8fb19SBen Gras }
46132fe8fb19SBen Gras
46142fe8fb19SBen Gras #ifdef FLOATX80
46152fe8fb19SBen Gras
46162fe8fb19SBen Gras /*
46172fe8fb19SBen Gras -------------------------------------------------------------------------------
46182fe8fb19SBen Gras Returns the result of converting the quadruple-precision floating-point
46192fe8fb19SBen Gras value `a' to the extended double-precision floating-point format. The
46202fe8fb19SBen Gras conversion is performed according to the IEC/IEEE Standard for Binary
46212fe8fb19SBen Gras Floating-Point Arithmetic.
46222fe8fb19SBen Gras -------------------------------------------------------------------------------
46232fe8fb19SBen Gras */
float128_to_floatx80(float128 a)46242fe8fb19SBen Gras floatx80 float128_to_floatx80( float128 a )
46252fe8fb19SBen Gras {
46262fe8fb19SBen Gras flag aSign;
46272fe8fb19SBen Gras int32 aExp;
46282fe8fb19SBen Gras bits64 aSig0, aSig1;
46292fe8fb19SBen Gras
46302fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
46312fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
46322fe8fb19SBen Gras aExp = extractFloat128Exp( a );
46332fe8fb19SBen Gras aSign = extractFloat128Sign( a );
46342fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
46352fe8fb19SBen Gras if ( aSig0 | aSig1 ) {
46362fe8fb19SBen Gras return commonNaNToFloatx80( float128ToCommonNaN( a ) );
46372fe8fb19SBen Gras }
46382fe8fb19SBen Gras return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
46392fe8fb19SBen Gras }
46402fe8fb19SBen Gras if ( aExp == 0 ) {
46412fe8fb19SBen Gras if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
46422fe8fb19SBen Gras normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
46432fe8fb19SBen Gras }
46442fe8fb19SBen Gras else {
46452fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
46462fe8fb19SBen Gras }
46472fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
46482fe8fb19SBen Gras return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
46492fe8fb19SBen Gras
46502fe8fb19SBen Gras }
46512fe8fb19SBen Gras
46522fe8fb19SBen Gras #endif
46532fe8fb19SBen Gras
46542fe8fb19SBen Gras /*
46552fe8fb19SBen Gras -------------------------------------------------------------------------------
46562fe8fb19SBen Gras Rounds the quadruple-precision floating-point value `a' to an integer, and
46572fe8fb19SBen Gras returns the result as a quadruple-precision floating-point value. The
46582fe8fb19SBen Gras operation is performed according to the IEC/IEEE Standard for Binary
46592fe8fb19SBen Gras Floating-Point Arithmetic.
46602fe8fb19SBen Gras -------------------------------------------------------------------------------
46612fe8fb19SBen Gras */
float128_round_to_int(float128 a)46622fe8fb19SBen Gras float128 float128_round_to_int( float128 a )
46632fe8fb19SBen Gras {
46642fe8fb19SBen Gras flag aSign;
46652fe8fb19SBen Gras int32 aExp;
46662fe8fb19SBen Gras bits64 lastBitMask, roundBitsMask;
46672fe8fb19SBen Gras int8 roundingMode;
46682fe8fb19SBen Gras float128 z;
46692fe8fb19SBen Gras
46702fe8fb19SBen Gras aExp = extractFloat128Exp( a );
46712fe8fb19SBen Gras if ( 0x402F <= aExp ) {
46722fe8fb19SBen Gras if ( 0x406F <= aExp ) {
46732fe8fb19SBen Gras if ( ( aExp == 0x7FFF )
46742fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
46752fe8fb19SBen Gras ) {
46762fe8fb19SBen Gras return propagateFloat128NaN( a, a );
46772fe8fb19SBen Gras }
46782fe8fb19SBen Gras return a;
46792fe8fb19SBen Gras }
46802fe8fb19SBen Gras lastBitMask = 1;
46812fe8fb19SBen Gras lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
46822fe8fb19SBen Gras roundBitsMask = lastBitMask - 1;
46832fe8fb19SBen Gras z = a;
46842fe8fb19SBen Gras roundingMode = float_rounding_mode;
46852fe8fb19SBen Gras if ( roundingMode == float_round_nearest_even ) {
46862fe8fb19SBen Gras if ( lastBitMask ) {
46872fe8fb19SBen Gras add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
46882fe8fb19SBen Gras if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
46892fe8fb19SBen Gras }
46902fe8fb19SBen Gras else {
46912fe8fb19SBen Gras if ( (sbits64) z.low < 0 ) {
46922fe8fb19SBen Gras ++z.high;
46932fe8fb19SBen Gras if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
46942fe8fb19SBen Gras }
46952fe8fb19SBen Gras }
46962fe8fb19SBen Gras }
46972fe8fb19SBen Gras else if ( roundingMode != float_round_to_zero ) {
46982fe8fb19SBen Gras if ( extractFloat128Sign( z )
46992fe8fb19SBen Gras ^ ( roundingMode == float_round_up ) ) {
47002fe8fb19SBen Gras add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
47012fe8fb19SBen Gras }
47022fe8fb19SBen Gras }
47032fe8fb19SBen Gras z.low &= ~ roundBitsMask;
47042fe8fb19SBen Gras }
47052fe8fb19SBen Gras else {
47062fe8fb19SBen Gras if ( aExp < 0x3FFF ) {
47072fe8fb19SBen Gras if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4708*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
47092fe8fb19SBen Gras aSign = extractFloat128Sign( a );
47102fe8fb19SBen Gras switch ( float_rounding_mode ) {
47112fe8fb19SBen Gras case float_round_nearest_even:
47122fe8fb19SBen Gras if ( ( aExp == 0x3FFE )
47132fe8fb19SBen Gras && ( extractFloat128Frac0( a )
47142fe8fb19SBen Gras | extractFloat128Frac1( a ) )
47152fe8fb19SBen Gras ) {
47162fe8fb19SBen Gras return packFloat128( aSign, 0x3FFF, 0, 0 );
47172fe8fb19SBen Gras }
47182fe8fb19SBen Gras break;
47192fe8fb19SBen Gras case float_round_to_zero:
47202fe8fb19SBen Gras break;
47212fe8fb19SBen Gras case float_round_down:
47222fe8fb19SBen Gras return
47232fe8fb19SBen Gras aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
47242fe8fb19SBen Gras : packFloat128( 0, 0, 0, 0 );
47252fe8fb19SBen Gras case float_round_up:
47262fe8fb19SBen Gras return
47272fe8fb19SBen Gras aSign ? packFloat128( 1, 0, 0, 0 )
47282fe8fb19SBen Gras : packFloat128( 0, 0x3FFF, 0, 0 );
47292fe8fb19SBen Gras }
47302fe8fb19SBen Gras return packFloat128( aSign, 0, 0, 0 );
47312fe8fb19SBen Gras }
47322fe8fb19SBen Gras lastBitMask = 1;
47332fe8fb19SBen Gras lastBitMask <<= 0x402F - aExp;
47342fe8fb19SBen Gras roundBitsMask = lastBitMask - 1;
47352fe8fb19SBen Gras z.low = 0;
47362fe8fb19SBen Gras z.high = a.high;
47372fe8fb19SBen Gras roundingMode = float_rounding_mode;
47382fe8fb19SBen Gras if ( roundingMode == float_round_nearest_even ) {
47392fe8fb19SBen Gras z.high += lastBitMask>>1;
47402fe8fb19SBen Gras if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
47412fe8fb19SBen Gras z.high &= ~ lastBitMask;
47422fe8fb19SBen Gras }
47432fe8fb19SBen Gras }
47442fe8fb19SBen Gras else if ( roundingMode != float_round_to_zero ) {
47452fe8fb19SBen Gras if ( extractFloat128Sign( z )
47462fe8fb19SBen Gras ^ ( roundingMode == float_round_up ) ) {
47472fe8fb19SBen Gras z.high |= ( a.low != 0 );
47482fe8fb19SBen Gras z.high += roundBitsMask;
47492fe8fb19SBen Gras }
47502fe8fb19SBen Gras }
47512fe8fb19SBen Gras z.high &= ~ roundBitsMask;
47522fe8fb19SBen Gras }
47532fe8fb19SBen Gras if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4754*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
47552fe8fb19SBen Gras }
47562fe8fb19SBen Gras return z;
47572fe8fb19SBen Gras
47582fe8fb19SBen Gras }
47592fe8fb19SBen Gras
47602fe8fb19SBen Gras /*
47612fe8fb19SBen Gras -------------------------------------------------------------------------------
47622fe8fb19SBen Gras Returns the result of adding the absolute values of the quadruple-precision
47632fe8fb19SBen Gras floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
47642fe8fb19SBen Gras before being returned. `zSign' is ignored if the result is a NaN.
47652fe8fb19SBen Gras The addition is performed according to the IEC/IEEE Standard for Binary
47662fe8fb19SBen Gras Floating-Point Arithmetic.
47672fe8fb19SBen Gras -------------------------------------------------------------------------------
47682fe8fb19SBen Gras */
addFloat128Sigs(float128 a,float128 b,flag zSign)47692fe8fb19SBen Gras static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
47702fe8fb19SBen Gras {
47712fe8fb19SBen Gras int32 aExp, bExp, zExp;
47722fe8fb19SBen Gras bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
47732fe8fb19SBen Gras int32 expDiff;
47742fe8fb19SBen Gras
47752fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
47762fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
47772fe8fb19SBen Gras aExp = extractFloat128Exp( a );
47782fe8fb19SBen Gras bSig1 = extractFloat128Frac1( b );
47792fe8fb19SBen Gras bSig0 = extractFloat128Frac0( b );
47802fe8fb19SBen Gras bExp = extractFloat128Exp( b );
47812fe8fb19SBen Gras expDiff = aExp - bExp;
47822fe8fb19SBen Gras if ( 0 < expDiff ) {
47832fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
47842fe8fb19SBen Gras if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
47852fe8fb19SBen Gras return a;
47862fe8fb19SBen Gras }
47872fe8fb19SBen Gras if ( bExp == 0 ) {
47882fe8fb19SBen Gras --expDiff;
47892fe8fb19SBen Gras }
47902fe8fb19SBen Gras else {
47912fe8fb19SBen Gras bSig0 |= LIT64( 0x0001000000000000 );
47922fe8fb19SBen Gras }
47932fe8fb19SBen Gras shift128ExtraRightJamming(
47942fe8fb19SBen Gras bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
47952fe8fb19SBen Gras zExp = aExp;
47962fe8fb19SBen Gras }
47972fe8fb19SBen Gras else if ( expDiff < 0 ) {
47982fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
47992fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
48002fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
48012fe8fb19SBen Gras }
48022fe8fb19SBen Gras if ( aExp == 0 ) {
48032fe8fb19SBen Gras ++expDiff;
48042fe8fb19SBen Gras }
48052fe8fb19SBen Gras else {
48062fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
48072fe8fb19SBen Gras }
48082fe8fb19SBen Gras shift128ExtraRightJamming(
48092fe8fb19SBen Gras aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
48102fe8fb19SBen Gras zExp = bExp;
48112fe8fb19SBen Gras }
48122fe8fb19SBen Gras else {
48132fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
48142fe8fb19SBen Gras if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
48152fe8fb19SBen Gras return propagateFloat128NaN( a, b );
48162fe8fb19SBen Gras }
48172fe8fb19SBen Gras return a;
48182fe8fb19SBen Gras }
48192fe8fb19SBen Gras add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
48202fe8fb19SBen Gras if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
48212fe8fb19SBen Gras zSig2 = 0;
48222fe8fb19SBen Gras zSig0 |= LIT64( 0x0002000000000000 );
48232fe8fb19SBen Gras zExp = aExp;
48242fe8fb19SBen Gras goto shiftRight1;
48252fe8fb19SBen Gras }
48262fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
48272fe8fb19SBen Gras add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
48282fe8fb19SBen Gras --zExp;
48292fe8fb19SBen Gras if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
48302fe8fb19SBen Gras ++zExp;
48312fe8fb19SBen Gras shiftRight1:
48322fe8fb19SBen Gras shift128ExtraRightJamming(
48332fe8fb19SBen Gras zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
48342fe8fb19SBen Gras roundAndPack:
48352fe8fb19SBen Gras return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
48362fe8fb19SBen Gras
48372fe8fb19SBen Gras }
48382fe8fb19SBen Gras
48392fe8fb19SBen Gras /*
48402fe8fb19SBen Gras -------------------------------------------------------------------------------
48412fe8fb19SBen Gras Returns the result of subtracting the absolute values of the quadruple-
48422fe8fb19SBen Gras precision floating-point values `a' and `b'. If `zSign' is 1, the
48432fe8fb19SBen Gras difference is negated before being returned. `zSign' is ignored if the
48442fe8fb19SBen Gras result is a NaN. The subtraction is performed according to the IEC/IEEE
48452fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
48462fe8fb19SBen Gras -------------------------------------------------------------------------------
48472fe8fb19SBen Gras */
subFloat128Sigs(float128 a,float128 b,flag zSign)48482fe8fb19SBen Gras static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
48492fe8fb19SBen Gras {
48502fe8fb19SBen Gras int32 aExp, bExp, zExp;
48512fe8fb19SBen Gras bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
48522fe8fb19SBen Gras int32 expDiff;
48532fe8fb19SBen Gras float128 z;
48542fe8fb19SBen Gras
48552fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
48562fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
48572fe8fb19SBen Gras aExp = extractFloat128Exp( a );
48582fe8fb19SBen Gras bSig1 = extractFloat128Frac1( b );
48592fe8fb19SBen Gras bSig0 = extractFloat128Frac0( b );
48602fe8fb19SBen Gras bExp = extractFloat128Exp( b );
48612fe8fb19SBen Gras expDiff = aExp - bExp;
48622fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
48632fe8fb19SBen Gras shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
48642fe8fb19SBen Gras if ( 0 < expDiff ) goto aExpBigger;
48652fe8fb19SBen Gras if ( expDiff < 0 ) goto bExpBigger;
48662fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
48672fe8fb19SBen Gras if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
48682fe8fb19SBen Gras return propagateFloat128NaN( a, b );
48692fe8fb19SBen Gras }
48702fe8fb19SBen Gras float_raise( float_flag_invalid );
48712fe8fb19SBen Gras z.low = float128_default_nan_low;
48722fe8fb19SBen Gras z.high = float128_default_nan_high;
48732fe8fb19SBen Gras return z;
48742fe8fb19SBen Gras }
48752fe8fb19SBen Gras if ( aExp == 0 ) {
48762fe8fb19SBen Gras aExp = 1;
48772fe8fb19SBen Gras bExp = 1;
48782fe8fb19SBen Gras }
48792fe8fb19SBen Gras if ( bSig0 < aSig0 ) goto aBigger;
48802fe8fb19SBen Gras if ( aSig0 < bSig0 ) goto bBigger;
48812fe8fb19SBen Gras if ( bSig1 < aSig1 ) goto aBigger;
48822fe8fb19SBen Gras if ( aSig1 < bSig1 ) goto bBigger;
48832fe8fb19SBen Gras return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
48842fe8fb19SBen Gras bExpBigger:
48852fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
48862fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
48872fe8fb19SBen Gras return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
48882fe8fb19SBen Gras }
48892fe8fb19SBen Gras if ( aExp == 0 ) {
48902fe8fb19SBen Gras ++expDiff;
48912fe8fb19SBen Gras }
48922fe8fb19SBen Gras else {
48932fe8fb19SBen Gras aSig0 |= LIT64( 0x4000000000000000 );
48942fe8fb19SBen Gras }
48952fe8fb19SBen Gras shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
48962fe8fb19SBen Gras bSig0 |= LIT64( 0x4000000000000000 );
48972fe8fb19SBen Gras bBigger:
48982fe8fb19SBen Gras sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
48992fe8fb19SBen Gras zExp = bExp;
49002fe8fb19SBen Gras zSign ^= 1;
49012fe8fb19SBen Gras goto normalizeRoundAndPack;
49022fe8fb19SBen Gras aExpBigger:
49032fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
49042fe8fb19SBen Gras if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
49052fe8fb19SBen Gras return a;
49062fe8fb19SBen Gras }
49072fe8fb19SBen Gras if ( bExp == 0 ) {
49082fe8fb19SBen Gras --expDiff;
49092fe8fb19SBen Gras }
49102fe8fb19SBen Gras else {
49112fe8fb19SBen Gras bSig0 |= LIT64( 0x4000000000000000 );
49122fe8fb19SBen Gras }
49132fe8fb19SBen Gras shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
49142fe8fb19SBen Gras aSig0 |= LIT64( 0x4000000000000000 );
49152fe8fb19SBen Gras aBigger:
49162fe8fb19SBen Gras sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
49172fe8fb19SBen Gras zExp = aExp;
49182fe8fb19SBen Gras normalizeRoundAndPack:
49192fe8fb19SBen Gras --zExp;
49202fe8fb19SBen Gras return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
49212fe8fb19SBen Gras
49222fe8fb19SBen Gras }
49232fe8fb19SBen Gras
49242fe8fb19SBen Gras /*
49252fe8fb19SBen Gras -------------------------------------------------------------------------------
49262fe8fb19SBen Gras Returns the result of adding the quadruple-precision floating-point values
49272fe8fb19SBen Gras `a' and `b'. The operation is performed according to the IEC/IEEE Standard
49282fe8fb19SBen Gras for Binary Floating-Point Arithmetic.
49292fe8fb19SBen Gras -------------------------------------------------------------------------------
49302fe8fb19SBen Gras */
float128_add(float128 a,float128 b)49312fe8fb19SBen Gras float128 float128_add( float128 a, float128 b )
49322fe8fb19SBen Gras {
49332fe8fb19SBen Gras flag aSign, bSign;
49342fe8fb19SBen Gras
49352fe8fb19SBen Gras aSign = extractFloat128Sign( a );
49362fe8fb19SBen Gras bSign = extractFloat128Sign( b );
49372fe8fb19SBen Gras if ( aSign == bSign ) {
49382fe8fb19SBen Gras return addFloat128Sigs( a, b, aSign );
49392fe8fb19SBen Gras }
49402fe8fb19SBen Gras else {
49412fe8fb19SBen Gras return subFloat128Sigs( a, b, aSign );
49422fe8fb19SBen Gras }
49432fe8fb19SBen Gras
49442fe8fb19SBen Gras }
49452fe8fb19SBen Gras
49462fe8fb19SBen Gras /*
49472fe8fb19SBen Gras -------------------------------------------------------------------------------
49482fe8fb19SBen Gras Returns the result of subtracting the quadruple-precision floating-point
49492fe8fb19SBen Gras values `a' and `b'. The operation is performed according to the IEC/IEEE
49502fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
49512fe8fb19SBen Gras -------------------------------------------------------------------------------
49522fe8fb19SBen Gras */
float128_sub(float128 a,float128 b)49532fe8fb19SBen Gras float128 float128_sub( float128 a, float128 b )
49542fe8fb19SBen Gras {
49552fe8fb19SBen Gras flag aSign, bSign;
49562fe8fb19SBen Gras
49572fe8fb19SBen Gras aSign = extractFloat128Sign( a );
49582fe8fb19SBen Gras bSign = extractFloat128Sign( b );
49592fe8fb19SBen Gras if ( aSign == bSign ) {
49602fe8fb19SBen Gras return subFloat128Sigs( a, b, aSign );
49612fe8fb19SBen Gras }
49622fe8fb19SBen Gras else {
49632fe8fb19SBen Gras return addFloat128Sigs( a, b, aSign );
49642fe8fb19SBen Gras }
49652fe8fb19SBen Gras
49662fe8fb19SBen Gras }
49672fe8fb19SBen Gras
49682fe8fb19SBen Gras /*
49692fe8fb19SBen Gras -------------------------------------------------------------------------------
49702fe8fb19SBen Gras Returns the result of multiplying the quadruple-precision floating-point
49712fe8fb19SBen Gras values `a' and `b'. The operation is performed according to the IEC/IEEE
49722fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
49732fe8fb19SBen Gras -------------------------------------------------------------------------------
49742fe8fb19SBen Gras */
float128_mul(float128 a,float128 b)49752fe8fb19SBen Gras float128 float128_mul( float128 a, float128 b )
49762fe8fb19SBen Gras {
49772fe8fb19SBen Gras flag aSign, bSign, zSign;
49782fe8fb19SBen Gras int32 aExp, bExp, zExp;
49792fe8fb19SBen Gras bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
49802fe8fb19SBen Gras float128 z;
49812fe8fb19SBen Gras
49822fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
49832fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
49842fe8fb19SBen Gras aExp = extractFloat128Exp( a );
49852fe8fb19SBen Gras aSign = extractFloat128Sign( a );
49862fe8fb19SBen Gras bSig1 = extractFloat128Frac1( b );
49872fe8fb19SBen Gras bSig0 = extractFloat128Frac0( b );
49882fe8fb19SBen Gras bExp = extractFloat128Exp( b );
49892fe8fb19SBen Gras bSign = extractFloat128Sign( b );
49902fe8fb19SBen Gras zSign = aSign ^ bSign;
49912fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
49922fe8fb19SBen Gras if ( ( aSig0 | aSig1 )
49932fe8fb19SBen Gras || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
49942fe8fb19SBen Gras return propagateFloat128NaN( a, b );
49952fe8fb19SBen Gras }
49962fe8fb19SBen Gras if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
49972fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
49982fe8fb19SBen Gras }
49992fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
50002fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
50012fe8fb19SBen Gras if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
50022fe8fb19SBen Gras invalid:
50032fe8fb19SBen Gras float_raise( float_flag_invalid );
50042fe8fb19SBen Gras z.low = float128_default_nan_low;
50052fe8fb19SBen Gras z.high = float128_default_nan_high;
50062fe8fb19SBen Gras return z;
50072fe8fb19SBen Gras }
50082fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
50092fe8fb19SBen Gras }
50102fe8fb19SBen Gras if ( aExp == 0 ) {
50112fe8fb19SBen Gras if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
50122fe8fb19SBen Gras normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
50132fe8fb19SBen Gras }
50142fe8fb19SBen Gras if ( bExp == 0 ) {
50152fe8fb19SBen Gras if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
50162fe8fb19SBen Gras normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
50172fe8fb19SBen Gras }
50182fe8fb19SBen Gras zExp = aExp + bExp - 0x4000;
50192fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
50202fe8fb19SBen Gras shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
50212fe8fb19SBen Gras mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
50222fe8fb19SBen Gras add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
50232fe8fb19SBen Gras zSig2 |= ( zSig3 != 0 );
50242fe8fb19SBen Gras if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
50252fe8fb19SBen Gras shift128ExtraRightJamming(
50262fe8fb19SBen Gras zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
50272fe8fb19SBen Gras ++zExp;
50282fe8fb19SBen Gras }
50292fe8fb19SBen Gras return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
50302fe8fb19SBen Gras
50312fe8fb19SBen Gras }
50322fe8fb19SBen Gras
50332fe8fb19SBen Gras /*
50342fe8fb19SBen Gras -------------------------------------------------------------------------------
50352fe8fb19SBen Gras Returns the result of dividing the quadruple-precision floating-point value
50362fe8fb19SBen Gras `a' by the corresponding value `b'. The operation is performed according to
50372fe8fb19SBen Gras the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
50382fe8fb19SBen Gras -------------------------------------------------------------------------------
50392fe8fb19SBen Gras */
float128_div(float128 a,float128 b)50402fe8fb19SBen Gras float128 float128_div( float128 a, float128 b )
50412fe8fb19SBen Gras {
50422fe8fb19SBen Gras flag aSign, bSign, zSign;
50432fe8fb19SBen Gras int32 aExp, bExp, zExp;
50442fe8fb19SBen Gras bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
50452fe8fb19SBen Gras bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
50462fe8fb19SBen Gras float128 z;
50472fe8fb19SBen Gras
50482fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
50492fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
50502fe8fb19SBen Gras aExp = extractFloat128Exp( a );
50512fe8fb19SBen Gras aSign = extractFloat128Sign( a );
50522fe8fb19SBen Gras bSig1 = extractFloat128Frac1( b );
50532fe8fb19SBen Gras bSig0 = extractFloat128Frac0( b );
50542fe8fb19SBen Gras bExp = extractFloat128Exp( b );
50552fe8fb19SBen Gras bSign = extractFloat128Sign( b );
50562fe8fb19SBen Gras zSign = aSign ^ bSign;
50572fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
50582fe8fb19SBen Gras if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
50592fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
50602fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
50612fe8fb19SBen Gras goto invalid;
50622fe8fb19SBen Gras }
50632fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
50642fe8fb19SBen Gras }
50652fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
50662fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
50672fe8fb19SBen Gras return packFloat128( zSign, 0, 0, 0 );
50682fe8fb19SBen Gras }
50692fe8fb19SBen Gras if ( bExp == 0 ) {
50702fe8fb19SBen Gras if ( ( bSig0 | bSig1 ) == 0 ) {
50712fe8fb19SBen Gras if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
50722fe8fb19SBen Gras invalid:
50732fe8fb19SBen Gras float_raise( float_flag_invalid );
50742fe8fb19SBen Gras z.low = float128_default_nan_low;
50752fe8fb19SBen Gras z.high = float128_default_nan_high;
50762fe8fb19SBen Gras return z;
50772fe8fb19SBen Gras }
50782fe8fb19SBen Gras float_raise( float_flag_divbyzero );
50792fe8fb19SBen Gras return packFloat128( zSign, 0x7FFF, 0, 0 );
50802fe8fb19SBen Gras }
50812fe8fb19SBen Gras normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
50822fe8fb19SBen Gras }
50832fe8fb19SBen Gras if ( aExp == 0 ) {
50842fe8fb19SBen Gras if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
50852fe8fb19SBen Gras normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
50862fe8fb19SBen Gras }
50872fe8fb19SBen Gras zExp = aExp - bExp + 0x3FFD;
50882fe8fb19SBen Gras shortShift128Left(
50892fe8fb19SBen Gras aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
50902fe8fb19SBen Gras shortShift128Left(
50912fe8fb19SBen Gras bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
50922fe8fb19SBen Gras if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
50932fe8fb19SBen Gras shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
50942fe8fb19SBen Gras ++zExp;
50952fe8fb19SBen Gras }
50962fe8fb19SBen Gras zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
50972fe8fb19SBen Gras mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
50982fe8fb19SBen Gras sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
50992fe8fb19SBen Gras while ( (sbits64) rem0 < 0 ) {
51002fe8fb19SBen Gras --zSig0;
51012fe8fb19SBen Gras add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
51022fe8fb19SBen Gras }
51032fe8fb19SBen Gras zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
51042fe8fb19SBen Gras if ( ( zSig1 & 0x3FFF ) <= 4 ) {
51052fe8fb19SBen Gras mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
51062fe8fb19SBen Gras sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
51072fe8fb19SBen Gras while ( (sbits64) rem1 < 0 ) {
51082fe8fb19SBen Gras --zSig1;
51092fe8fb19SBen Gras add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
51102fe8fb19SBen Gras }
51112fe8fb19SBen Gras zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
51122fe8fb19SBen Gras }
51132fe8fb19SBen Gras shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
51142fe8fb19SBen Gras return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
51152fe8fb19SBen Gras
51162fe8fb19SBen Gras }
51172fe8fb19SBen Gras
51182fe8fb19SBen Gras /*
51192fe8fb19SBen Gras -------------------------------------------------------------------------------
51202fe8fb19SBen Gras Returns the remainder of the quadruple-precision floating-point value `a'
51212fe8fb19SBen Gras with respect to the corresponding value `b'. The operation is performed
51222fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
51232fe8fb19SBen Gras -------------------------------------------------------------------------------
51242fe8fb19SBen Gras */
float128_rem(float128 a,float128 b)51252fe8fb19SBen Gras float128 float128_rem( float128 a, float128 b )
51262fe8fb19SBen Gras {
5127f14fb602SLionel Sambuc flag aSign, zSign;
51282fe8fb19SBen Gras int32 aExp, bExp, expDiff;
51292fe8fb19SBen Gras bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
51302fe8fb19SBen Gras bits64 allZero, alternateASig0, alternateASig1, sigMean1;
51312fe8fb19SBen Gras sbits64 sigMean0;
51322fe8fb19SBen Gras float128 z;
51332fe8fb19SBen Gras
51342fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
51352fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
51362fe8fb19SBen Gras aExp = extractFloat128Exp( a );
51372fe8fb19SBen Gras aSign = extractFloat128Sign( a );
51382fe8fb19SBen Gras bSig1 = extractFloat128Frac1( b );
51392fe8fb19SBen Gras bSig0 = extractFloat128Frac0( b );
51402fe8fb19SBen Gras bExp = extractFloat128Exp( b );
51412fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
51422fe8fb19SBen Gras if ( ( aSig0 | aSig1 )
51432fe8fb19SBen Gras || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
51442fe8fb19SBen Gras return propagateFloat128NaN( a, b );
51452fe8fb19SBen Gras }
51462fe8fb19SBen Gras goto invalid;
51472fe8fb19SBen Gras }
51482fe8fb19SBen Gras if ( bExp == 0x7FFF ) {
51492fe8fb19SBen Gras if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
51502fe8fb19SBen Gras return a;
51512fe8fb19SBen Gras }
51522fe8fb19SBen Gras if ( bExp == 0 ) {
51532fe8fb19SBen Gras if ( ( bSig0 | bSig1 ) == 0 ) {
51542fe8fb19SBen Gras invalid:
51552fe8fb19SBen Gras float_raise( float_flag_invalid );
51562fe8fb19SBen Gras z.low = float128_default_nan_low;
51572fe8fb19SBen Gras z.high = float128_default_nan_high;
51582fe8fb19SBen Gras return z;
51592fe8fb19SBen Gras }
51602fe8fb19SBen Gras normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
51612fe8fb19SBen Gras }
51622fe8fb19SBen Gras if ( aExp == 0 ) {
51632fe8fb19SBen Gras if ( ( aSig0 | aSig1 ) == 0 ) return a;
51642fe8fb19SBen Gras normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
51652fe8fb19SBen Gras }
51662fe8fb19SBen Gras expDiff = aExp - bExp;
51672fe8fb19SBen Gras if ( expDiff < -1 ) return a;
51682fe8fb19SBen Gras shortShift128Left(
51692fe8fb19SBen Gras aSig0 | LIT64( 0x0001000000000000 ),
51702fe8fb19SBen Gras aSig1,
51712fe8fb19SBen Gras 15 - ( expDiff < 0 ),
51722fe8fb19SBen Gras &aSig0,
51732fe8fb19SBen Gras &aSig1
51742fe8fb19SBen Gras );
51752fe8fb19SBen Gras shortShift128Left(
51762fe8fb19SBen Gras bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
51772fe8fb19SBen Gras q = le128( bSig0, bSig1, aSig0, aSig1 );
51782fe8fb19SBen Gras if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
51792fe8fb19SBen Gras expDiff -= 64;
51802fe8fb19SBen Gras while ( 0 < expDiff ) {
51812fe8fb19SBen Gras q = estimateDiv128To64( aSig0, aSig1, bSig0 );
51822fe8fb19SBen Gras q = ( 4 < q ) ? q - 4 : 0;
51832fe8fb19SBen Gras mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
51842fe8fb19SBen Gras shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
51852fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
51862fe8fb19SBen Gras sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
51872fe8fb19SBen Gras expDiff -= 61;
51882fe8fb19SBen Gras }
51892fe8fb19SBen Gras if ( -64 < expDiff ) {
51902fe8fb19SBen Gras q = estimateDiv128To64( aSig0, aSig1, bSig0 );
51912fe8fb19SBen Gras q = ( 4 < q ) ? q - 4 : 0;
51922fe8fb19SBen Gras q >>= - expDiff;
51932fe8fb19SBen Gras shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
51942fe8fb19SBen Gras expDiff += 52;
51952fe8fb19SBen Gras if ( expDiff < 0 ) {
51962fe8fb19SBen Gras shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
51972fe8fb19SBen Gras }
51982fe8fb19SBen Gras else {
51992fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
52002fe8fb19SBen Gras }
52012fe8fb19SBen Gras mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
52022fe8fb19SBen Gras sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
52032fe8fb19SBen Gras }
52042fe8fb19SBen Gras else {
52052fe8fb19SBen Gras shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
52062fe8fb19SBen Gras shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
52072fe8fb19SBen Gras }
52082fe8fb19SBen Gras do {
52092fe8fb19SBen Gras alternateASig0 = aSig0;
52102fe8fb19SBen Gras alternateASig1 = aSig1;
52112fe8fb19SBen Gras ++q;
52122fe8fb19SBen Gras sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
52132fe8fb19SBen Gras } while ( 0 <= (sbits64) aSig0 );
52142fe8fb19SBen Gras add128(
52152fe8fb19SBen Gras aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
52162fe8fb19SBen Gras if ( ( sigMean0 < 0 )
52172fe8fb19SBen Gras || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
52182fe8fb19SBen Gras aSig0 = alternateASig0;
52192fe8fb19SBen Gras aSig1 = alternateASig1;
52202fe8fb19SBen Gras }
52212fe8fb19SBen Gras zSign = ( (sbits64) aSig0 < 0 );
52222fe8fb19SBen Gras if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
52232fe8fb19SBen Gras return
52242fe8fb19SBen Gras normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
52252fe8fb19SBen Gras
52262fe8fb19SBen Gras }
52272fe8fb19SBen Gras
52282fe8fb19SBen Gras /*
52292fe8fb19SBen Gras -------------------------------------------------------------------------------
52302fe8fb19SBen Gras Returns the square root of the quadruple-precision floating-point value `a'.
52312fe8fb19SBen Gras The operation is performed according to the IEC/IEEE Standard for Binary
52322fe8fb19SBen Gras Floating-Point Arithmetic.
52332fe8fb19SBen Gras -------------------------------------------------------------------------------
52342fe8fb19SBen Gras */
float128_sqrt(float128 a)52352fe8fb19SBen Gras float128 float128_sqrt( float128 a )
52362fe8fb19SBen Gras {
52372fe8fb19SBen Gras flag aSign;
52382fe8fb19SBen Gras int32 aExp, zExp;
52392fe8fb19SBen Gras bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
52402fe8fb19SBen Gras bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
52412fe8fb19SBen Gras float128 z;
52422fe8fb19SBen Gras
52432fe8fb19SBen Gras aSig1 = extractFloat128Frac1( a );
52442fe8fb19SBen Gras aSig0 = extractFloat128Frac0( a );
52452fe8fb19SBen Gras aExp = extractFloat128Exp( a );
52462fe8fb19SBen Gras aSign = extractFloat128Sign( a );
52472fe8fb19SBen Gras if ( aExp == 0x7FFF ) {
52482fe8fb19SBen Gras if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
52492fe8fb19SBen Gras if ( ! aSign ) return a;
52502fe8fb19SBen Gras goto invalid;
52512fe8fb19SBen Gras }
52522fe8fb19SBen Gras if ( aSign ) {
52532fe8fb19SBen Gras if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
52542fe8fb19SBen Gras invalid:
52552fe8fb19SBen Gras float_raise( float_flag_invalid );
52562fe8fb19SBen Gras z.low = float128_default_nan_low;
52572fe8fb19SBen Gras z.high = float128_default_nan_high;
52582fe8fb19SBen Gras return z;
52592fe8fb19SBen Gras }
52602fe8fb19SBen Gras if ( aExp == 0 ) {
52612fe8fb19SBen Gras if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
52622fe8fb19SBen Gras normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
52632fe8fb19SBen Gras }
5264*84d9c625SLionel Sambuc zExp = (int32) ( (aExp - 0x3FFF) >> 1) + 0x3FFE;
52652fe8fb19SBen Gras aSig0 |= LIT64( 0x0001000000000000 );
5266f14fb602SLionel Sambuc zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
52672fe8fb19SBen Gras shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
52682fe8fb19SBen Gras zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
52692fe8fb19SBen Gras doubleZSig0 = zSig0<<1;
52702fe8fb19SBen Gras mul64To128( zSig0, zSig0, &term0, &term1 );
52712fe8fb19SBen Gras sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
52722fe8fb19SBen Gras while ( (sbits64) rem0 < 0 ) {
52732fe8fb19SBen Gras --zSig0;
52742fe8fb19SBen Gras doubleZSig0 -= 2;
52752fe8fb19SBen Gras add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
52762fe8fb19SBen Gras }
52772fe8fb19SBen Gras zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
52782fe8fb19SBen Gras if ( ( zSig1 & 0x1FFF ) <= 5 ) {
52792fe8fb19SBen Gras if ( zSig1 == 0 ) zSig1 = 1;
52802fe8fb19SBen Gras mul64To128( doubleZSig0, zSig1, &term1, &term2 );
52812fe8fb19SBen Gras sub128( rem1, 0, term1, term2, &rem1, &rem2 );
52822fe8fb19SBen Gras mul64To128( zSig1, zSig1, &term2, &term3 );
52832fe8fb19SBen Gras sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
52842fe8fb19SBen Gras while ( (sbits64) rem1 < 0 ) {
52852fe8fb19SBen Gras --zSig1;
52862fe8fb19SBen Gras shortShift128Left( 0, zSig1, 1, &term2, &term3 );
52872fe8fb19SBen Gras term3 |= 1;
52882fe8fb19SBen Gras term2 |= doubleZSig0;
52892fe8fb19SBen Gras add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
52902fe8fb19SBen Gras }
52912fe8fb19SBen Gras zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
52922fe8fb19SBen Gras }
52932fe8fb19SBen Gras shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
52942fe8fb19SBen Gras return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
52952fe8fb19SBen Gras
52962fe8fb19SBen Gras }
52972fe8fb19SBen Gras
52982fe8fb19SBen Gras /*
52992fe8fb19SBen Gras -------------------------------------------------------------------------------
53002fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is equal to
53012fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The comparison is performed
53022fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
53032fe8fb19SBen Gras -------------------------------------------------------------------------------
53042fe8fb19SBen Gras */
float128_eq(float128 a,float128 b)53052fe8fb19SBen Gras flag float128_eq( float128 a, float128 b )
53062fe8fb19SBen Gras {
53072fe8fb19SBen Gras
53082fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
53092fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
53102fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
53112fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
53122fe8fb19SBen Gras ) {
53132fe8fb19SBen Gras if ( float128_is_signaling_nan( a )
53142fe8fb19SBen Gras || float128_is_signaling_nan( b ) ) {
53152fe8fb19SBen Gras float_raise( float_flag_invalid );
53162fe8fb19SBen Gras }
53172fe8fb19SBen Gras return 0;
53182fe8fb19SBen Gras }
53192fe8fb19SBen Gras return
53202fe8fb19SBen Gras ( a.low == b.low )
53212fe8fb19SBen Gras && ( ( a.high == b.high )
53222fe8fb19SBen Gras || ( ( a.low == 0 )
53232fe8fb19SBen Gras && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
53242fe8fb19SBen Gras );
53252fe8fb19SBen Gras
53262fe8fb19SBen Gras }
53272fe8fb19SBen Gras
53282fe8fb19SBen Gras /*
53292fe8fb19SBen Gras -------------------------------------------------------------------------------
53302fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is less than
53312fe8fb19SBen Gras or equal to the corresponding value `b', and 0 otherwise. The comparison
53322fe8fb19SBen Gras is performed according to the IEC/IEEE Standard for Binary Floating-Point
53332fe8fb19SBen Gras Arithmetic.
53342fe8fb19SBen Gras -------------------------------------------------------------------------------
53352fe8fb19SBen Gras */
float128_le(float128 a,float128 b)53362fe8fb19SBen Gras flag float128_le( float128 a, float128 b )
53372fe8fb19SBen Gras {
53382fe8fb19SBen Gras flag aSign, bSign;
53392fe8fb19SBen Gras
53402fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
53412fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
53422fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
53432fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
53442fe8fb19SBen Gras ) {
53452fe8fb19SBen Gras float_raise( float_flag_invalid );
53462fe8fb19SBen Gras return 0;
53472fe8fb19SBen Gras }
53482fe8fb19SBen Gras aSign = extractFloat128Sign( a );
53492fe8fb19SBen Gras bSign = extractFloat128Sign( b );
53502fe8fb19SBen Gras if ( aSign != bSign ) {
53512fe8fb19SBen Gras return
53522fe8fb19SBen Gras aSign
53532fe8fb19SBen Gras || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
53542fe8fb19SBen Gras == 0 );
53552fe8fb19SBen Gras }
53562fe8fb19SBen Gras return
53572fe8fb19SBen Gras aSign ? le128( b.high, b.low, a.high, a.low )
53582fe8fb19SBen Gras : le128( a.high, a.low, b.high, b.low );
53592fe8fb19SBen Gras
53602fe8fb19SBen Gras }
53612fe8fb19SBen Gras
53622fe8fb19SBen Gras /*
53632fe8fb19SBen Gras -------------------------------------------------------------------------------
53642fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is less than
53652fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The comparison is performed
53662fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
53672fe8fb19SBen Gras -------------------------------------------------------------------------------
53682fe8fb19SBen Gras */
float128_lt(float128 a,float128 b)53692fe8fb19SBen Gras flag float128_lt( float128 a, float128 b )
53702fe8fb19SBen Gras {
53712fe8fb19SBen Gras flag aSign, bSign;
53722fe8fb19SBen Gras
53732fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
53742fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
53752fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
53762fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
53772fe8fb19SBen Gras ) {
53782fe8fb19SBen Gras float_raise( float_flag_invalid );
53792fe8fb19SBen Gras return 0;
53802fe8fb19SBen Gras }
53812fe8fb19SBen Gras aSign = extractFloat128Sign( a );
53822fe8fb19SBen Gras bSign = extractFloat128Sign( b );
53832fe8fb19SBen Gras if ( aSign != bSign ) {
53842fe8fb19SBen Gras return
53852fe8fb19SBen Gras aSign
53862fe8fb19SBen Gras && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
53872fe8fb19SBen Gras != 0 );
53882fe8fb19SBen Gras }
53892fe8fb19SBen Gras return
53902fe8fb19SBen Gras aSign ? lt128( b.high, b.low, a.high, a.low )
53912fe8fb19SBen Gras : lt128( a.high, a.low, b.high, b.low );
53922fe8fb19SBen Gras
53932fe8fb19SBen Gras }
53942fe8fb19SBen Gras
53952fe8fb19SBen Gras /*
53962fe8fb19SBen Gras -------------------------------------------------------------------------------
53972fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is equal to
53982fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. The invalid exception is
53992fe8fb19SBen Gras raised if either operand is a NaN. Otherwise, the comparison is performed
54002fe8fb19SBen Gras according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
54012fe8fb19SBen Gras -------------------------------------------------------------------------------
54022fe8fb19SBen Gras */
float128_eq_signaling(float128 a,float128 b)54032fe8fb19SBen Gras flag float128_eq_signaling( float128 a, float128 b )
54042fe8fb19SBen Gras {
54052fe8fb19SBen Gras
54062fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
54072fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
54082fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
54092fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
54102fe8fb19SBen Gras ) {
54112fe8fb19SBen Gras float_raise( float_flag_invalid );
54122fe8fb19SBen Gras return 0;
54132fe8fb19SBen Gras }
54142fe8fb19SBen Gras return
54152fe8fb19SBen Gras ( a.low == b.low )
54162fe8fb19SBen Gras && ( ( a.high == b.high )
54172fe8fb19SBen Gras || ( ( a.low == 0 )
54182fe8fb19SBen Gras && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
54192fe8fb19SBen Gras );
54202fe8fb19SBen Gras
54212fe8fb19SBen Gras }
54222fe8fb19SBen Gras
54232fe8fb19SBen Gras /*
54242fe8fb19SBen Gras -------------------------------------------------------------------------------
54252fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is less than
54262fe8fb19SBen Gras or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
54272fe8fb19SBen Gras cause an exception. Otherwise, the comparison is performed according to the
54282fe8fb19SBen Gras IEC/IEEE Standard for Binary Floating-Point Arithmetic.
54292fe8fb19SBen Gras -------------------------------------------------------------------------------
54302fe8fb19SBen Gras */
float128_le_quiet(float128 a,float128 b)54312fe8fb19SBen Gras flag float128_le_quiet( float128 a, float128 b )
54322fe8fb19SBen Gras {
54332fe8fb19SBen Gras flag aSign, bSign;
54342fe8fb19SBen Gras
54352fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
54362fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
54372fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
54382fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
54392fe8fb19SBen Gras ) {
54402fe8fb19SBen Gras if ( float128_is_signaling_nan( a )
54412fe8fb19SBen Gras || float128_is_signaling_nan( b ) ) {
54422fe8fb19SBen Gras float_raise( float_flag_invalid );
54432fe8fb19SBen Gras }
54442fe8fb19SBen Gras return 0;
54452fe8fb19SBen Gras }
54462fe8fb19SBen Gras aSign = extractFloat128Sign( a );
54472fe8fb19SBen Gras bSign = extractFloat128Sign( b );
54482fe8fb19SBen Gras if ( aSign != bSign ) {
54492fe8fb19SBen Gras return
54502fe8fb19SBen Gras aSign
54512fe8fb19SBen Gras || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
54522fe8fb19SBen Gras == 0 );
54532fe8fb19SBen Gras }
54542fe8fb19SBen Gras return
54552fe8fb19SBen Gras aSign ? le128( b.high, b.low, a.high, a.low )
54562fe8fb19SBen Gras : le128( a.high, a.low, b.high, b.low );
54572fe8fb19SBen Gras
54582fe8fb19SBen Gras }
54592fe8fb19SBen Gras
54602fe8fb19SBen Gras /*
54612fe8fb19SBen Gras -------------------------------------------------------------------------------
54622fe8fb19SBen Gras Returns 1 if the quadruple-precision floating-point value `a' is less than
54632fe8fb19SBen Gras the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
54642fe8fb19SBen Gras exception. Otherwise, the comparison is performed according to the IEC/IEEE
54652fe8fb19SBen Gras Standard for Binary Floating-Point Arithmetic.
54662fe8fb19SBen Gras -------------------------------------------------------------------------------
54672fe8fb19SBen Gras */
float128_lt_quiet(float128 a,float128 b)54682fe8fb19SBen Gras flag float128_lt_quiet( float128 a, float128 b )
54692fe8fb19SBen Gras {
54702fe8fb19SBen Gras flag aSign, bSign;
54712fe8fb19SBen Gras
54722fe8fb19SBen Gras if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
54732fe8fb19SBen Gras && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
54742fe8fb19SBen Gras || ( ( extractFloat128Exp( b ) == 0x7FFF )
54752fe8fb19SBen Gras && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
54762fe8fb19SBen Gras ) {
54772fe8fb19SBen Gras if ( float128_is_signaling_nan( a )
54782fe8fb19SBen Gras || float128_is_signaling_nan( b ) ) {
54792fe8fb19SBen Gras float_raise( float_flag_invalid );
54802fe8fb19SBen Gras }
54812fe8fb19SBen Gras return 0;
54822fe8fb19SBen Gras }
54832fe8fb19SBen Gras aSign = extractFloat128Sign( a );
54842fe8fb19SBen Gras bSign = extractFloat128Sign( b );
54852fe8fb19SBen Gras if ( aSign != bSign ) {
54862fe8fb19SBen Gras return
54872fe8fb19SBen Gras aSign
54882fe8fb19SBen Gras && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
54892fe8fb19SBen Gras != 0 );
54902fe8fb19SBen Gras }
54912fe8fb19SBen Gras return
54922fe8fb19SBen Gras aSign ? lt128( b.high, b.low, a.high, a.low )
54932fe8fb19SBen Gras : lt128( a.high, a.low, b.high, b.low );
54942fe8fb19SBen Gras
54952fe8fb19SBen Gras }
54962fe8fb19SBen Gras
54972fe8fb19SBen Gras #endif
54982fe8fb19SBen Gras
54992fe8fb19SBen Gras
55002fe8fb19SBen Gras #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
55012fe8fb19SBen Gras
55022fe8fb19SBen Gras /*
55032fe8fb19SBen Gras * These two routines are not part of the original softfloat distribution.
55042fe8fb19SBen Gras *
55052fe8fb19SBen Gras * They are based on the corresponding conversions to integer but return
55062fe8fb19SBen Gras * unsigned numbers instead since these functions are required by GCC.
55072fe8fb19SBen Gras *
55082fe8fb19SBen Gras * Added by Mark Brinicombe <mark@NetBSD.org> 27/09/97
55092fe8fb19SBen Gras *
55102fe8fb19SBen Gras * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
55112fe8fb19SBen Gras */
55122fe8fb19SBen Gras
55132fe8fb19SBen Gras /*
55142fe8fb19SBen Gras -------------------------------------------------------------------------------
55152fe8fb19SBen Gras Returns the result of converting the double-precision floating-point value
55162fe8fb19SBen Gras `a' to the 32-bit unsigned integer format. The conversion is
55172fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-point
55182fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero. If
55192fe8fb19SBen Gras `a' is a NaN, the largest positive integer is returned. If the conversion
overflows, the largest positive integer is returned.
55212fe8fb19SBen Gras -------------------------------------------------------------------------------
55222fe8fb19SBen Gras */
float64_to_uint32_round_to_zero(float64 a)55232fe8fb19SBen Gras uint32 float64_to_uint32_round_to_zero( float64 a )
55242fe8fb19SBen Gras {
55252fe8fb19SBen Gras flag aSign;
55262fe8fb19SBen Gras int16 aExp, shiftCount;
55272fe8fb19SBen Gras bits64 aSig, savedASig;
55282fe8fb19SBen Gras uint32 z;
55292fe8fb19SBen Gras
55302fe8fb19SBen Gras aSig = extractFloat64Frac( a );
55312fe8fb19SBen Gras aExp = extractFloat64Exp( a );
55322fe8fb19SBen Gras aSign = extractFloat64Sign( a );
55332fe8fb19SBen Gras
55342fe8fb19SBen Gras if (aSign) {
55352fe8fb19SBen Gras float_raise( float_flag_invalid );
55362fe8fb19SBen Gras return(0);
55372fe8fb19SBen Gras }
55382fe8fb19SBen Gras
55392fe8fb19SBen Gras if ( 0x41E < aExp ) {
55402fe8fb19SBen Gras float_raise( float_flag_invalid );
55412fe8fb19SBen Gras return 0xffffffff;
55422fe8fb19SBen Gras }
55432fe8fb19SBen Gras else if ( aExp < 0x3FF ) {
5544*84d9c625SLionel Sambuc if ( aExp || aSig ) set_float_exception_inexact_flag();
55452fe8fb19SBen Gras return 0;
55462fe8fb19SBen Gras }
55472fe8fb19SBen Gras aSig |= LIT64( 0x0010000000000000 );
55482fe8fb19SBen Gras shiftCount = 0x433 - aExp;
55492fe8fb19SBen Gras savedASig = aSig;
55502fe8fb19SBen Gras aSig >>= shiftCount;
5551f14fb602SLionel Sambuc z = (uint32)aSig;
55522fe8fb19SBen Gras if ( ( aSig<<shiftCount ) != savedASig ) {
5553*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
55542fe8fb19SBen Gras }
55552fe8fb19SBen Gras return z;
55562fe8fb19SBen Gras
55572fe8fb19SBen Gras }
55582fe8fb19SBen Gras
55592fe8fb19SBen Gras /*
55602fe8fb19SBen Gras -------------------------------------------------------------------------------
55612fe8fb19SBen Gras Returns the result of converting the single-precision floating-point value
55622fe8fb19SBen Gras `a' to the 32-bit unsigned integer format. The conversion is
55632fe8fb19SBen Gras performed according to the IEC/IEEE Standard for Binary Floating-point
55642fe8fb19SBen Gras Arithmetic, except that the conversion is always rounded toward zero. If
55652fe8fb19SBen Gras `a' is a NaN, the largest positive integer is returned. If the conversion
55662fe8fb19SBen Gras overflows, the largest positive integer is returned.
55672fe8fb19SBen Gras -------------------------------------------------------------------------------
55682fe8fb19SBen Gras */
float32_to_uint32_round_to_zero(float32 a)55692fe8fb19SBen Gras uint32 float32_to_uint32_round_to_zero( float32 a )
55702fe8fb19SBen Gras {
55712fe8fb19SBen Gras flag aSign;
55722fe8fb19SBen Gras int16 aExp, shiftCount;
55732fe8fb19SBen Gras bits32 aSig;
55742fe8fb19SBen Gras uint32 z;
55752fe8fb19SBen Gras
55762fe8fb19SBen Gras aSig = extractFloat32Frac( a );
55772fe8fb19SBen Gras aExp = extractFloat32Exp( a );
55782fe8fb19SBen Gras aSign = extractFloat32Sign( a );
55792fe8fb19SBen Gras shiftCount = aExp - 0x9E;
55802fe8fb19SBen Gras
55812fe8fb19SBen Gras if (aSign) {
55822fe8fb19SBen Gras float_raise( float_flag_invalid );
55832fe8fb19SBen Gras return(0);
55842fe8fb19SBen Gras }
55852fe8fb19SBen Gras if ( 0 < shiftCount ) {
55862fe8fb19SBen Gras float_raise( float_flag_invalid );
55872fe8fb19SBen Gras return 0xFFFFFFFF;
55882fe8fb19SBen Gras }
55892fe8fb19SBen Gras else if ( aExp <= 0x7E ) {
5590*84d9c625SLionel Sambuc if ( aExp | aSig ) set_float_exception_inexact_flag();
55912fe8fb19SBen Gras return 0;
55922fe8fb19SBen Gras }
55932fe8fb19SBen Gras aSig = ( aSig | 0x800000 )<<8;
55942fe8fb19SBen Gras z = aSig>>( - shiftCount );
55952fe8fb19SBen Gras if ( aSig<<( shiftCount & 31 ) ) {
5596*84d9c625SLionel Sambuc set_float_exception_inexact_flag();
55972fe8fb19SBen Gras }
55982fe8fb19SBen Gras return z;
55992fe8fb19SBen Gras
56002fe8fb19SBen Gras }
56012fe8fb19SBen Gras
56022fe8fb19SBen Gras #endif
5603