/* $NetBSD: softfloat-macros.h,v 1.3 2020/09/01 15:45:20 thorpej Exp $ */

/*============================================================================

This C source fragment is part of the Berkeley SoftFloat IEEE Floating-Point
Arithmetic Package, Release 2c, by John R. Hauser.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TOLERATE ALL LOSSES, COSTS, OR OTHER
PROBLEMS THEY INCUR DUE TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR
THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE EFFECTIVELY
INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE
(possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR OTHER
PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE, OR
INCURRED BY ANYONE DUE TO A DERIVATIVE WORK THEY CREATE USING ANY PART OF THE
SOFTWARE.

Derivative works require also that (1) the source code for the derivative work
includes prominent notice that the work is derivative, and (2) the source code
includes prominent notice of these three paragraphs for those parts of this
code that are retained.

=============================================================================*/

279fc5e708Sthorpej /*----------------------------------------------------------------------------
289fc5e708Sthorpej | Shifts `a' right by the number of bits given in `count'. If any nonzero
29*d249112cSthorpej | bits are shifted off, they are "jammed" into the least significant bit of
309fc5e708Sthorpej | the result by setting the least significant bit to 1. The value of `count'
319fc5e708Sthorpej | can be arbitrarily large; in particular, if `count' is greater than 32, the
329fc5e708Sthorpej | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
339fc5e708Sthorpej | The result is stored in the location pointed to by `zPtr'.
349fc5e708Sthorpej *----------------------------------------------------------------------------*/
359fc5e708Sthorpej
shift32RightJamming(bits32 a,int16 count,bits32 * zPtr)362df695b1Sross INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
372df695b1Sross {
382df695b1Sross bits32 z;
392df695b1Sross
402df695b1Sross if ( count == 0 ) {
412df695b1Sross z = a;
422df695b1Sross }
432df695b1Sross else if ( count < 32 ) {
442df695b1Sross z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
452df695b1Sross }
462df695b1Sross else {
472df695b1Sross z = ( a != 0 );
482df695b1Sross }
492df695b1Sross *zPtr = z;
502df695b1Sross
512df695b1Sross }
522df695b1Sross
539fc5e708Sthorpej /*----------------------------------------------------------------------------
549fc5e708Sthorpej | Shifts `a' right by the number of bits given in `count'. If any nonzero
55*d249112cSthorpej | bits are shifted off, they are "jammed" into the least significant bit of
569fc5e708Sthorpej | the result by setting the least significant bit to 1. The value of `count'
579fc5e708Sthorpej | can be arbitrarily large; in particular, if `count' is greater than 64, the
589fc5e708Sthorpej | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
599fc5e708Sthorpej | The result is stored in the location pointed to by `zPtr'.
609fc5e708Sthorpej *----------------------------------------------------------------------------*/
619fc5e708Sthorpej
shift64RightJamming(bits64 a,int16 count,bits64 * zPtr)622df695b1Sross INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
632df695b1Sross {
642df695b1Sross bits64 z;
652df695b1Sross
662df695b1Sross if ( count == 0 ) {
672df695b1Sross z = a;
682df695b1Sross }
692df695b1Sross else if ( count < 64 ) {
702df695b1Sross z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
712df695b1Sross }
722df695b1Sross else {
732df695b1Sross z = ( a != 0 );
742df695b1Sross }
752df695b1Sross *zPtr = z;
762df695b1Sross
772df695b1Sross }
782df695b1Sross
799fc5e708Sthorpej /*----------------------------------------------------------------------------
809fc5e708Sthorpej | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
819fc5e708Sthorpej | _plus_ the number of bits given in `count'. The shifted result is at most
829fc5e708Sthorpej | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
839fc5e708Sthorpej | bits shifted off form a second 64-bit result as follows: The _last_ bit
849fc5e708Sthorpej | shifted off is the most-significant bit of the extra result, and the other
859fc5e708Sthorpej | 63 bits of the extra result are all zero if and only if _all_but_the_last_
869fc5e708Sthorpej | bits shifted off were all zero. This extra result is stored in the location
879fc5e708Sthorpej | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
889fc5e708Sthorpej | (This routine makes more sense if `a0' and `a1' are considered to form
899fc5e708Sthorpej | a fixed-point value with binary point between `a0' and `a1'. This fixed-
909fc5e708Sthorpej | point value is shifted right by the number of bits given in `count', and
919fc5e708Sthorpej | the integer part of the result is returned at the location pointed to by
929fc5e708Sthorpej | `z0Ptr'. The fractional part of the result may be slightly corrupted as
939fc5e708Sthorpej | described above, and is returned at the location pointed to by `z1Ptr'.)
949fc5e708Sthorpej *----------------------------------------------------------------------------*/
959fc5e708Sthorpej
962df695b1Sross INLINE void
shift64ExtraRightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)972df695b1Sross shift64ExtraRightJamming(
982df695b1Sross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
992df695b1Sross {
1002df695b1Sross bits64 z0, z1;
1012df695b1Sross int8 negCount = ( - count ) & 63;
1022df695b1Sross
1032df695b1Sross if ( count == 0 ) {
1042df695b1Sross z1 = a1;
1052df695b1Sross z0 = a0;
1062df695b1Sross }
1072df695b1Sross else if ( count < 64 ) {
1082df695b1Sross z1 = ( a0<<negCount ) | ( a1 != 0 );
1092df695b1Sross z0 = a0>>count;
1102df695b1Sross }
1112df695b1Sross else {
1122df695b1Sross if ( count == 64 ) {
1132df695b1Sross z1 = a0 | ( a1 != 0 );
1142df695b1Sross }
1152df695b1Sross else {
1162df695b1Sross z1 = ( ( a0 | a1 ) != 0 );
1172df695b1Sross }
1182df695b1Sross z0 = 0;
1192df695b1Sross }
1202df695b1Sross *z1Ptr = z1;
1212df695b1Sross *z0Ptr = z0;
1222df695b1Sross
1232df695b1Sross }
1242df695b1Sross
1259fc5e708Sthorpej /*----------------------------------------------------------------------------
1269fc5e708Sthorpej | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1279fc5e708Sthorpej | number of bits given in `count'. Any bits shifted off are lost. The value
1289fc5e708Sthorpej | of `count' can be arbitrarily large; in particular, if `count' is greater
1299fc5e708Sthorpej | than 128, the result will be 0. The result is broken into two 64-bit pieces
1309fc5e708Sthorpej | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
1319fc5e708Sthorpej *----------------------------------------------------------------------------*/
1329fc5e708Sthorpej
1332df695b1Sross INLINE void
shift128Right(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)1342df695b1Sross shift128Right(
1352df695b1Sross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1362df695b1Sross {
1372df695b1Sross bits64 z0, z1;
1382df695b1Sross int8 negCount = ( - count ) & 63;
1392df695b1Sross
1402df695b1Sross if ( count == 0 ) {
1412df695b1Sross z1 = a1;
1422df695b1Sross z0 = a0;
1432df695b1Sross }
1442df695b1Sross else if ( count < 64 ) {
1452df695b1Sross z1 = ( a0<<negCount ) | ( a1>>count );
1462df695b1Sross z0 = a0>>count;
1472df695b1Sross }
1482df695b1Sross else {
149*d249112cSthorpej z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
1502df695b1Sross z0 = 0;
1512df695b1Sross }
1522df695b1Sross *z1Ptr = z1;
1532df695b1Sross *z0Ptr = z0;
1542df695b1Sross
1552df695b1Sross }
1562df695b1Sross
1579fc5e708Sthorpej /*----------------------------------------------------------------------------
1589fc5e708Sthorpej | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1599fc5e708Sthorpej | number of bits given in `count'. If any nonzero bits are shifted off, they
160*d249112cSthorpej | are "jammed" into the least significant bit of the result by setting the
1619fc5e708Sthorpej | least significant bit to 1. The value of `count' can be arbitrarily large;
1629fc5e708Sthorpej | in particular, if `count' is greater than 128, the result will be either
1639fc5e708Sthorpej | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
1649fc5e708Sthorpej | nonzero. The result is broken into two 64-bit pieces which are stored at
1659fc5e708Sthorpej | the locations pointed to by `z0Ptr' and `z1Ptr'.
1669fc5e708Sthorpej *----------------------------------------------------------------------------*/
1679fc5e708Sthorpej
1682df695b1Sross INLINE void
shift128RightJamming(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)1692df695b1Sross shift128RightJamming(
1702df695b1Sross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1712df695b1Sross {
1722df695b1Sross bits64 z0, z1;
1732df695b1Sross int8 negCount = ( - count ) & 63;
1742df695b1Sross
1752df695b1Sross if ( count == 0 ) {
1762df695b1Sross z1 = a1;
1772df695b1Sross z0 = a0;
1782df695b1Sross }
1792df695b1Sross else if ( count < 64 ) {
1802df695b1Sross z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
1812df695b1Sross z0 = a0>>count;
1822df695b1Sross }
1832df695b1Sross else {
1842df695b1Sross if ( count == 64 ) {
1852df695b1Sross z1 = a0 | ( a1 != 0 );
1862df695b1Sross }
1872df695b1Sross else if ( count < 128 ) {
1882df695b1Sross z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
1892df695b1Sross }
1902df695b1Sross else {
1912df695b1Sross z1 = ( ( a0 | a1 ) != 0 );
1922df695b1Sross }
1932df695b1Sross z0 = 0;
1942df695b1Sross }
1952df695b1Sross *z1Ptr = z1;
1962df695b1Sross *z0Ptr = z0;
1972df695b1Sross
1982df695b1Sross }
1992df695b1Sross
2009fc5e708Sthorpej /*----------------------------------------------------------------------------
2019fc5e708Sthorpej | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
2029fc5e708Sthorpej | by 64 _plus_ the number of bits given in `count'. The shifted result is
2039fc5e708Sthorpej | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
2049fc5e708Sthorpej | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
2059fc5e708Sthorpej | off form a third 64-bit result as follows: The _last_ bit shifted off is
2069fc5e708Sthorpej | the most-significant bit of the extra result, and the other 63 bits of the
2079fc5e708Sthorpej | extra result are all zero if and only if _all_but_the_last_ bits shifted off
2089fc5e708Sthorpej | were all zero. This extra result is stored in the location pointed to by
2099fc5e708Sthorpej | `z2Ptr'. The value of `count' can be arbitrarily large.
2109fc5e708Sthorpej | (This routine makes more sense if `a0', `a1', and `a2' are considered
2119fc5e708Sthorpej | to form a fixed-point value with binary point between `a1' and `a2'. This
2129fc5e708Sthorpej | fixed-point value is shifted right by the number of bits given in `count',
2139fc5e708Sthorpej | and the integer part of the result is returned at the locations pointed to
2149fc5e708Sthorpej | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
2159fc5e708Sthorpej | corrupted as described above, and is returned at the location pointed to by
2169fc5e708Sthorpej | `z2Ptr'.)
2179fc5e708Sthorpej *----------------------------------------------------------------------------*/
2189fc5e708Sthorpej
2192df695b1Sross INLINE void
shift128ExtraRightJamming(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)2202df695b1Sross shift128ExtraRightJamming(
2212df695b1Sross bits64 a0,
2222df695b1Sross bits64 a1,
2232df695b1Sross bits64 a2,
2242df695b1Sross int16 count,
2252df695b1Sross bits64 *z0Ptr,
2262df695b1Sross bits64 *z1Ptr,
2272df695b1Sross bits64 *z2Ptr
2282df695b1Sross )
2292df695b1Sross {
2302df695b1Sross bits64 z0, z1, z2;
2312df695b1Sross int8 negCount = ( - count ) & 63;
2322df695b1Sross
2332df695b1Sross if ( count == 0 ) {
2342df695b1Sross z2 = a2;
2352df695b1Sross z1 = a1;
2362df695b1Sross z0 = a0;
2372df695b1Sross }
2382df695b1Sross else {
2392df695b1Sross if ( count < 64 ) {
2402df695b1Sross z2 = a1<<negCount;
2412df695b1Sross z1 = ( a0<<negCount ) | ( a1>>count );
2422df695b1Sross z0 = a0>>count;
2432df695b1Sross }
2442df695b1Sross else {
2452df695b1Sross if ( count == 64 ) {
2462df695b1Sross z2 = a1;
2472df695b1Sross z1 = a0;
2482df695b1Sross }
2492df695b1Sross else {
2502df695b1Sross a2 |= a1;
2512df695b1Sross if ( count < 128 ) {
2522df695b1Sross z2 = a0<<negCount;
2532df695b1Sross z1 = a0>>( count & 63 );
2542df695b1Sross }
2552df695b1Sross else {
2562df695b1Sross z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
2572df695b1Sross z1 = 0;
2582df695b1Sross }
2592df695b1Sross }
2602df695b1Sross z0 = 0;
2612df695b1Sross }
2622df695b1Sross z2 |= ( a2 != 0 );
2632df695b1Sross }
2642df695b1Sross *z2Ptr = z2;
2652df695b1Sross *z1Ptr = z1;
2662df695b1Sross *z0Ptr = z0;
2672df695b1Sross
2682df695b1Sross }
2692df695b1Sross
2709fc5e708Sthorpej /*----------------------------------------------------------------------------
2719fc5e708Sthorpej | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
2729fc5e708Sthorpej | number of bits given in `count'. Any bits shifted off are lost. The value
2739fc5e708Sthorpej | of `count' must be less than 64. The result is broken into two 64-bit
2749fc5e708Sthorpej | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
2759fc5e708Sthorpej *----------------------------------------------------------------------------*/
2769fc5e708Sthorpej
2772df695b1Sross INLINE void
shortShift128Left(bits64 a0,bits64 a1,int16 count,bits64 * z0Ptr,bits64 * z1Ptr)2782df695b1Sross shortShift128Left(
2792df695b1Sross bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
2802df695b1Sross {
2812df695b1Sross
2822df695b1Sross *z1Ptr = a1<<count;
2832df695b1Sross *z0Ptr =
2842df695b1Sross ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
2852df695b1Sross
2862df695b1Sross }
2872df695b1Sross
2889fc5e708Sthorpej /*----------------------------------------------------------------------------
2899fc5e708Sthorpej | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
2909fc5e708Sthorpej | by the number of bits given in `count'. Any bits shifted off are lost.
2919fc5e708Sthorpej | The value of `count' must be less than 64. The result is broken into three
2929fc5e708Sthorpej | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
2939fc5e708Sthorpej | `z1Ptr', and `z2Ptr'.
2949fc5e708Sthorpej *----------------------------------------------------------------------------*/
2959fc5e708Sthorpej
2962df695b1Sross INLINE void
shortShift192Left(bits64 a0,bits64 a1,bits64 a2,int16 count,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)2972df695b1Sross shortShift192Left(
2982df695b1Sross bits64 a0,
2992df695b1Sross bits64 a1,
3002df695b1Sross bits64 a2,
3012df695b1Sross int16 count,
3022df695b1Sross bits64 *z0Ptr,
3032df695b1Sross bits64 *z1Ptr,
3042df695b1Sross bits64 *z2Ptr
3052df695b1Sross )
3062df695b1Sross {
3072df695b1Sross bits64 z0, z1, z2;
3082df695b1Sross int8 negCount;
3092df695b1Sross
3102df695b1Sross z2 = a2<<count;
3112df695b1Sross z1 = a1<<count;
3122df695b1Sross z0 = a0<<count;
3132df695b1Sross if ( 0 < count ) {
3142df695b1Sross negCount = ( ( - count ) & 63 );
3152df695b1Sross z1 |= a2>>negCount;
3162df695b1Sross z0 |= a1>>negCount;
3172df695b1Sross }
3182df695b1Sross *z2Ptr = z2;
3192df695b1Sross *z1Ptr = z1;
3202df695b1Sross *z0Ptr = z0;
3212df695b1Sross
3222df695b1Sross }
3232df695b1Sross
3249fc5e708Sthorpej /*----------------------------------------------------------------------------
3259fc5e708Sthorpej | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
3269fc5e708Sthorpej | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
3279fc5e708Sthorpej | any carry out is lost. The result is broken into two 64-bit pieces which
3289fc5e708Sthorpej | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
3299fc5e708Sthorpej *----------------------------------------------------------------------------*/
3309fc5e708Sthorpej
3312df695b1Sross INLINE void
add128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)3322df695b1Sross add128(
3332df695b1Sross bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
3342df695b1Sross {
3352df695b1Sross bits64 z1;
3362df695b1Sross
3372df695b1Sross z1 = a1 + b1;
3382df695b1Sross *z1Ptr = z1;
3392df695b1Sross *z0Ptr = a0 + b0 + ( z1 < a1 );
3402df695b1Sross
3412df695b1Sross }
3422df695b1Sross
3439fc5e708Sthorpej /*----------------------------------------------------------------------------
3449fc5e708Sthorpej | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
3459fc5e708Sthorpej | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
3469fc5e708Sthorpej | modulo 2^192, so any carry out is lost. The result is broken into three
3479fc5e708Sthorpej | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
3489fc5e708Sthorpej | `z1Ptr', and `z2Ptr'.
3499fc5e708Sthorpej *----------------------------------------------------------------------------*/
3509fc5e708Sthorpej
3512df695b1Sross INLINE void
add192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)3522df695b1Sross add192(
3532df695b1Sross bits64 a0,
3542df695b1Sross bits64 a1,
3552df695b1Sross bits64 a2,
3562df695b1Sross bits64 b0,
3572df695b1Sross bits64 b1,
3582df695b1Sross bits64 b2,
3592df695b1Sross bits64 *z0Ptr,
3602df695b1Sross bits64 *z1Ptr,
3612df695b1Sross bits64 *z2Ptr
3622df695b1Sross )
3632df695b1Sross {
3642df695b1Sross bits64 z0, z1, z2;
3652df695b1Sross int8 carry0, carry1;
3662df695b1Sross
3672df695b1Sross z2 = a2 + b2;
3682df695b1Sross carry1 = ( z2 < a2 );
3692df695b1Sross z1 = a1 + b1;
3702df695b1Sross carry0 = ( z1 < a1 );
3712df695b1Sross z0 = a0 + b0;
3722df695b1Sross z1 += carry1;
3732df695b1Sross z0 += ( z1 < carry1 );
3742df695b1Sross z0 += carry0;
3752df695b1Sross *z2Ptr = z2;
3762df695b1Sross *z1Ptr = z1;
3772df695b1Sross *z0Ptr = z0;
3782df695b1Sross
3792df695b1Sross }
3802df695b1Sross
3819fc5e708Sthorpej /*----------------------------------------------------------------------------
3829fc5e708Sthorpej | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
3839fc5e708Sthorpej | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
3849fc5e708Sthorpej | 2^128, so any borrow out (carry out) is lost. The result is broken into two
3859fc5e708Sthorpej | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
3869fc5e708Sthorpej | `z1Ptr'.
3879fc5e708Sthorpej *----------------------------------------------------------------------------*/
3889fc5e708Sthorpej
3892df695b1Sross INLINE void
sub128(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr)3902df695b1Sross sub128(
3912df695b1Sross bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
3922df695b1Sross {
3932df695b1Sross
3942df695b1Sross *z1Ptr = a1 - b1;
3952df695b1Sross *z0Ptr = a0 - b0 - ( a1 < b1 );
3962df695b1Sross
3972df695b1Sross }
3982df695b1Sross
3999fc5e708Sthorpej /*----------------------------------------------------------------------------
4009fc5e708Sthorpej | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
4019fc5e708Sthorpej | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
4029fc5e708Sthorpej | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
4039fc5e708Sthorpej | result is broken into three 64-bit pieces which are stored at the locations
4049fc5e708Sthorpej | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
4059fc5e708Sthorpej *----------------------------------------------------------------------------*/
4069fc5e708Sthorpej
4072df695b1Sross INLINE void
sub192(bits64 a0,bits64 a1,bits64 a2,bits64 b0,bits64 b1,bits64 b2,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)4082df695b1Sross sub192(
4092df695b1Sross bits64 a0,
4102df695b1Sross bits64 a1,
4112df695b1Sross bits64 a2,
4122df695b1Sross bits64 b0,
4132df695b1Sross bits64 b1,
4142df695b1Sross bits64 b2,
4152df695b1Sross bits64 *z0Ptr,
4162df695b1Sross bits64 *z1Ptr,
4172df695b1Sross bits64 *z2Ptr
4182df695b1Sross )
4192df695b1Sross {
4202df695b1Sross bits64 z0, z1, z2;
4212df695b1Sross int8 borrow0, borrow1;
4222df695b1Sross
4232df695b1Sross z2 = a2 - b2;
4242df695b1Sross borrow1 = ( a2 < b2 );
4252df695b1Sross z1 = a1 - b1;
4262df695b1Sross borrow0 = ( a1 < b1 );
4272df695b1Sross z0 = a0 - b0;
4282df695b1Sross z0 -= ( z1 < borrow1 );
4292df695b1Sross z1 -= borrow1;
4302df695b1Sross z0 -= borrow0;
4312df695b1Sross *z2Ptr = z2;
4322df695b1Sross *z1Ptr = z1;
4332df695b1Sross *z0Ptr = z0;
4342df695b1Sross
4352df695b1Sross }
4362df695b1Sross
4379fc5e708Sthorpej /*----------------------------------------------------------------------------
4389fc5e708Sthorpej | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
4399fc5e708Sthorpej | into two 64-bit pieces which are stored at the locations pointed to by
4409fc5e708Sthorpej | `z0Ptr' and `z1Ptr'.
4419fc5e708Sthorpej *----------------------------------------------------------------------------*/
4429fc5e708Sthorpej
mul64To128(bits64 a,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr)4432df695b1Sross INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
4442df695b1Sross {
4452df695b1Sross bits32 aHigh, aLow, bHigh, bLow;
4462df695b1Sross bits64 z0, zMiddleA, zMiddleB, z1;
4472df695b1Sross
4482df695b1Sross aLow = a;
4492df695b1Sross aHigh = a>>32;
4502df695b1Sross bLow = b;
4512df695b1Sross bHigh = b>>32;
4522df695b1Sross z1 = ( (bits64) aLow ) * bLow;
4532df695b1Sross zMiddleA = ( (bits64) aLow ) * bHigh;
4542df695b1Sross zMiddleB = ( (bits64) aHigh ) * bLow;
4552df695b1Sross z0 = ( (bits64) aHigh ) * bHigh;
4562df695b1Sross zMiddleA += zMiddleB;
4572df695b1Sross z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
4582df695b1Sross zMiddleA <<= 32;
4592df695b1Sross z1 += zMiddleA;
4602df695b1Sross z0 += ( z1 < zMiddleA );
4612df695b1Sross *z1Ptr = z1;
4622df695b1Sross *z0Ptr = z0;
4632df695b1Sross
4642df695b1Sross }
4652df695b1Sross
4669fc5e708Sthorpej /*----------------------------------------------------------------------------
4679fc5e708Sthorpej | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
4689fc5e708Sthorpej | `b' to obtain a 192-bit product. The product is broken into three 64-bit
4699fc5e708Sthorpej | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
4709fc5e708Sthorpej | `z2Ptr'.
4719fc5e708Sthorpej *----------------------------------------------------------------------------*/
4729fc5e708Sthorpej
4732df695b1Sross INLINE void
mul128By64To192(bits64 a0,bits64 a1,bits64 b,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr)4742df695b1Sross mul128By64To192(
4752df695b1Sross bits64 a0,
4762df695b1Sross bits64 a1,
4772df695b1Sross bits64 b,
4782df695b1Sross bits64 *z0Ptr,
4792df695b1Sross bits64 *z1Ptr,
4802df695b1Sross bits64 *z2Ptr
4812df695b1Sross )
4822df695b1Sross {
4832df695b1Sross bits64 z0, z1, z2, more1;
4842df695b1Sross
4852df695b1Sross mul64To128( a1, b, &z1, &z2 );
4862df695b1Sross mul64To128( a0, b, &z0, &more1 );
4872df695b1Sross add128( z0, more1, 0, z1, &z0, &z1 );
4882df695b1Sross *z2Ptr = z2;
4892df695b1Sross *z1Ptr = z1;
4902df695b1Sross *z0Ptr = z0;
4912df695b1Sross
4922df695b1Sross }
4932df695b1Sross
4949fc5e708Sthorpej /*----------------------------------------------------------------------------
4959fc5e708Sthorpej | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
4969fc5e708Sthorpej | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
4979fc5e708Sthorpej | product. The product is broken into four 64-bit pieces which are stored at
4989fc5e708Sthorpej | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
4999fc5e708Sthorpej *----------------------------------------------------------------------------*/
5009fc5e708Sthorpej
5012df695b1Sross INLINE void
mul128To256(bits64 a0,bits64 a1,bits64 b0,bits64 b1,bits64 * z0Ptr,bits64 * z1Ptr,bits64 * z2Ptr,bits64 * z3Ptr)5022df695b1Sross mul128To256(
5032df695b1Sross bits64 a0,
5042df695b1Sross bits64 a1,
5052df695b1Sross bits64 b0,
5062df695b1Sross bits64 b1,
5072df695b1Sross bits64 *z0Ptr,
5082df695b1Sross bits64 *z1Ptr,
5092df695b1Sross bits64 *z2Ptr,
5102df695b1Sross bits64 *z3Ptr
5112df695b1Sross )
5122df695b1Sross {
5132df695b1Sross bits64 z0, z1, z2, z3;
5142df695b1Sross bits64 more1, more2;
5152df695b1Sross
5162df695b1Sross mul64To128( a1, b1, &z2, &z3 );
5172df695b1Sross mul64To128( a1, b0, &z1, &more2 );
5182df695b1Sross add128( z1, more2, 0, z2, &z1, &z2 );
5192df695b1Sross mul64To128( a0, b0, &z0, &more1 );
5202df695b1Sross add128( z0, more1, 0, z1, &z0, &z1 );
5212df695b1Sross mul64To128( a0, b1, &more1, &more2 );
5222df695b1Sross add128( more1, more2, 0, z2, &more1, &z2 );
5232df695b1Sross add128( z0, z1, 0, more1, &z0, &z1 );
5242df695b1Sross *z3Ptr = z3;
5252df695b1Sross *z2Ptr = z2;
5262df695b1Sross *z1Ptr = z1;
5272df695b1Sross *z0Ptr = z0;
5282df695b1Sross
5292df695b1Sross }
5302df695b1Sross
5319fc5e708Sthorpej /*----------------------------------------------------------------------------
5329fc5e708Sthorpej | Returns an approximation to the 64-bit integer quotient obtained by dividing
5339fc5e708Sthorpej | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
5349fc5e708Sthorpej | divisor `b' must be at least 2^63. If q is the exact quotient truncated
5359fc5e708Sthorpej | toward zero, the approximation returned lies between q and q + 2 inclusive.
5369fc5e708Sthorpej | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
5379fc5e708Sthorpej | unsigned integer is returned.
5389fc5e708Sthorpej *----------------------------------------------------------------------------*/
5399fc5e708Sthorpej
estimateDiv128To64(bits64 a0,bits64 a1,bits64 b)5402df695b1Sross static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
5412df695b1Sross {
5422df695b1Sross bits64 b0, b1;
5432df695b1Sross bits64 rem0, rem1, term0, term1;
5442df695b1Sross bits64 z;
5452df695b1Sross
5462df695b1Sross if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
5472df695b1Sross b0 = b>>32;
5482df695b1Sross z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
5492df695b1Sross mul64To128( b, z, &term0, &term1 );
5502df695b1Sross sub128( a0, a1, term0, term1, &rem0, &rem1 );
5512df695b1Sross while ( ( (sbits64) rem0 ) < 0 ) {
5522df695b1Sross z -= LIT64( 0x100000000 );
5532df695b1Sross b1 = b<<32;
5542df695b1Sross add128( rem0, rem1, b0, b1, &rem0, &rem1 );
5552df695b1Sross }
5562df695b1Sross rem0 = ( rem0<<32 ) | ( rem1>>32 );
5572df695b1Sross z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
5582df695b1Sross return z;
5592df695b1Sross
5602df695b1Sross }
5612df695b1Sross
5622df695b1Sross #ifndef SOFTFLOAT_FOR_GCC /* Not used */
5639fc5e708Sthorpej /*----------------------------------------------------------------------------
5649fc5e708Sthorpej | Returns an approximation to the square root of the 32-bit significand given
5659fc5e708Sthorpej | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
5669fc5e708Sthorpej | `aExp' (the least significant bit) is 1, the integer returned approximates
5679fc5e708Sthorpej | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
5689fc5e708Sthorpej | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
5699fc5e708Sthorpej | case, the approximation returned lies strictly within +/-2 of the exact
5709fc5e708Sthorpej | value.
5719fc5e708Sthorpej *----------------------------------------------------------------------------*/
5729fc5e708Sthorpej
estimateSqrt32(int16 aExp,bits32 a)5732df695b1Sross static bits32 estimateSqrt32( int16 aExp, bits32 a )
5742df695b1Sross {
5752df695b1Sross static const bits16 sqrtOddAdjustments[] = {
5762df695b1Sross 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
5772df695b1Sross 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
5782df695b1Sross };
5792df695b1Sross static const bits16 sqrtEvenAdjustments[] = {
5802df695b1Sross 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
5812df695b1Sross 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
5822df695b1Sross };
5832df695b1Sross int8 index;
5842df695b1Sross bits32 z;
5852df695b1Sross
5862df695b1Sross index = ( a>>27 ) & 15;
5872df695b1Sross if ( aExp & 1 ) {
5882df695b1Sross z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
5892df695b1Sross z = ( ( a / z )<<14 ) + ( z<<15 );
5902df695b1Sross a >>= 1;
5912df695b1Sross }
5922df695b1Sross else {
5932df695b1Sross z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
5942df695b1Sross z = a / z + z;
5952df695b1Sross z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
5962df695b1Sross if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
5972df695b1Sross }
5982df695b1Sross return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
5992df695b1Sross
6002df695b1Sross }
6012df695b1Sross #endif
6022df695b1Sross
6039fc5e708Sthorpej /*----------------------------------------------------------------------------
6049fc5e708Sthorpej | Returns the number of leading 0 bits before the most-significant 1 bit of
6059fc5e708Sthorpej | `a'. If `a' is zero, 32 is returned.
6069fc5e708Sthorpej *----------------------------------------------------------------------------*/
6079fc5e708Sthorpej
countLeadingZeros32(bits32 a)6082df695b1Sross static int8 countLeadingZeros32( bits32 a )
6092df695b1Sross {
6102df695b1Sross static const int8 countLeadingZerosHigh[] = {
6112df695b1Sross 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
6122df695b1Sross 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6132df695b1Sross 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6142df695b1Sross 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6152df695b1Sross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6162df695b1Sross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6172df695b1Sross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6182df695b1Sross 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6192df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6202df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6212df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6222df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6232df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6242df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6252df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6262df695b1Sross 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6272df695b1Sross };
6282df695b1Sross int8 shiftCount;
6292df695b1Sross
6302df695b1Sross shiftCount = 0;
6312df695b1Sross if ( a < 0x10000 ) {
6322df695b1Sross shiftCount += 16;
6332df695b1Sross a <<= 16;
6342df695b1Sross }
6352df695b1Sross if ( a < 0x1000000 ) {
6362df695b1Sross shiftCount += 8;
6372df695b1Sross a <<= 8;
6382df695b1Sross }
6392df695b1Sross shiftCount += countLeadingZerosHigh[ a>>24 ];
6402df695b1Sross return shiftCount;
6412df695b1Sross
6422df695b1Sross }
6432df695b1Sross
6449fc5e708Sthorpej /*----------------------------------------------------------------------------
6459fc5e708Sthorpej | Returns the number of leading 0 bits before the most-significant 1 bit of
6469fc5e708Sthorpej | `a'. If `a' is zero, 64 is returned.
6479fc5e708Sthorpej *----------------------------------------------------------------------------*/
6489fc5e708Sthorpej
countLeadingZeros64(bits64 a)6492df695b1Sross static int8 countLeadingZeros64( bits64 a )
6502df695b1Sross {
6512df695b1Sross int8 shiftCount;
6522df695b1Sross
6532df695b1Sross shiftCount = 0;
6542df695b1Sross if ( a < ( (bits64) 1 )<<32 ) {
6552df695b1Sross shiftCount += 32;
6562df695b1Sross }
6572df695b1Sross else {
6582df695b1Sross a >>= 32;
6592df695b1Sross }
6602df695b1Sross shiftCount += countLeadingZeros32( a );
6612df695b1Sross return shiftCount;
6622df695b1Sross
6632df695b1Sross }
6642df695b1Sross
6659fc5e708Sthorpej /*----------------------------------------------------------------------------
6669fc5e708Sthorpej | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
6679fc5e708Sthorpej | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
6689fc5e708Sthorpej | Otherwise, returns 0.
6699fc5e708Sthorpej *----------------------------------------------------------------------------*/
6709fc5e708Sthorpej
eq128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)6712df695b1Sross INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
6722df695b1Sross {
6732df695b1Sross
6742df695b1Sross return ( a0 == b0 ) && ( a1 == b1 );
6752df695b1Sross
6762df695b1Sross }
6772df695b1Sross
6789fc5e708Sthorpej /*----------------------------------------------------------------------------
6799fc5e708Sthorpej | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
6809fc5e708Sthorpej | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
6819fc5e708Sthorpej | Otherwise, returns 0.
6829fc5e708Sthorpej *----------------------------------------------------------------------------*/
6839fc5e708Sthorpej
le128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)6842df695b1Sross INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
6852df695b1Sross {
6862df695b1Sross
6872df695b1Sross return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
6882df695b1Sross
6892df695b1Sross }
6902df695b1Sross
6919fc5e708Sthorpej /*----------------------------------------------------------------------------
6929fc5e708Sthorpej | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
6939fc5e708Sthorpej | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
6949fc5e708Sthorpej | returns 0.
6959fc5e708Sthorpej *----------------------------------------------------------------------------*/
6969fc5e708Sthorpej
lt128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)6972df695b1Sross INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
6982df695b1Sross {
6992df695b1Sross
7002df695b1Sross return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
7012df695b1Sross
7022df695b1Sross }
7032df695b1Sross
7049fc5e708Sthorpej /*----------------------------------------------------------------------------
7059fc5e708Sthorpej | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
7069fc5e708Sthorpej | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
7079fc5e708Sthorpej | Otherwise, returns 0.
7089fc5e708Sthorpej *----------------------------------------------------------------------------*/
7099fc5e708Sthorpej
ne128(bits64 a0,bits64 a1,bits64 b0,bits64 b1)7102df695b1Sross INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
7112df695b1Sross {
7122df695b1Sross
7132df695b1Sross return ( a0 != b0 ) || ( a1 != b1 );
7142df695b1Sross
7152df695b1Sross }
7162df695b1Sross
717