/* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */

/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
29936b7f4cSbjh21 30936b7f4cSbjh21=============================================================================== 31936b7f4cSbjh21*/ 32936b7f4cSbjh21 33936b7f4cSbjh21/* 34936b7f4cSbjh21------------------------------------------------------------------------------- 35936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'. If any nonzero 36936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of 37936b7f4cSbjh21the result by setting the least significant bit to 1. The value of `count' 38936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the 39936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero. 40936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'. 41936b7f4cSbjh21------------------------------------------------------------------------------- 42936b7f4cSbjh21*/ 43936b7f4cSbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 44936b7f4cSbjh21{ 45936b7f4cSbjh21 bits32 z; 46936b7f4cSbjh21 47936b7f4cSbjh21 if ( count == 0 ) { 48936b7f4cSbjh21 z = a; 49936b7f4cSbjh21 } 50936b7f4cSbjh21 else if ( count < 32 ) { 51936b7f4cSbjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 52936b7f4cSbjh21 } 53936b7f4cSbjh21 else { 54936b7f4cSbjh21 z = ( a != 0 ); 55936b7f4cSbjh21 } 56936b7f4cSbjh21 *zPtr = z; 57936b7f4cSbjh21 58936b7f4cSbjh21} 59936b7f4cSbjh21 60936b7f4cSbjh21/* 61936b7f4cSbjh21------------------------------------------------------------------------------- 62936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'. If any nonzero 63936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of 64936b7f4cSbjh21the result by setting the least significant bit to 1. The value of `count' 65936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 64, the 66936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero. 
67936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'. 68936b7f4cSbjh21------------------------------------------------------------------------------- 69936b7f4cSbjh21*/ 70936b7f4cSbjh21INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 71936b7f4cSbjh21{ 72936b7f4cSbjh21 bits64 z; 73936b7f4cSbjh21 74936b7f4cSbjh21 if ( count == 0 ) { 75936b7f4cSbjh21 z = a; 76936b7f4cSbjh21 } 77936b7f4cSbjh21 else if ( count < 64 ) { 78936b7f4cSbjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 79936b7f4cSbjh21 } 80936b7f4cSbjh21 else { 81936b7f4cSbjh21 z = ( a != 0 ); 82936b7f4cSbjh21 } 83936b7f4cSbjh21 *zPtr = z; 84936b7f4cSbjh21 85936b7f4cSbjh21} 86936b7f4cSbjh21 87936b7f4cSbjh21/* 88936b7f4cSbjh21------------------------------------------------------------------------------- 89936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 90936b7f4cSbjh21_plus_ the number of bits given in `count'. The shifted result is at most 91936b7f4cSbjh2164 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 92936b7f4cSbjh21bits shifted off form a second 64-bit result as follows: The _last_ bit 93936b7f4cSbjh21shifted off is the most-significant bit of the extra result, and the other 94936b7f4cSbjh2163 bits of the extra result are all zero if and only if _all_but_the_last_ 95936b7f4cSbjh21bits shifted off were all zero. This extra result is stored in the location 96936b7f4cSbjh21pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 97936b7f4cSbjh21 (This routine makes more sense if `a0' and `a1' are considered to form a 98936b7f4cSbjh21fixed-point value with binary point between `a0' and `a1'. This fixed-point 99936b7f4cSbjh21value is shifted right by the number of bits given in `count', and the 100936b7f4cSbjh21integer part of the result is returned at the location pointed to by 101936b7f4cSbjh21`z0Ptr'. 
The fractional part of the result may be slightly corrupted as 102936b7f4cSbjh21described above, and is returned at the location pointed to by `z1Ptr'.) 103936b7f4cSbjh21------------------------------------------------------------------------------- 104936b7f4cSbjh21*/ 105936b7f4cSbjh21INLINE void 106936b7f4cSbjh21 shift64ExtraRightJamming( 107936b7f4cSbjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 108936b7f4cSbjh21{ 109936b7f4cSbjh21 bits64 z0, z1; 110936b7f4cSbjh21 int8 negCount = ( - count ) & 63; 111936b7f4cSbjh21 112936b7f4cSbjh21 if ( count == 0 ) { 113936b7f4cSbjh21 z1 = a1; 114936b7f4cSbjh21 z0 = a0; 115936b7f4cSbjh21 } 116936b7f4cSbjh21 else if ( count < 64 ) { 117936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1 != 0 ); 118936b7f4cSbjh21 z0 = a0>>count; 119936b7f4cSbjh21 } 120936b7f4cSbjh21 else { 121936b7f4cSbjh21 if ( count == 64 ) { 122936b7f4cSbjh21 z1 = a0 | ( a1 != 0 ); 123936b7f4cSbjh21 } 124936b7f4cSbjh21 else { 125936b7f4cSbjh21 z1 = ( ( a0 | a1 ) != 0 ); 126936b7f4cSbjh21 } 127936b7f4cSbjh21 z0 = 0; 128936b7f4cSbjh21 } 129936b7f4cSbjh21 *z1Ptr = z1; 130936b7f4cSbjh21 *z0Ptr = z0; 131936b7f4cSbjh21 132936b7f4cSbjh21} 133936b7f4cSbjh21 134936b7f4cSbjh21/* 135936b7f4cSbjh21------------------------------------------------------------------------------- 136936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 137936b7f4cSbjh21number of bits given in `count'. Any bits shifted off are lost. The value 138936b7f4cSbjh21of `count' can be arbitrarily large; in particular, if `count' is greater 139936b7f4cSbjh21than 128, the result will be 0. The result is broken into two 64-bit pieces 140936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
141936b7f4cSbjh21------------------------------------------------------------------------------- 142936b7f4cSbjh21*/ 143936b7f4cSbjh21INLINE void 144936b7f4cSbjh21 shift128Right( 145936b7f4cSbjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 146936b7f4cSbjh21{ 147936b7f4cSbjh21 bits64 z0, z1; 148936b7f4cSbjh21 int8 negCount = ( - count ) & 63; 149936b7f4cSbjh21 150936b7f4cSbjh21 if ( count == 0 ) { 151936b7f4cSbjh21 z1 = a1; 152936b7f4cSbjh21 z0 = a0; 153936b7f4cSbjh21 } 154936b7f4cSbjh21 else if ( count < 64 ) { 155936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 156936b7f4cSbjh21 z0 = a0>>count; 157936b7f4cSbjh21 } 158936b7f4cSbjh21 else { 159936b7f4cSbjh21 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 160936b7f4cSbjh21 z0 = 0; 161936b7f4cSbjh21 } 162936b7f4cSbjh21 *z1Ptr = z1; 163936b7f4cSbjh21 *z0Ptr = z0; 164936b7f4cSbjh21 165936b7f4cSbjh21} 166936b7f4cSbjh21 167936b7f4cSbjh21/* 168936b7f4cSbjh21------------------------------------------------------------------------------- 169936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 170936b7f4cSbjh21number of bits given in `count'. If any nonzero bits are shifted off, they 171936b7f4cSbjh21are ``jammed'' into the least significant bit of the result by setting the 172936b7f4cSbjh21least significant bit to 1. The value of `count' can be arbitrarily large; 173936b7f4cSbjh21in particular, if `count' is greater than 128, the result will be either 174936b7f4cSbjh210 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 175936b7f4cSbjh21nonzero. The result is broken into two 64-bit pieces which are stored at 176936b7f4cSbjh21the locations pointed to by `z0Ptr' and `z1Ptr'. 
177936b7f4cSbjh21------------------------------------------------------------------------------- 178936b7f4cSbjh21*/ 179936b7f4cSbjh21INLINE void 180936b7f4cSbjh21 shift128RightJamming( 181936b7f4cSbjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 182936b7f4cSbjh21{ 183936b7f4cSbjh21 bits64 z0, z1; 184936b7f4cSbjh21 int8 negCount = ( - count ) & 63; 185936b7f4cSbjh21 186936b7f4cSbjh21 if ( count == 0 ) { 187936b7f4cSbjh21 z1 = a1; 188936b7f4cSbjh21 z0 = a0; 189936b7f4cSbjh21 } 190936b7f4cSbjh21 else if ( count < 64 ) { 191936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 192936b7f4cSbjh21 z0 = a0>>count; 193936b7f4cSbjh21 } 194936b7f4cSbjh21 else { 195936b7f4cSbjh21 if ( count == 64 ) { 196936b7f4cSbjh21 z1 = a0 | ( a1 != 0 ); 197936b7f4cSbjh21 } 198936b7f4cSbjh21 else if ( count < 128 ) { 199936b7f4cSbjh21 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 200936b7f4cSbjh21 } 201936b7f4cSbjh21 else { 202936b7f4cSbjh21 z1 = ( ( a0 | a1 ) != 0 ); 203936b7f4cSbjh21 } 204936b7f4cSbjh21 z0 = 0; 205936b7f4cSbjh21 } 206936b7f4cSbjh21 *z1Ptr = z1; 207936b7f4cSbjh21 *z0Ptr = z0; 208936b7f4cSbjh21 209936b7f4cSbjh21} 210936b7f4cSbjh21 211936b7f4cSbjh21/* 212936b7f4cSbjh21------------------------------------------------------------------------------- 213936b7f4cSbjh21Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 214936b7f4cSbjh21by 64 _plus_ the number of bits given in `count'. The shifted result is 215936b7f4cSbjh21at most 128 nonzero bits; these are broken into two 64-bit pieces which are 216936b7f4cSbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
The bits shifted 217936b7f4cSbjh21off form a third 64-bit result as follows: The _last_ bit shifted off is 218936b7f4cSbjh21the most-significant bit of the extra result, and the other 63 bits of the 219936b7f4cSbjh21extra result are all zero if and only if _all_but_the_last_ bits shifted off 220936b7f4cSbjh21were all zero. This extra result is stored in the location pointed to by 221936b7f4cSbjh21`z2Ptr'. The value of `count' can be arbitrarily large. 222936b7f4cSbjh21 (This routine makes more sense if `a0', `a1', and `a2' are considered 223936b7f4cSbjh21to form a fixed-point value with binary point between `a1' and `a2'. This 224936b7f4cSbjh21fixed-point value is shifted right by the number of bits given in `count', 225936b7f4cSbjh21and the integer part of the result is returned at the locations pointed to 226936b7f4cSbjh21by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 227936b7f4cSbjh21corrupted as described above, and is returned at the location pointed to by 228936b7f4cSbjh21`z2Ptr'.) 
229936b7f4cSbjh21------------------------------------------------------------------------------- 230936b7f4cSbjh21*/ 231936b7f4cSbjh21INLINE void 232936b7f4cSbjh21 shift128ExtraRightJamming( 233936b7f4cSbjh21 bits64 a0, 234936b7f4cSbjh21 bits64 a1, 235936b7f4cSbjh21 bits64 a2, 236936b7f4cSbjh21 int16 count, 237936b7f4cSbjh21 bits64 *z0Ptr, 238936b7f4cSbjh21 bits64 *z1Ptr, 239936b7f4cSbjh21 bits64 *z2Ptr 240936b7f4cSbjh21 ) 241936b7f4cSbjh21{ 242936b7f4cSbjh21 bits64 z0, z1, z2; 243936b7f4cSbjh21 int8 negCount = ( - count ) & 63; 244936b7f4cSbjh21 245936b7f4cSbjh21 if ( count == 0 ) { 246936b7f4cSbjh21 z2 = a2; 247936b7f4cSbjh21 z1 = a1; 248936b7f4cSbjh21 z0 = a0; 249936b7f4cSbjh21 } 250936b7f4cSbjh21 else { 251936b7f4cSbjh21 if ( count < 64 ) { 252936b7f4cSbjh21 z2 = a1<<negCount; 253936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 254936b7f4cSbjh21 z0 = a0>>count; 255936b7f4cSbjh21 } 256936b7f4cSbjh21 else { 257936b7f4cSbjh21 if ( count == 64 ) { 258936b7f4cSbjh21 z2 = a1; 259936b7f4cSbjh21 z1 = a0; 260936b7f4cSbjh21 } 261936b7f4cSbjh21 else { 262936b7f4cSbjh21 a2 |= a1; 263936b7f4cSbjh21 if ( count < 128 ) { 264936b7f4cSbjh21 z2 = a0<<negCount; 265936b7f4cSbjh21 z1 = a0>>( count & 63 ); 266936b7f4cSbjh21 } 267936b7f4cSbjh21 else { 268936b7f4cSbjh21 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 269936b7f4cSbjh21 z1 = 0; 270936b7f4cSbjh21 } 271936b7f4cSbjh21 } 272936b7f4cSbjh21 z0 = 0; 273936b7f4cSbjh21 } 274936b7f4cSbjh21 z2 |= ( a2 != 0 ); 275936b7f4cSbjh21 } 276936b7f4cSbjh21 *z2Ptr = z2; 277936b7f4cSbjh21 *z1Ptr = z1; 278936b7f4cSbjh21 *z0Ptr = z0; 279936b7f4cSbjh21 280936b7f4cSbjh21} 281936b7f4cSbjh21 282936b7f4cSbjh21/* 283936b7f4cSbjh21------------------------------------------------------------------------------- 284936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 285936b7f4cSbjh21number of bits given in `count'. Any bits shifted off are lost. The value 286936b7f4cSbjh21of `count' must be less than 64. 
The result is broken into two 64-bit 287936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 288936b7f4cSbjh21------------------------------------------------------------------------------- 289936b7f4cSbjh21*/ 290936b7f4cSbjh21INLINE void 291936b7f4cSbjh21 shortShift128Left( 292936b7f4cSbjh21 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 293936b7f4cSbjh21{ 294936b7f4cSbjh21 295936b7f4cSbjh21 *z1Ptr = a1<<count; 296936b7f4cSbjh21 *z0Ptr = 297936b7f4cSbjh21 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 298936b7f4cSbjh21 299936b7f4cSbjh21} 300936b7f4cSbjh21 301936b7f4cSbjh21/* 302936b7f4cSbjh21------------------------------------------------------------------------------- 303936b7f4cSbjh21Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 304936b7f4cSbjh21by the number of bits given in `count'. Any bits shifted off are lost. 305936b7f4cSbjh21The value of `count' must be less than 64. The result is broken into three 306936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr', 307936b7f4cSbjh21`z1Ptr', and `z2Ptr'. 
308936b7f4cSbjh21------------------------------------------------------------------------------- 309936b7f4cSbjh21*/ 310936b7f4cSbjh21INLINE void 311936b7f4cSbjh21 shortShift192Left( 312936b7f4cSbjh21 bits64 a0, 313936b7f4cSbjh21 bits64 a1, 314936b7f4cSbjh21 bits64 a2, 315936b7f4cSbjh21 int16 count, 316936b7f4cSbjh21 bits64 *z0Ptr, 317936b7f4cSbjh21 bits64 *z1Ptr, 318936b7f4cSbjh21 bits64 *z2Ptr 319936b7f4cSbjh21 ) 320936b7f4cSbjh21{ 321936b7f4cSbjh21 bits64 z0, z1, z2; 322936b7f4cSbjh21 int8 negCount; 323936b7f4cSbjh21 324936b7f4cSbjh21 z2 = a2<<count; 325936b7f4cSbjh21 z1 = a1<<count; 326936b7f4cSbjh21 z0 = a0<<count; 327936b7f4cSbjh21 if ( 0 < count ) { 328936b7f4cSbjh21 negCount = ( ( - count ) & 63 ); 329936b7f4cSbjh21 z1 |= a2>>negCount; 330936b7f4cSbjh21 z0 |= a1>>negCount; 331936b7f4cSbjh21 } 332936b7f4cSbjh21 *z2Ptr = z2; 333936b7f4cSbjh21 *z1Ptr = z1; 334936b7f4cSbjh21 *z0Ptr = z0; 335936b7f4cSbjh21 336936b7f4cSbjh21} 337936b7f4cSbjh21 338936b7f4cSbjh21/* 339936b7f4cSbjh21------------------------------------------------------------------------------- 340936b7f4cSbjh21Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 341936b7f4cSbjh21value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 342936b7f4cSbjh21any carry out is lost. The result is broken into two 64-bit pieces which 343936b7f4cSbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
344936b7f4cSbjh21------------------------------------------------------------------------------- 345936b7f4cSbjh21*/ 346936b7f4cSbjh21INLINE void 347936b7f4cSbjh21 add128( 348936b7f4cSbjh21 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 349936b7f4cSbjh21{ 350936b7f4cSbjh21 bits64 z1; 351936b7f4cSbjh21 352936b7f4cSbjh21 z1 = a1 + b1; 353936b7f4cSbjh21 *z1Ptr = z1; 354936b7f4cSbjh21 *z0Ptr = a0 + b0 + ( z1 < a1 ); 355936b7f4cSbjh21 356936b7f4cSbjh21} 357936b7f4cSbjh21 358936b7f4cSbjh21/* 359936b7f4cSbjh21------------------------------------------------------------------------------- 360936b7f4cSbjh21Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 361936b7f4cSbjh21192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 362936b7f4cSbjh21modulo 2^192, so any carry out is lost. The result is broken into three 363936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr', 364936b7f4cSbjh21`z1Ptr', and `z2Ptr'. 
365936b7f4cSbjh21------------------------------------------------------------------------------- 366936b7f4cSbjh21*/ 367936b7f4cSbjh21INLINE void 368936b7f4cSbjh21 add192( 369936b7f4cSbjh21 bits64 a0, 370936b7f4cSbjh21 bits64 a1, 371936b7f4cSbjh21 bits64 a2, 372936b7f4cSbjh21 bits64 b0, 373936b7f4cSbjh21 bits64 b1, 374936b7f4cSbjh21 bits64 b2, 375936b7f4cSbjh21 bits64 *z0Ptr, 376936b7f4cSbjh21 bits64 *z1Ptr, 377936b7f4cSbjh21 bits64 *z2Ptr 378936b7f4cSbjh21 ) 379936b7f4cSbjh21{ 380936b7f4cSbjh21 bits64 z0, z1, z2; 381936b7f4cSbjh21 int8 carry0, carry1; 382936b7f4cSbjh21 383936b7f4cSbjh21 z2 = a2 + b2; 384936b7f4cSbjh21 carry1 = ( z2 < a2 ); 385936b7f4cSbjh21 z1 = a1 + b1; 386936b7f4cSbjh21 carry0 = ( z1 < a1 ); 387936b7f4cSbjh21 z0 = a0 + b0; 388936b7f4cSbjh21 z1 += carry1; 389ea680523Stron z0 += ( z1 < (bits64)carry1 ); 390936b7f4cSbjh21 z0 += carry0; 391936b7f4cSbjh21 *z2Ptr = z2; 392936b7f4cSbjh21 *z1Ptr = z1; 393936b7f4cSbjh21 *z0Ptr = z0; 394936b7f4cSbjh21 395936b7f4cSbjh21} 396936b7f4cSbjh21 397936b7f4cSbjh21/* 398936b7f4cSbjh21------------------------------------------------------------------------------- 399936b7f4cSbjh21Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 400936b7f4cSbjh21128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 401936b7f4cSbjh212^128, so any borrow out (carry out) is lost. The result is broken into two 402936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr' and 403936b7f4cSbjh21`z1Ptr'. 
404936b7f4cSbjh21------------------------------------------------------------------------------- 405936b7f4cSbjh21*/ 406936b7f4cSbjh21INLINE void 407936b7f4cSbjh21 sub128( 408936b7f4cSbjh21 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 409936b7f4cSbjh21{ 410936b7f4cSbjh21 411936b7f4cSbjh21 *z1Ptr = a1 - b1; 412936b7f4cSbjh21 *z0Ptr = a0 - b0 - ( a1 < b1 ); 413936b7f4cSbjh21 414936b7f4cSbjh21} 415936b7f4cSbjh21 416936b7f4cSbjh21/* 417936b7f4cSbjh21------------------------------------------------------------------------------- 418936b7f4cSbjh21Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 419936b7f4cSbjh21from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 420936b7f4cSbjh21Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 421936b7f4cSbjh21result is broken into three 64-bit pieces which are stored at the locations 422936b7f4cSbjh21pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 423936b7f4cSbjh21------------------------------------------------------------------------------- 424936b7f4cSbjh21*/ 425936b7f4cSbjh21INLINE void 426936b7f4cSbjh21 sub192( 427936b7f4cSbjh21 bits64 a0, 428936b7f4cSbjh21 bits64 a1, 429936b7f4cSbjh21 bits64 a2, 430936b7f4cSbjh21 bits64 b0, 431936b7f4cSbjh21 bits64 b1, 432936b7f4cSbjh21 bits64 b2, 433936b7f4cSbjh21 bits64 *z0Ptr, 434936b7f4cSbjh21 bits64 *z1Ptr, 435936b7f4cSbjh21 bits64 *z2Ptr 436936b7f4cSbjh21 ) 437936b7f4cSbjh21{ 438936b7f4cSbjh21 bits64 z0, z1, z2; 439936b7f4cSbjh21 int8 borrow0, borrow1; 440936b7f4cSbjh21 441936b7f4cSbjh21 z2 = a2 - b2; 442936b7f4cSbjh21 borrow1 = ( a2 < b2 ); 443936b7f4cSbjh21 z1 = a1 - b1; 444936b7f4cSbjh21 borrow0 = ( a1 < b1 ); 445936b7f4cSbjh21 z0 = a0 - b0; 446ea680523Stron z0 -= ( z1 < (bits64)borrow1 ); 447936b7f4cSbjh21 z1 -= borrow1; 448936b7f4cSbjh21 z0 -= borrow0; 449936b7f4cSbjh21 *z2Ptr = z2; 450936b7f4cSbjh21 *z1Ptr = z1; 451936b7f4cSbjh21 *z0Ptr = z0; 452936b7f4cSbjh21 453936b7f4cSbjh21} 454936b7f4cSbjh21 
455936b7f4cSbjh21/* 456936b7f4cSbjh21------------------------------------------------------------------------------- 457936b7f4cSbjh21Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 458936b7f4cSbjh21into two 64-bit pieces which are stored at the locations pointed to by 459936b7f4cSbjh21`z0Ptr' and `z1Ptr'. 460936b7f4cSbjh21------------------------------------------------------------------------------- 461936b7f4cSbjh21*/ 462936b7f4cSbjh21INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 463936b7f4cSbjh21{ 464936b7f4cSbjh21 bits32 aHigh, aLow, bHigh, bLow; 465936b7f4cSbjh21 bits64 z0, zMiddleA, zMiddleB, z1; 466936b7f4cSbjh21 467*39052f3bSchristos aLow = (bits32)a; 468*39052f3bSchristos aHigh = (bits32)(a>>32); 469*39052f3bSchristos bLow = (bits32)b; 470*39052f3bSchristos bHigh = (bits32)(b>>32); 471936b7f4cSbjh21 z1 = ( (bits64) aLow ) * bLow; 472936b7f4cSbjh21 zMiddleA = ( (bits64) aLow ) * bHigh; 473936b7f4cSbjh21 zMiddleB = ( (bits64) aHigh ) * bLow; 474936b7f4cSbjh21 z0 = ( (bits64) aHigh ) * bHigh; 475936b7f4cSbjh21 zMiddleA += zMiddleB; 476936b7f4cSbjh21 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 477936b7f4cSbjh21 zMiddleA <<= 32; 478936b7f4cSbjh21 z1 += zMiddleA; 479936b7f4cSbjh21 z0 += ( z1 < zMiddleA ); 480936b7f4cSbjh21 *z1Ptr = z1; 481936b7f4cSbjh21 *z0Ptr = z0; 482936b7f4cSbjh21 483936b7f4cSbjh21} 484936b7f4cSbjh21 485936b7f4cSbjh21/* 486936b7f4cSbjh21------------------------------------------------------------------------------- 487936b7f4cSbjh21Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 488936b7f4cSbjh21`b' to obtain a 192-bit product. The product is broken into three 64-bit 489936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 490936b7f4cSbjh21`z2Ptr'. 
491936b7f4cSbjh21------------------------------------------------------------------------------- 492936b7f4cSbjh21*/ 493936b7f4cSbjh21INLINE void 494936b7f4cSbjh21 mul128By64To192( 495936b7f4cSbjh21 bits64 a0, 496936b7f4cSbjh21 bits64 a1, 497936b7f4cSbjh21 bits64 b, 498936b7f4cSbjh21 bits64 *z0Ptr, 499936b7f4cSbjh21 bits64 *z1Ptr, 500936b7f4cSbjh21 bits64 *z2Ptr 501936b7f4cSbjh21 ) 502936b7f4cSbjh21{ 503936b7f4cSbjh21 bits64 z0, z1, z2, more1; 504936b7f4cSbjh21 505936b7f4cSbjh21 mul64To128( a1, b, &z1, &z2 ); 506936b7f4cSbjh21 mul64To128( a0, b, &z0, &more1 ); 507936b7f4cSbjh21 add128( z0, more1, 0, z1, &z0, &z1 ); 508936b7f4cSbjh21 *z2Ptr = z2; 509936b7f4cSbjh21 *z1Ptr = z1; 510936b7f4cSbjh21 *z0Ptr = z0; 511936b7f4cSbjh21 512936b7f4cSbjh21} 513936b7f4cSbjh21 514936b7f4cSbjh21/* 515936b7f4cSbjh21------------------------------------------------------------------------------- 516936b7f4cSbjh21Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 517936b7f4cSbjh21128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 518936b7f4cSbjh21product. The product is broken into four 64-bit pieces which are stored at 519936b7f4cSbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
520936b7f4cSbjh21------------------------------------------------------------------------------- 521936b7f4cSbjh21*/ 522936b7f4cSbjh21INLINE void 523936b7f4cSbjh21 mul128To256( 524936b7f4cSbjh21 bits64 a0, 525936b7f4cSbjh21 bits64 a1, 526936b7f4cSbjh21 bits64 b0, 527936b7f4cSbjh21 bits64 b1, 528936b7f4cSbjh21 bits64 *z0Ptr, 529936b7f4cSbjh21 bits64 *z1Ptr, 530936b7f4cSbjh21 bits64 *z2Ptr, 531936b7f4cSbjh21 bits64 *z3Ptr 532936b7f4cSbjh21 ) 533936b7f4cSbjh21{ 534936b7f4cSbjh21 bits64 z0, z1, z2, z3; 535936b7f4cSbjh21 bits64 more1, more2; 536936b7f4cSbjh21 537936b7f4cSbjh21 mul64To128( a1, b1, &z2, &z3 ); 538936b7f4cSbjh21 mul64To128( a1, b0, &z1, &more2 ); 539936b7f4cSbjh21 add128( z1, more2, 0, z2, &z1, &z2 ); 540936b7f4cSbjh21 mul64To128( a0, b0, &z0, &more1 ); 541936b7f4cSbjh21 add128( z0, more1, 0, z1, &z0, &z1 ); 542936b7f4cSbjh21 mul64To128( a0, b1, &more1, &more2 ); 543936b7f4cSbjh21 add128( more1, more2, 0, z2, &more1, &z2 ); 544936b7f4cSbjh21 add128( z0, z1, 0, more1, &z0, &z1 ); 545936b7f4cSbjh21 *z3Ptr = z3; 546936b7f4cSbjh21 *z2Ptr = z2; 547936b7f4cSbjh21 *z1Ptr = z1; 548936b7f4cSbjh21 *z0Ptr = z0; 549936b7f4cSbjh21 550936b7f4cSbjh21} 551936b7f4cSbjh21 552936b7f4cSbjh21/* 553936b7f4cSbjh21------------------------------------------------------------------------------- 554936b7f4cSbjh21Returns an approximation to the 64-bit integer quotient obtained by dividing 555936b7f4cSbjh21`b' into the 128-bit value formed by concatenating `a0' and `a1'. The 556936b7f4cSbjh21divisor `b' must be at least 2^63. If q is the exact quotient truncated 557936b7f4cSbjh21toward zero, the approximation returned lies between q and q + 2 inclusive. 558936b7f4cSbjh21If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 559936b7f4cSbjh21unsigned integer is returned. 
560936b7f4cSbjh21------------------------------------------------------------------------------- 561936b7f4cSbjh21*/ 562936b7f4cSbjh21static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 563936b7f4cSbjh21{ 564936b7f4cSbjh21 bits64 b0, b1; 565936b7f4cSbjh21 bits64 rem0, rem1, term0, term1; 566936b7f4cSbjh21 bits64 z; 567936b7f4cSbjh21 568936b7f4cSbjh21 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 569936b7f4cSbjh21 b0 = b>>32; 570936b7f4cSbjh21 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 571936b7f4cSbjh21 mul64To128( b, z, &term0, &term1 ); 572936b7f4cSbjh21 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 573936b7f4cSbjh21 while ( ( (sbits64) rem0 ) < 0 ) { 574936b7f4cSbjh21 z -= LIT64( 0x100000000 ); 575936b7f4cSbjh21 b1 = b<<32; 576936b7f4cSbjh21 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 577936b7f4cSbjh21 } 578936b7f4cSbjh21 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 579936b7f4cSbjh21 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 580936b7f4cSbjh21 return z; 581936b7f4cSbjh21 582936b7f4cSbjh21} 583936b7f4cSbjh21 584936b7f4cSbjh21#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 585936b7f4cSbjh21/* 586936b7f4cSbjh21------------------------------------------------------------------------------- 587936b7f4cSbjh21Returns an approximation to the square root of the 32-bit significand given 588936b7f4cSbjh21by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 589936b7f4cSbjh21`aExp' (the least significant bit) is 1, the integer returned approximates 590936b7f4cSbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 591936b7f4cSbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 592936b7f4cSbjh21case, the approximation returned lies strictly within +/-2 of the exact 593936b7f4cSbjh21value. 
594936b7f4cSbjh21------------------------------------------------------------------------------- 595936b7f4cSbjh21*/ 596936b7f4cSbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a ) 597936b7f4cSbjh21{ 598936b7f4cSbjh21 static const bits16 sqrtOddAdjustments[] = { 599936b7f4cSbjh21 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 600936b7f4cSbjh21 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 601936b7f4cSbjh21 }; 602936b7f4cSbjh21 static const bits16 sqrtEvenAdjustments[] = { 603936b7f4cSbjh21 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 604936b7f4cSbjh21 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 605936b7f4cSbjh21 }; 606936b7f4cSbjh21 int8 idx; 607936b7f4cSbjh21 bits32 z; 608936b7f4cSbjh21 609936b7f4cSbjh21 idx = ( a>>27 ) & 15; 610936b7f4cSbjh21 if ( aExp & 1 ) { 611936b7f4cSbjh21 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ]; 612936b7f4cSbjh21 z = ( ( a / z )<<14 ) + ( z<<15 ); 613936b7f4cSbjh21 a >>= 1; 614936b7f4cSbjh21 } 615936b7f4cSbjh21 else { 616936b7f4cSbjh21 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ]; 617936b7f4cSbjh21 z = a / z + z; 618936b7f4cSbjh21 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 619*39052f3bSchristos if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 ); 620936b7f4cSbjh21 } 621936b7f4cSbjh21 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 622936b7f4cSbjh21 623936b7f4cSbjh21} 624936b7f4cSbjh21#endif 625936b7f4cSbjh21 626936b7f4cSbjh21/* 627936b7f4cSbjh21------------------------------------------------------------------------------- 628936b7f4cSbjh21Returns the number of leading 0 bits before the most-significant 1 bit of 629936b7f4cSbjh21`a'. If `a' is zero, 32 is returned. 
630936b7f4cSbjh21------------------------------------------------------------------------------- 631936b7f4cSbjh21*/ 632936b7f4cSbjh21static int8 countLeadingZeros32( bits32 a ) 633936b7f4cSbjh21{ 634936b7f4cSbjh21 static const int8 countLeadingZerosHigh[] = { 635936b7f4cSbjh21 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 636936b7f4cSbjh21 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 637936b7f4cSbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 638936b7f4cSbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 639936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 640936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 641936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 642936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 643936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 644936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 645936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 646936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 647936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 648936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 649936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 650936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 651936b7f4cSbjh21 }; 652936b7f4cSbjh21 int8 shiftCount; 653936b7f4cSbjh21 654936b7f4cSbjh21 shiftCount = 0; 655936b7f4cSbjh21 if ( a < 0x10000 ) { 656936b7f4cSbjh21 shiftCount += 16; 657936b7f4cSbjh21 a <<= 16; 658936b7f4cSbjh21 } 659936b7f4cSbjh21 if ( a < 0x1000000 ) { 660936b7f4cSbjh21 shiftCount += 8; 661936b7f4cSbjh21 a <<= 8; 662936b7f4cSbjh21 } 663936b7f4cSbjh21 shiftCount += countLeadingZerosHigh[ a>>24 ]; 664936b7f4cSbjh21 return shiftCount; 665936b7f4cSbjh21 666936b7f4cSbjh21} 667936b7f4cSbjh21 668936b7f4cSbjh21/* 669936b7f4cSbjh21------------------------------------------------------------------------------- 670936b7f4cSbjh21Returns the number of leading 0 bits 
before the most-significant 1 bit of 671936b7f4cSbjh21`a'. If `a' is zero, 64 is returned. 672936b7f4cSbjh21------------------------------------------------------------------------------- 673936b7f4cSbjh21*/ 674936b7f4cSbjh21static int8 countLeadingZeros64( bits64 a ) 675936b7f4cSbjh21{ 676936b7f4cSbjh21 int8 shiftCount; 677936b7f4cSbjh21 678936b7f4cSbjh21 shiftCount = 0; 679936b7f4cSbjh21 if ( a < ( (bits64) 1 )<<32 ) { 680936b7f4cSbjh21 shiftCount += 32; 681936b7f4cSbjh21 } 682936b7f4cSbjh21 else { 683936b7f4cSbjh21 a >>= 32; 684936b7f4cSbjh21 } 685*39052f3bSchristos shiftCount += (int8)countLeadingZeros32( (bits32)a ); 686936b7f4cSbjh21 return shiftCount; 687936b7f4cSbjh21 688936b7f4cSbjh21} 689936b7f4cSbjh21 690936b7f4cSbjh21/* 691936b7f4cSbjh21------------------------------------------------------------------------------- 692936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 693936b7f4cSbjh21is equal to the 128-bit value formed by concatenating `b0' and `b1'. 694936b7f4cSbjh21Otherwise, returns 0. 695936b7f4cSbjh21------------------------------------------------------------------------------- 696936b7f4cSbjh21*/ 697936b7f4cSbjh21INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 698936b7f4cSbjh21{ 699936b7f4cSbjh21 700936b7f4cSbjh21 return ( a0 == b0 ) && ( a1 == b1 ); 701936b7f4cSbjh21 702936b7f4cSbjh21} 703936b7f4cSbjh21 704936b7f4cSbjh21/* 705936b7f4cSbjh21------------------------------------------------------------------------------- 706936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 707936b7f4cSbjh21than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 708936b7f4cSbjh21Otherwise, returns 0. 
709936b7f4cSbjh21------------------------------------------------------------------------------- 710936b7f4cSbjh21*/ 711936b7f4cSbjh21INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 712936b7f4cSbjh21{ 713936b7f4cSbjh21 714936b7f4cSbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 715936b7f4cSbjh21 716936b7f4cSbjh21} 717936b7f4cSbjh21 718936b7f4cSbjh21/* 719936b7f4cSbjh21------------------------------------------------------------------------------- 720936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 721936b7f4cSbjh21than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 722936b7f4cSbjh21returns 0. 723936b7f4cSbjh21------------------------------------------------------------------------------- 724936b7f4cSbjh21*/ 725936b7f4cSbjh21INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 726936b7f4cSbjh21{ 727936b7f4cSbjh21 728936b7f4cSbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 729936b7f4cSbjh21 730936b7f4cSbjh21} 731936b7f4cSbjh21 732936b7f4cSbjh21/* 733936b7f4cSbjh21------------------------------------------------------------------------------- 734936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 735936b7f4cSbjh21not equal to the 128-bit value formed by concatenating `b0' and `b1'. 736936b7f4cSbjh21Otherwise, returns 0. 737936b7f4cSbjh21------------------------------------------------------------------------------- 738936b7f4cSbjh21*/ 739936b7f4cSbjh21INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 740936b7f4cSbjh21{ 741936b7f4cSbjh21 742936b7f4cSbjh21 return ( a0 != b0 ) || ( a1 != b1 ); 743936b7f4cSbjh21 744936b7f4cSbjh21} 745936b7f4cSbjh21 746