/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
28936b7f4cSbjh21 29936b7f4cSbjh21=============================================================================== 30936b7f4cSbjh21*/ 31936b7f4cSbjh21 32936b7f4cSbjh21/* 33936b7f4cSbjh21------------------------------------------------------------------------------- 34936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'. If any nonzero 35936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of 36936b7f4cSbjh21the result by setting the least significant bit to 1. The value of `count' 37936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the 38936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero. 39936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'. 40936b7f4cSbjh21------------------------------------------------------------------------------- 41936b7f4cSbjh21*/ 42936b7f4cSbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 43936b7f4cSbjh21{ 44936b7f4cSbjh21 bits32 z; 45936b7f4cSbjh21 46936b7f4cSbjh21 if ( count == 0 ) { 47936b7f4cSbjh21 z = a; 48936b7f4cSbjh21 } 49936b7f4cSbjh21 else if ( count < 32 ) { 50936b7f4cSbjh21 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 51936b7f4cSbjh21 } 52936b7f4cSbjh21 else { 53936b7f4cSbjh21 z = ( a != 0 ); 54936b7f4cSbjh21 } 55936b7f4cSbjh21 *zPtr = z; 56936b7f4cSbjh21 57936b7f4cSbjh21} 58936b7f4cSbjh21 59936b7f4cSbjh21/* 60936b7f4cSbjh21------------------------------------------------------------------------------- 61936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 62936b7f4cSbjh21number of bits given in `count'. Any bits shifted off are lost. The value 63936b7f4cSbjh21of `count' can be arbitrarily large; in particular, if `count' is greater 64936b7f4cSbjh21than 64, the result will be 0. The result is broken into two 32-bit pieces 65936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
66936b7f4cSbjh21------------------------------------------------------------------------------- 67936b7f4cSbjh21*/ 68936b7f4cSbjh21INLINE void 69936b7f4cSbjh21 shift64Right( 70936b7f4cSbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 71936b7f4cSbjh21{ 72936b7f4cSbjh21 bits32 z0, z1; 73936b7f4cSbjh21 int8 negCount = ( - count ) & 31; 74936b7f4cSbjh21 75936b7f4cSbjh21 if ( count == 0 ) { 76936b7f4cSbjh21 z1 = a1; 77936b7f4cSbjh21 z0 = a0; 78936b7f4cSbjh21 } 79936b7f4cSbjh21 else if ( count < 32 ) { 80936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 81936b7f4cSbjh21 z0 = a0>>count; 82936b7f4cSbjh21 } 83936b7f4cSbjh21 else { 84936b7f4cSbjh21 z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0; 85936b7f4cSbjh21 z0 = 0; 86936b7f4cSbjh21 } 87936b7f4cSbjh21 *z1Ptr = z1; 88936b7f4cSbjh21 *z0Ptr = z0; 89936b7f4cSbjh21 90936b7f4cSbjh21} 91936b7f4cSbjh21 92936b7f4cSbjh21/* 93936b7f4cSbjh21------------------------------------------------------------------------------- 94936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the 95936b7f4cSbjh21number of bits given in `count'. If any nonzero bits are shifted off, they 96936b7f4cSbjh21are ``jammed'' into the least significant bit of the result by setting the 97936b7f4cSbjh21least significant bit to 1. The value of `count' can be arbitrarily large; 98936b7f4cSbjh21in particular, if `count' is greater than 64, the result will be either 0 99936b7f4cSbjh21or 1, depending on whether the concatenation of `a0' and `a1' is zero or 100936b7f4cSbjh21nonzero. The result is broken into two 32-bit pieces which are stored at 101936b7f4cSbjh21the locations pointed to by `z0Ptr' and `z1Ptr'. 
102936b7f4cSbjh21------------------------------------------------------------------------------- 103936b7f4cSbjh21*/ 104936b7f4cSbjh21INLINE void 105936b7f4cSbjh21 shift64RightJamming( 106936b7f4cSbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 107936b7f4cSbjh21{ 108936b7f4cSbjh21 bits32 z0, z1; 109936b7f4cSbjh21 int8 negCount = ( - count ) & 31; 110936b7f4cSbjh21 111936b7f4cSbjh21 if ( count == 0 ) { 112936b7f4cSbjh21 z1 = a1; 113936b7f4cSbjh21 z0 = a0; 114936b7f4cSbjh21 } 115936b7f4cSbjh21 else if ( count < 32 ) { 116936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 117936b7f4cSbjh21 z0 = a0>>count; 118936b7f4cSbjh21 } 119936b7f4cSbjh21 else { 120936b7f4cSbjh21 if ( count == 32 ) { 121936b7f4cSbjh21 z1 = a0 | ( a1 != 0 ); 122936b7f4cSbjh21 } 123936b7f4cSbjh21 else if ( count < 64 ) { 124936b7f4cSbjh21 z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 125936b7f4cSbjh21 } 126936b7f4cSbjh21 else { 127936b7f4cSbjh21 z1 = ( ( a0 | a1 ) != 0 ); 128936b7f4cSbjh21 } 129936b7f4cSbjh21 z0 = 0; 130936b7f4cSbjh21 } 131936b7f4cSbjh21 *z1Ptr = z1; 132936b7f4cSbjh21 *z0Ptr = z0; 133936b7f4cSbjh21 134936b7f4cSbjh21} 135936b7f4cSbjh21 136936b7f4cSbjh21/* 137936b7f4cSbjh21------------------------------------------------------------------------------- 138936b7f4cSbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right 139936b7f4cSbjh21by 32 _plus_ the number of bits given in `count'. The shifted result is 140936b7f4cSbjh21at most 64 nonzero bits; these are broken into two 32-bit pieces which are 141936b7f4cSbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
The bits shifted
off form a third 32-bit result as follows: The _last_ bit shifted off is
the most-significant bit of the extra result, and the other 31 bits of the
extra result are all zero if and only if _all_but_the_last_ bits shifted off
were all zero. This extra result is stored in the location pointed to by
`z2Ptr'. The value of `count' can be arbitrarily large.
    (This routine makes more sense if `a0', `a1', and `a2' are considered
to form a fixed-point value with binary point between `a1' and `a2'. This
fixed-point value is shifted right by the number of bits given in `count',
and the integer part of the result is returned at the locations pointed to
by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
corrupted as described above, and is returned at the location pointed to by
`z2Ptr'.)
154936b7f4cSbjh21------------------------------------------------------------------------------- 155936b7f4cSbjh21*/ 156936b7f4cSbjh21INLINE void 157936b7f4cSbjh21 shift64ExtraRightJamming( 158936b7f4cSbjh21 bits32 a0, 159936b7f4cSbjh21 bits32 a1, 160936b7f4cSbjh21 bits32 a2, 161936b7f4cSbjh21 int16 count, 162936b7f4cSbjh21 bits32 *z0Ptr, 163936b7f4cSbjh21 bits32 *z1Ptr, 164936b7f4cSbjh21 bits32 *z2Ptr 165936b7f4cSbjh21 ) 166936b7f4cSbjh21{ 167936b7f4cSbjh21 bits32 z0, z1, z2; 168936b7f4cSbjh21 int8 negCount = ( - count ) & 31; 169936b7f4cSbjh21 170936b7f4cSbjh21 if ( count == 0 ) { 171936b7f4cSbjh21 z2 = a2; 172936b7f4cSbjh21 z1 = a1; 173936b7f4cSbjh21 z0 = a0; 174936b7f4cSbjh21 } 175936b7f4cSbjh21 else { 176936b7f4cSbjh21 if ( count < 32 ) { 177936b7f4cSbjh21 z2 = a1<<negCount; 178936b7f4cSbjh21 z1 = ( a0<<negCount ) | ( a1>>count ); 179936b7f4cSbjh21 z0 = a0>>count; 180936b7f4cSbjh21 } 181936b7f4cSbjh21 else { 182936b7f4cSbjh21 if ( count == 32 ) { 183936b7f4cSbjh21 z2 = a1; 184936b7f4cSbjh21 z1 = a0; 185936b7f4cSbjh21 } 186936b7f4cSbjh21 else { 187936b7f4cSbjh21 a2 |= a1; 188936b7f4cSbjh21 if ( count < 64 ) { 189936b7f4cSbjh21 z2 = a0<<negCount; 190936b7f4cSbjh21 z1 = a0>>( count & 31 ); 191936b7f4cSbjh21 } 192936b7f4cSbjh21 else { 193936b7f4cSbjh21 z2 = ( count == 64 ) ? a0 : ( a0 != 0 ); 194936b7f4cSbjh21 z1 = 0; 195936b7f4cSbjh21 } 196936b7f4cSbjh21 } 197936b7f4cSbjh21 z0 = 0; 198936b7f4cSbjh21 } 199936b7f4cSbjh21 z2 |= ( a2 != 0 ); 200936b7f4cSbjh21 } 201936b7f4cSbjh21 *z2Ptr = z2; 202936b7f4cSbjh21 *z1Ptr = z1; 203936b7f4cSbjh21 *z0Ptr = z0; 204936b7f4cSbjh21 205936b7f4cSbjh21} 206936b7f4cSbjh21 207936b7f4cSbjh21/* 208936b7f4cSbjh21------------------------------------------------------------------------------- 209936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the 210936b7f4cSbjh21number of bits given in `count'. Any bits shifted off are lost. The value 211936b7f4cSbjh21of `count' must be less than 32. 
The result is broken into two 32-bit 212936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 213936b7f4cSbjh21------------------------------------------------------------------------------- 214936b7f4cSbjh21*/ 215936b7f4cSbjh21INLINE void 216936b7f4cSbjh21 shortShift64Left( 217936b7f4cSbjh21 bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) 218936b7f4cSbjh21{ 219936b7f4cSbjh21 220936b7f4cSbjh21 *z1Ptr = a1<<count; 221936b7f4cSbjh21 *z0Ptr = 222936b7f4cSbjh21 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) ); 223936b7f4cSbjh21 224936b7f4cSbjh21} 225936b7f4cSbjh21 226936b7f4cSbjh21/* 227936b7f4cSbjh21------------------------------------------------------------------------------- 228936b7f4cSbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left 229936b7f4cSbjh21by the number of bits given in `count'. Any bits shifted off are lost. 230936b7f4cSbjh21The value of `count' must be less than 32. The result is broken into three 231936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr', 232936b7f4cSbjh21`z1Ptr', and `z2Ptr'. 
233936b7f4cSbjh21------------------------------------------------------------------------------- 234936b7f4cSbjh21*/ 235936b7f4cSbjh21INLINE void 236936b7f4cSbjh21 shortShift96Left( 237936b7f4cSbjh21 bits32 a0, 238936b7f4cSbjh21 bits32 a1, 239936b7f4cSbjh21 bits32 a2, 240936b7f4cSbjh21 int16 count, 241936b7f4cSbjh21 bits32 *z0Ptr, 242936b7f4cSbjh21 bits32 *z1Ptr, 243936b7f4cSbjh21 bits32 *z2Ptr 244936b7f4cSbjh21 ) 245936b7f4cSbjh21{ 246936b7f4cSbjh21 bits32 z0, z1, z2; 247936b7f4cSbjh21 int8 negCount; 248936b7f4cSbjh21 249936b7f4cSbjh21 z2 = a2<<count; 250936b7f4cSbjh21 z1 = a1<<count; 251936b7f4cSbjh21 z0 = a0<<count; 252936b7f4cSbjh21 if ( 0 < count ) { 253936b7f4cSbjh21 negCount = ( ( - count ) & 31 ); 254936b7f4cSbjh21 z1 |= a2>>negCount; 255936b7f4cSbjh21 z0 |= a1>>negCount; 256936b7f4cSbjh21 } 257936b7f4cSbjh21 *z2Ptr = z2; 258936b7f4cSbjh21 *z1Ptr = z1; 259936b7f4cSbjh21 *z0Ptr = z0; 260936b7f4cSbjh21 261936b7f4cSbjh21} 262936b7f4cSbjh21 263936b7f4cSbjh21/* 264936b7f4cSbjh21------------------------------------------------------------------------------- 265936b7f4cSbjh21Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit 266936b7f4cSbjh21value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so 267936b7f4cSbjh21any carry out is lost. The result is broken into two 32-bit pieces which 268936b7f4cSbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
269936b7f4cSbjh21------------------------------------------------------------------------------- 270936b7f4cSbjh21*/ 271936b7f4cSbjh21INLINE void 272936b7f4cSbjh21 add64( 273936b7f4cSbjh21 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) 274936b7f4cSbjh21{ 275936b7f4cSbjh21 bits32 z1; 276936b7f4cSbjh21 277936b7f4cSbjh21 z1 = a1 + b1; 278936b7f4cSbjh21 *z1Ptr = z1; 279936b7f4cSbjh21 *z0Ptr = a0 + b0 + ( z1 < a1 ); 280936b7f4cSbjh21 281936b7f4cSbjh21} 282936b7f4cSbjh21 283936b7f4cSbjh21/* 284936b7f4cSbjh21------------------------------------------------------------------------------- 285936b7f4cSbjh21Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the 286936b7f4cSbjh2196-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 287936b7f4cSbjh21modulo 2^96, so any carry out is lost. The result is broken into three 288936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr', 289936b7f4cSbjh21`z1Ptr', and `z2Ptr'. 
290936b7f4cSbjh21------------------------------------------------------------------------------- 291936b7f4cSbjh21*/ 292936b7f4cSbjh21INLINE void 293936b7f4cSbjh21 add96( 294936b7f4cSbjh21 bits32 a0, 295936b7f4cSbjh21 bits32 a1, 296936b7f4cSbjh21 bits32 a2, 297936b7f4cSbjh21 bits32 b0, 298936b7f4cSbjh21 bits32 b1, 299936b7f4cSbjh21 bits32 b2, 300936b7f4cSbjh21 bits32 *z0Ptr, 301936b7f4cSbjh21 bits32 *z1Ptr, 302936b7f4cSbjh21 bits32 *z2Ptr 303936b7f4cSbjh21 ) 304936b7f4cSbjh21{ 305936b7f4cSbjh21 bits32 z0, z1, z2; 306936b7f4cSbjh21 int8 carry0, carry1; 307936b7f4cSbjh21 308936b7f4cSbjh21 z2 = a2 + b2; 309936b7f4cSbjh21 carry1 = ( z2 < a2 ); 310936b7f4cSbjh21 z1 = a1 + b1; 311936b7f4cSbjh21 carry0 = ( z1 < a1 ); 312936b7f4cSbjh21 z0 = a0 + b0; 313936b7f4cSbjh21 z1 += carry1; 314*d52f6f4bSlukem z0 += ( z1 < (bits32)carry1 ); 315936b7f4cSbjh21 z0 += carry0; 316936b7f4cSbjh21 *z2Ptr = z2; 317936b7f4cSbjh21 *z1Ptr = z1; 318936b7f4cSbjh21 *z0Ptr = z0; 319936b7f4cSbjh21 320936b7f4cSbjh21} 321936b7f4cSbjh21 322936b7f4cSbjh21/* 323936b7f4cSbjh21------------------------------------------------------------------------------- 324936b7f4cSbjh21Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the 325936b7f4cSbjh2164-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 326936b7f4cSbjh212^64, so any borrow out (carry out) is lost. The result is broken into two 327936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr' and 328936b7f4cSbjh21`z1Ptr'. 
329936b7f4cSbjh21------------------------------------------------------------------------------- 330936b7f4cSbjh21*/ 331936b7f4cSbjh21INLINE void 332936b7f4cSbjh21 sub64( 333936b7f4cSbjh21 bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) 334936b7f4cSbjh21{ 335936b7f4cSbjh21 336936b7f4cSbjh21 *z1Ptr = a1 - b1; 337936b7f4cSbjh21 *z0Ptr = a0 - b0 - ( a1 < b1 ); 338936b7f4cSbjh21 339936b7f4cSbjh21} 340936b7f4cSbjh21 341936b7f4cSbjh21/* 342936b7f4cSbjh21------------------------------------------------------------------------------- 343936b7f4cSbjh21Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from 344936b7f4cSbjh21the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction 345936b7f4cSbjh21is modulo 2^96, so any borrow out (carry out) is lost. The result is broken 346936b7f4cSbjh21into three 32-bit pieces which are stored at the locations pointed to by 347936b7f4cSbjh21`z0Ptr', `z1Ptr', and `z2Ptr'. 348936b7f4cSbjh21------------------------------------------------------------------------------- 349936b7f4cSbjh21*/ 350936b7f4cSbjh21INLINE void 351936b7f4cSbjh21 sub96( 352936b7f4cSbjh21 bits32 a0, 353936b7f4cSbjh21 bits32 a1, 354936b7f4cSbjh21 bits32 a2, 355936b7f4cSbjh21 bits32 b0, 356936b7f4cSbjh21 bits32 b1, 357936b7f4cSbjh21 bits32 b2, 358936b7f4cSbjh21 bits32 *z0Ptr, 359936b7f4cSbjh21 bits32 *z1Ptr, 360936b7f4cSbjh21 bits32 *z2Ptr 361936b7f4cSbjh21 ) 362936b7f4cSbjh21{ 363936b7f4cSbjh21 bits32 z0, z1, z2; 364936b7f4cSbjh21 int8 borrow0, borrow1; 365936b7f4cSbjh21 366936b7f4cSbjh21 z2 = a2 - b2; 367936b7f4cSbjh21 borrow1 = ( a2 < b2 ); 368936b7f4cSbjh21 z1 = a1 - b1; 369936b7f4cSbjh21 borrow0 = ( a1 < b1 ); 370936b7f4cSbjh21 z0 = a0 - b0; 371*d52f6f4bSlukem z0 -= ( z1 < (bits32)borrow1 ); 372936b7f4cSbjh21 z1 -= borrow1; 373936b7f4cSbjh21 z0 -= borrow0; 374936b7f4cSbjh21 *z2Ptr = z2; 375936b7f4cSbjh21 *z1Ptr = z1; 376936b7f4cSbjh21 *z0Ptr = z0; 377936b7f4cSbjh21 378936b7f4cSbjh21} 379936b7f4cSbjh21 
380936b7f4cSbjh21/* 381936b7f4cSbjh21------------------------------------------------------------------------------- 382936b7f4cSbjh21Multiplies `a' by `b' to obtain a 64-bit product. The product is broken 383936b7f4cSbjh21into two 32-bit pieces which are stored at the locations pointed to by 384936b7f4cSbjh21`z0Ptr' and `z1Ptr'. 385936b7f4cSbjh21------------------------------------------------------------------------------- 386936b7f4cSbjh21*/ 387936b7f4cSbjh21INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) 388936b7f4cSbjh21{ 389936b7f4cSbjh21 bits16 aHigh, aLow, bHigh, bLow; 390936b7f4cSbjh21 bits32 z0, zMiddleA, zMiddleB, z1; 391936b7f4cSbjh21 392936b7f4cSbjh21 aLow = a; 393936b7f4cSbjh21 aHigh = a>>16; 394936b7f4cSbjh21 bLow = b; 395936b7f4cSbjh21 bHigh = b>>16; 396936b7f4cSbjh21 z1 = ( (bits32) aLow ) * bLow; 397936b7f4cSbjh21 zMiddleA = ( (bits32) aLow ) * bHigh; 398936b7f4cSbjh21 zMiddleB = ( (bits32) aHigh ) * bLow; 399936b7f4cSbjh21 z0 = ( (bits32) aHigh ) * bHigh; 400936b7f4cSbjh21 zMiddleA += zMiddleB; 401936b7f4cSbjh21 z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 ); 402936b7f4cSbjh21 zMiddleA <<= 16; 403936b7f4cSbjh21 z1 += zMiddleA; 404936b7f4cSbjh21 z0 += ( z1 < zMiddleA ); 405936b7f4cSbjh21 *z1Ptr = z1; 406936b7f4cSbjh21 *z0Ptr = z0; 407936b7f4cSbjh21 408936b7f4cSbjh21} 409936b7f4cSbjh21 410936b7f4cSbjh21/* 411936b7f4cSbjh21------------------------------------------------------------------------------- 412936b7f4cSbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' 413936b7f4cSbjh21to obtain a 96-bit product. The product is broken into three 32-bit pieces 414936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 415936b7f4cSbjh21`z2Ptr'. 
416936b7f4cSbjh21------------------------------------------------------------------------------- 417936b7f4cSbjh21*/ 418936b7f4cSbjh21INLINE void 419936b7f4cSbjh21 mul64By32To96( 420936b7f4cSbjh21 bits32 a0, 421936b7f4cSbjh21 bits32 a1, 422936b7f4cSbjh21 bits32 b, 423936b7f4cSbjh21 bits32 *z0Ptr, 424936b7f4cSbjh21 bits32 *z1Ptr, 425936b7f4cSbjh21 bits32 *z2Ptr 426936b7f4cSbjh21 ) 427936b7f4cSbjh21{ 428936b7f4cSbjh21 bits32 z0, z1, z2, more1; 429936b7f4cSbjh21 430936b7f4cSbjh21 mul32To64( a1, b, &z1, &z2 ); 431936b7f4cSbjh21 mul32To64( a0, b, &z0, &more1 ); 432936b7f4cSbjh21 add64( z0, more1, 0, z1, &z0, &z1 ); 433936b7f4cSbjh21 *z2Ptr = z2; 434936b7f4cSbjh21 *z1Ptr = z1; 435936b7f4cSbjh21 *z0Ptr = z0; 436936b7f4cSbjh21 437936b7f4cSbjh21} 438936b7f4cSbjh21 439936b7f4cSbjh21/* 440936b7f4cSbjh21------------------------------------------------------------------------------- 441936b7f4cSbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the 442936b7f4cSbjh2164-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit 443936b7f4cSbjh21product. The product is broken into four 32-bit pieces which are stored at 444936b7f4cSbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
445936b7f4cSbjh21------------------------------------------------------------------------------- 446936b7f4cSbjh21*/ 447936b7f4cSbjh21INLINE void 448936b7f4cSbjh21 mul64To128( 449936b7f4cSbjh21 bits32 a0, 450936b7f4cSbjh21 bits32 a1, 451936b7f4cSbjh21 bits32 b0, 452936b7f4cSbjh21 bits32 b1, 453936b7f4cSbjh21 bits32 *z0Ptr, 454936b7f4cSbjh21 bits32 *z1Ptr, 455936b7f4cSbjh21 bits32 *z2Ptr, 456936b7f4cSbjh21 bits32 *z3Ptr 457936b7f4cSbjh21 ) 458936b7f4cSbjh21{ 459936b7f4cSbjh21 bits32 z0, z1, z2, z3; 460936b7f4cSbjh21 bits32 more1, more2; 461936b7f4cSbjh21 462936b7f4cSbjh21 mul32To64( a1, b1, &z2, &z3 ); 463936b7f4cSbjh21 mul32To64( a1, b0, &z1, &more2 ); 464936b7f4cSbjh21 add64( z1, more2, 0, z2, &z1, &z2 ); 465936b7f4cSbjh21 mul32To64( a0, b0, &z0, &more1 ); 466936b7f4cSbjh21 add64( z0, more1, 0, z1, &z0, &z1 ); 467936b7f4cSbjh21 mul32To64( a0, b1, &more1, &more2 ); 468936b7f4cSbjh21 add64( more1, more2, 0, z2, &more1, &z2 ); 469936b7f4cSbjh21 add64( z0, z1, 0, more1, &z0, &z1 ); 470936b7f4cSbjh21 *z3Ptr = z3; 471936b7f4cSbjh21 *z2Ptr = z2; 472936b7f4cSbjh21 *z1Ptr = z1; 473936b7f4cSbjh21 *z0Ptr = z0; 474936b7f4cSbjh21 475936b7f4cSbjh21} 476936b7f4cSbjh21 477936b7f4cSbjh21/* 478936b7f4cSbjh21------------------------------------------------------------------------------- 479936b7f4cSbjh21Returns an approximation to the 32-bit integer quotient obtained by dividing 480936b7f4cSbjh21`b' into the 64-bit value formed by concatenating `a0' and `a1'. The 481936b7f4cSbjh21divisor `b' must be at least 2^31. If q is the exact quotient truncated 482936b7f4cSbjh21toward zero, the approximation returned lies between q and q + 2 inclusive. 483936b7f4cSbjh21If the exact quotient q is larger than 32 bits, the maximum positive 32-bit 484936b7f4cSbjh21unsigned integer is returned. 
485936b7f4cSbjh21------------------------------------------------------------------------------- 486936b7f4cSbjh21*/ 487936b7f4cSbjh21static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) 488936b7f4cSbjh21{ 489936b7f4cSbjh21 bits32 b0, b1; 490936b7f4cSbjh21 bits32 rem0, rem1, term0, term1; 491936b7f4cSbjh21 bits32 z; 492936b7f4cSbjh21 493936b7f4cSbjh21 if ( b <= a0 ) return 0xFFFFFFFF; 494936b7f4cSbjh21 b0 = b>>16; 495936b7f4cSbjh21 z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; 496936b7f4cSbjh21 mul32To64( b, z, &term0, &term1 ); 497936b7f4cSbjh21 sub64( a0, a1, term0, term1, &rem0, &rem1 ); 498936b7f4cSbjh21 while ( ( (sbits32) rem0 ) < 0 ) { 499936b7f4cSbjh21 z -= 0x10000; 500936b7f4cSbjh21 b1 = b<<16; 501936b7f4cSbjh21 add64( rem0, rem1, b0, b1, &rem0, &rem1 ); 502936b7f4cSbjh21 } 503936b7f4cSbjh21 rem0 = ( rem0<<16 ) | ( rem1>>16 ); 504936b7f4cSbjh21 z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; 505936b7f4cSbjh21 return z; 506936b7f4cSbjh21 507936b7f4cSbjh21} 508936b7f4cSbjh21 509936b7f4cSbjh21#ifndef SOFTFLOAT_FOR_GCC 510936b7f4cSbjh21/* 511936b7f4cSbjh21------------------------------------------------------------------------------- 512936b7f4cSbjh21Returns an approximation to the square root of the 32-bit significand given 513936b7f4cSbjh21by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 514936b7f4cSbjh21`aExp' (the least significant bit) is 1, the integer returned approximates 515936b7f4cSbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 516936b7f4cSbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 517936b7f4cSbjh21case, the approximation returned lies strictly within +/-2 of the exact 518936b7f4cSbjh21value. 
519936b7f4cSbjh21------------------------------------------------------------------------------- 520936b7f4cSbjh21*/ 521936b7f4cSbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a ) 522936b7f4cSbjh21{ 523936b7f4cSbjh21 static const bits16 sqrtOddAdjustments[] = { 524936b7f4cSbjh21 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 525936b7f4cSbjh21 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 526936b7f4cSbjh21 }; 527936b7f4cSbjh21 static const bits16 sqrtEvenAdjustments[] = { 528936b7f4cSbjh21 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 529936b7f4cSbjh21 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 530936b7f4cSbjh21 }; 531936b7f4cSbjh21 int8 index; 532936b7f4cSbjh21 bits32 z; 533936b7f4cSbjh21 534936b7f4cSbjh21 index = ( a>>27 ) & 15; 535936b7f4cSbjh21 if ( aExp & 1 ) { 536936b7f4cSbjh21 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; 537936b7f4cSbjh21 z = ( ( a / z )<<14 ) + ( z<<15 ); 538936b7f4cSbjh21 a >>= 1; 539936b7f4cSbjh21 } 540936b7f4cSbjh21 else { 541936b7f4cSbjh21 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; 542936b7f4cSbjh21 z = a / z + z; 543936b7f4cSbjh21 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 544936b7f4cSbjh21 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 545936b7f4cSbjh21 } 546936b7f4cSbjh21 return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); 547936b7f4cSbjh21 548936b7f4cSbjh21} 549936b7f4cSbjh21#endif 550936b7f4cSbjh21 551936b7f4cSbjh21/* 552936b7f4cSbjh21------------------------------------------------------------------------------- 553936b7f4cSbjh21Returns the number of leading 0 bits before the most-significant 1 bit of 554936b7f4cSbjh21`a'. If `a' is zero, 32 is returned. 
555936b7f4cSbjh21------------------------------------------------------------------------------- 556936b7f4cSbjh21*/ 557936b7f4cSbjh21static int8 countLeadingZeros32( bits32 a ) 558936b7f4cSbjh21{ 559936b7f4cSbjh21 static const int8 countLeadingZerosHigh[] = { 560936b7f4cSbjh21 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 561936b7f4cSbjh21 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 562936b7f4cSbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 563936b7f4cSbjh21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 564936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 565936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 566936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 567936b7f4cSbjh21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 568936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 569936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 570936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 571936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 572936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 573936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 574936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 575936b7f4cSbjh21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 576936b7f4cSbjh21 }; 577936b7f4cSbjh21 int8 shiftCount; 578936b7f4cSbjh21 579936b7f4cSbjh21 shiftCount = 0; 580936b7f4cSbjh21 if ( a < 0x10000 ) { 581936b7f4cSbjh21 shiftCount += 16; 582936b7f4cSbjh21 a <<= 16; 583936b7f4cSbjh21 } 584936b7f4cSbjh21 if ( a < 0x1000000 ) { 585936b7f4cSbjh21 shiftCount += 8; 586936b7f4cSbjh21 a <<= 8; 587936b7f4cSbjh21 } 588936b7f4cSbjh21 shiftCount += countLeadingZerosHigh[ a>>24 ]; 589936b7f4cSbjh21 return shiftCount; 590936b7f4cSbjh21 591936b7f4cSbjh21} 592936b7f4cSbjh21 593936b7f4cSbjh21/* 594936b7f4cSbjh21------------------------------------------------------------------------------- 595936b7f4cSbjh21Returns 1 if the 64-bit value formed by 
concatenating `a0' and `a1' is 596936b7f4cSbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 597936b7f4cSbjh21returns 0. 598936b7f4cSbjh21------------------------------------------------------------------------------- 599936b7f4cSbjh21*/ 600936b7f4cSbjh21INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 601936b7f4cSbjh21{ 602936b7f4cSbjh21 603936b7f4cSbjh21 return ( a0 == b0 ) && ( a1 == b1 ); 604936b7f4cSbjh21 605936b7f4cSbjh21} 606936b7f4cSbjh21 607936b7f4cSbjh21/* 608936b7f4cSbjh21------------------------------------------------------------------------------- 609936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less 610936b7f4cSbjh21than or equal to the 64-bit value formed by concatenating `b0' and `b1'. 611936b7f4cSbjh21Otherwise, returns 0. 612936b7f4cSbjh21------------------------------------------------------------------------------- 613936b7f4cSbjh21*/ 614936b7f4cSbjh21INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 615936b7f4cSbjh21{ 616936b7f4cSbjh21 617936b7f4cSbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 618936b7f4cSbjh21 619936b7f4cSbjh21} 620936b7f4cSbjh21 621936b7f4cSbjh21/* 622936b7f4cSbjh21------------------------------------------------------------------------------- 623936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less 624936b7f4cSbjh21than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 625936b7f4cSbjh21returns 0. 
626936b7f4cSbjh21------------------------------------------------------------------------------- 627936b7f4cSbjh21*/ 628936b7f4cSbjh21INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 629936b7f4cSbjh21{ 630936b7f4cSbjh21 631936b7f4cSbjh21 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 632936b7f4cSbjh21 633936b7f4cSbjh21} 634936b7f4cSbjh21 635936b7f4cSbjh21/* 636936b7f4cSbjh21------------------------------------------------------------------------------- 637936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not 638936b7f4cSbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, 639936b7f4cSbjh21returns 0. 640936b7f4cSbjh21------------------------------------------------------------------------------- 641936b7f4cSbjh21*/ 642936b7f4cSbjh21INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) 643936b7f4cSbjh21{ 644936b7f4cSbjh21 645936b7f4cSbjh21 return ( a0 != b0 ) || ( a1 != b1 ); 646936b7f4cSbjh21 647936b7f4cSbjh21} 648936b7f4cSbjh21 649