xref: /openbsd-src/sys/lib/libkern/softfloat-macros.h (revision 33b4f39fbeffad07bc3206f173cff9f3c9901cd1)
1 /*	$OpenBSD: softfloat-macros.h,v 1.1 2002/04/28 20:55:14 pvalchev Exp $	*/
2 /*	$NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $	*/
3 
4 /*
5 ===============================================================================
6 
7 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8 Arithmetic Package, Release 2a.
9 
10 Written by John R. Hauser.  This work was made possible in part by the
11 International Computer Science Institute, located at Suite 600, 1947 Center
12 Street, Berkeley, California 94704.  Funding was partially provided by the
13 National Science Foundation under grant MIP-9311980.  The original version
14 of this code was written as part of a project to build a fixed-point vector
15 processor in collaboration with the University of California at Berkeley,
16 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
17 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18 arithmetic/SoftFloat.html'.
19 
20 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
21 effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
22 WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
23 RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
24 RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
25 THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
26 (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
27 COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
28 ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
29 
30 Derivative works are acceptable, even for commercial purposes, so long as
31 (1) they include prominent notice that the work is derivative, and (2) they
32 include prominent notice akin to these four paragraphs for those parts of
33 this code that are retained.
34 
35 ===============================================================================
36 */
37 
38 #ifndef NO_IEEE
39 
40 /*
41 -------------------------------------------------------------------------------
42 Shifts `a' right by the number of bits given in `count'.  If any nonzero
43 bits are shifted off, they are ``jammed'' into the least significant bit of
44 the result by setting the least significant bit to 1.  The value of `count'
45 can be arbitrarily large; in particular, if `count' is greater than 32, the
46 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
47 The result is stored in the location pointed to by `zPtr'.
48 -------------------------------------------------------------------------------
49 */
50 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
51 {
52     bits32 z;
53 
54     if ( count == 0 ) {
55         z = a;
56     }
57     else if ( count < 32 ) {
58         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
59     }
60     else {
61         z = ( a != 0 );
62     }
63     *zPtr = z;
64 
65 }
66 
67 /*
68 -------------------------------------------------------------------------------
69 Shifts `a' right by the number of bits given in `count'.  If any nonzero
70 bits are shifted off, they are ``jammed'' into the least significant bit of
71 the result by setting the least significant bit to 1.  The value of `count'
72 can be arbitrarily large; in particular, if `count' is greater than 64, the
73 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
74 The result is stored in the location pointed to by `zPtr'.
75 -------------------------------------------------------------------------------
76 */
77 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
78 {
79     bits64 z;
80 
81     if ( count == 0 ) {
82         z = a;
83     }
84     else if ( count < 64 ) {
85         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
86     }
87     else {
88         z = ( a != 0 );
89     }
90     *zPtr = z;
91 
92 }
93 
94 /*
95 -------------------------------------------------------------------------------
96 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
97 _plus_ the number of bits given in `count'.  The shifted result is at most
98 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
99 bits shifted off form a second 64-bit result as follows:  The _last_ bit
100 shifted off is the most-significant bit of the extra result, and the other
101 63 bits of the extra result are all zero if and only if _all_but_the_last_
102 bits shifted off were all zero.  This extra result is stored in the location
103 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
104     (This routine makes more sense if `a0' and `a1' are considered to form a
105 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
106 value is shifted right by the number of bits given in `count', and the
107 integer part of the result is returned at the location pointed to by
108 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
109 described above, and is returned at the location pointed to by `z1Ptr'.)
110 -------------------------------------------------------------------------------
111 */
112 INLINE void
113  shift64ExtraRightJamming(
114      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
115 {
116     bits64 z0, z1;
117     int8 negCount = ( - count ) & 63;
118 
119     if ( count == 0 ) {
120         z1 = a1;
121         z0 = a0;
122     }
123     else if ( count < 64 ) {
124         z1 = ( a0<<negCount ) | ( a1 != 0 );
125         z0 = a0>>count;
126     }
127     else {
128         if ( count == 64 ) {
129             z1 = a0 | ( a1 != 0 );
130         }
131         else {
132             z1 = ( ( a0 | a1 ) != 0 );
133         }
134         z0 = 0;
135     }
136     *z1Ptr = z1;
137     *z0Ptr = z0;
138 
139 }
140 
141 /*
142 -------------------------------------------------------------------------------
143 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
144 number of bits given in `count'.  Any bits shifted off are lost.  The value
145 of `count' can be arbitrarily large; in particular, if `count' is greater
146 than 128, the result will be 0.  The result is broken into two 64-bit pieces
147 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
148 -------------------------------------------------------------------------------
149 */
150 INLINE void
151  shift128Right(
152      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
153 {
154     bits64 z0, z1;
155     int8 negCount = ( - count ) & 63;
156 
157     if ( count == 0 ) {
158         z1 = a1;
159         z0 = a0;
160     }
161     else if ( count < 64 ) {
162         z1 = ( a0<<negCount ) | ( a1>>count );
163         z0 = a0>>count;
164     }
165     else {
166         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
167         z0 = 0;
168     }
169     *z1Ptr = z1;
170     *z0Ptr = z0;
171 
172 }
173 
174 /*
175 -------------------------------------------------------------------------------
176 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
177 number of bits given in `count'.  If any nonzero bits are shifted off, they
178 are ``jammed'' into the least significant bit of the result by setting the
179 least significant bit to 1.  The value of `count' can be arbitrarily large;
180 in particular, if `count' is greater than 128, the result will be either
181 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
182 nonzero.  The result is broken into two 64-bit pieces which are stored at
183 the locations pointed to by `z0Ptr' and `z1Ptr'.
184 -------------------------------------------------------------------------------
185 */
186 INLINE void
187  shift128RightJamming(
188      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
189 {
190     bits64 z0, z1;
191     int8 negCount = ( - count ) & 63;
192 
193     if ( count == 0 ) {
194         z1 = a1;
195         z0 = a0;
196     }
197     else if ( count < 64 ) {
198         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
199         z0 = a0>>count;
200     }
201     else {
202         if ( count == 64 ) {
203             z1 = a0 | ( a1 != 0 );
204         }
205         else if ( count < 128 ) {
206             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
207         }
208         else {
209             z1 = ( ( a0 | a1 ) != 0 );
210         }
211         z0 = 0;
212     }
213     *z1Ptr = z1;
214     *z0Ptr = z0;
215 
216 }
217 
218 /*
219 -------------------------------------------------------------------------------
220 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
221 by 64 _plus_ the number of bits given in `count'.  The shifted result is
222 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
223 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
224 off form a third 64-bit result as follows:  The _last_ bit shifted off is
225 the most-significant bit of the extra result, and the other 63 bits of the
226 extra result are all zero if and only if _all_but_the_last_ bits shifted off
227 were all zero.  This extra result is stored in the location pointed to by
228 `z2Ptr'.  The value of `count' can be arbitrarily large.
229     (This routine makes more sense if `a0', `a1', and `a2' are considered
230 to form a fixed-point value with binary point between `a1' and `a2'.  This
231 fixed-point value is shifted right by the number of bits given in `count',
232 and the integer part of the result is returned at the locations pointed to
233 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
234 corrupted as described above, and is returned at the location pointed to by
235 `z2Ptr'.)
236 -------------------------------------------------------------------------------
237 */
238 INLINE void
239  shift128ExtraRightJamming(
240      bits64 a0,
241      bits64 a1,
242      bits64 a2,
243      int16 count,
244      bits64 *z0Ptr,
245      bits64 *z1Ptr,
246      bits64 *z2Ptr
247  )
248 {
249     bits64 z0, z1, z2;
250     int8 negCount = ( - count ) & 63;
251 
252     if ( count == 0 ) {
253         z2 = a2;
254         z1 = a1;
255         z0 = a0;
256     }
257     else {
258         if ( count < 64 ) {
259             z2 = a1<<negCount;
260             z1 = ( a0<<negCount ) | ( a1>>count );
261             z0 = a0>>count;
262         }
263         else {
264             if ( count == 64 ) {
265                 z2 = a1;
266                 z1 = a0;
267             }
268             else {
269                 a2 |= a1;
270                 if ( count < 128 ) {
271                     z2 = a0<<negCount;
272                     z1 = a0>>( count & 63 );
273                 }
274                 else {
275                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
276                     z1 = 0;
277                 }
278             }
279             z0 = 0;
280         }
281         z2 |= ( a2 != 0 );
282     }
283     *z2Ptr = z2;
284     *z1Ptr = z1;
285     *z0Ptr = z0;
286 
287 }
288 
289 /*
290 -------------------------------------------------------------------------------
291 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
292 number of bits given in `count'.  Any bits shifted off are lost.  The value
293 of `count' must be less than 64.  The result is broken into two 64-bit
294 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
295 -------------------------------------------------------------------------------
296 */
297 INLINE void
298  shortShift128Left(
299      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
300 {
301 
302     *z1Ptr = a1<<count;
303     *z0Ptr =
304         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
305 
306 }
307 
308 /*
309 -------------------------------------------------------------------------------
310 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
311 by the number of bits given in `count'.  Any bits shifted off are lost.
312 The value of `count' must be less than 64.  The result is broken into three
313 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
314 `z1Ptr', and `z2Ptr'.
315 -------------------------------------------------------------------------------
316 */
317 INLINE void
318  shortShift192Left(
319      bits64 a0,
320      bits64 a1,
321      bits64 a2,
322      int16 count,
323      bits64 *z0Ptr,
324      bits64 *z1Ptr,
325      bits64 *z2Ptr
326  )
327 {
328     bits64 z0, z1, z2;
329     int8 negCount;
330 
331     z2 = a2<<count;
332     z1 = a1<<count;
333     z0 = a0<<count;
334     if ( 0 < count ) {
335         negCount = ( ( - count ) & 63 );
336         z1 |= a2>>negCount;
337         z0 |= a1>>negCount;
338     }
339     *z2Ptr = z2;
340     *z1Ptr = z1;
341     *z0Ptr = z0;
342 
343 }
344 
345 /*
346 -------------------------------------------------------------------------------
347 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
348 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
349 any carry out is lost.  The result is broken into two 64-bit pieces which
350 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
351 -------------------------------------------------------------------------------
352 */
353 INLINE void
354  add128(
355      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
356 {
357     bits64 z1;
358 
359     z1 = a1 + b1;
360     *z1Ptr = z1;
361     *z0Ptr = a0 + b0 + ( z1 < a1 );
362 
363 }
364 
365 /*
366 -------------------------------------------------------------------------------
367 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
368 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
369 modulo 2^192, so any carry out is lost.  The result is broken into three
370 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
371 `z1Ptr', and `z2Ptr'.
372 -------------------------------------------------------------------------------
373 */
374 INLINE void
375  add192(
376      bits64 a0,
377      bits64 a1,
378      bits64 a2,
379      bits64 b0,
380      bits64 b1,
381      bits64 b2,
382      bits64 *z0Ptr,
383      bits64 *z1Ptr,
384      bits64 *z2Ptr
385  )
386 {
387     bits64 z0, z1, z2;
388     int8 carry0, carry1;
389 
390     z2 = a2 + b2;
391     carry1 = ( z2 < a2 );
392     z1 = a1 + b1;
393     carry0 = ( z1 < a1 );
394     z0 = a0 + b0;
395     z1 += carry1;
396     z0 += ( z1 < carry1 );
397     z0 += carry0;
398     *z2Ptr = z2;
399     *z1Ptr = z1;
400     *z0Ptr = z0;
401 
402 }
403 
404 /*
405 -------------------------------------------------------------------------------
406 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
407 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
408 2^128, so any borrow out (carry out) is lost.  The result is broken into two
409 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
410 `z1Ptr'.
411 -------------------------------------------------------------------------------
412 */
413 INLINE void
414  sub128(
415      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
416 {
417 
418     *z1Ptr = a1 - b1;
419     *z0Ptr = a0 - b0 - ( a1 < b1 );
420 
421 }
422 
423 /*
424 -------------------------------------------------------------------------------
425 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
426 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
427 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
428 result is broken into three 64-bit pieces which are stored at the locations
429 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
430 -------------------------------------------------------------------------------
431 */
432 INLINE void
433  sub192(
434      bits64 a0,
435      bits64 a1,
436      bits64 a2,
437      bits64 b0,
438      bits64 b1,
439      bits64 b2,
440      bits64 *z0Ptr,
441      bits64 *z1Ptr,
442      bits64 *z2Ptr
443  )
444 {
445     bits64 z0, z1, z2;
446     int8 borrow0, borrow1;
447 
448     z2 = a2 - b2;
449     borrow1 = ( a2 < b2 );
450     z1 = a1 - b1;
451     borrow0 = ( a1 < b1 );
452     z0 = a0 - b0;
453     z0 -= ( z1 < borrow1 );
454     z1 -= borrow1;
455     z0 -= borrow0;
456     *z2Ptr = z2;
457     *z1Ptr = z1;
458     *z0Ptr = z0;
459 
460 }
461 
462 /*
463 -------------------------------------------------------------------------------
464 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
465 into two 64-bit pieces which are stored at the locations pointed to by
466 `z0Ptr' and `z1Ptr'.
467 -------------------------------------------------------------------------------
468 */
469 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
470 {
471     bits32 aHigh, aLow, bHigh, bLow;
472     bits64 z0, zMiddleA, zMiddleB, z1;
473 
474     aLow = a;
475     aHigh = a>>32;
476     bLow = b;
477     bHigh = b>>32;
478     z1 = ( (bits64) aLow ) * bLow;
479     zMiddleA = ( (bits64) aLow ) * bHigh;
480     zMiddleB = ( (bits64) aHigh ) * bLow;
481     z0 = ( (bits64) aHigh ) * bHigh;
482     zMiddleA += zMiddleB;
483     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
484     zMiddleA <<= 32;
485     z1 += zMiddleA;
486     z0 += ( z1 < zMiddleA );
487     *z1Ptr = z1;
488     *z0Ptr = z0;
489 
490 }
491 
492 /*
493 -------------------------------------------------------------------------------
494 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
495 `b' to obtain a 192-bit product.  The product is broken into three 64-bit
496 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
497 `z2Ptr'.
498 -------------------------------------------------------------------------------
499 */
500 INLINE void
501  mul128By64To192(
502      bits64 a0,
503      bits64 a1,
504      bits64 b,
505      bits64 *z0Ptr,
506      bits64 *z1Ptr,
507      bits64 *z2Ptr
508  )
509 {
510     bits64 z0, z1, z2, more1;
511 
512     mul64To128( a1, b, &z1, &z2 );
513     mul64To128( a0, b, &z0, &more1 );
514     add128( z0, more1, 0, z1, &z0, &z1 );
515     *z2Ptr = z2;
516     *z1Ptr = z1;
517     *z0Ptr = z0;
518 
519 }
520 
521 /*
522 -------------------------------------------------------------------------------
523 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
524 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
525 product.  The product is broken into four 64-bit pieces which are stored at
526 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
527 -------------------------------------------------------------------------------
528 */
529 INLINE void
530  mul128To256(
531      bits64 a0,
532      bits64 a1,
533      bits64 b0,
534      bits64 b1,
535      bits64 *z0Ptr,
536      bits64 *z1Ptr,
537      bits64 *z2Ptr,
538      bits64 *z3Ptr
539  )
540 {
541     bits64 z0, z1, z2, z3;
542     bits64 more1, more2;
543 
544     mul64To128( a1, b1, &z2, &z3 );
545     mul64To128( a1, b0, &z1, &more2 );
546     add128( z1, more2, 0, z2, &z1, &z2 );
547     mul64To128( a0, b0, &z0, &more1 );
548     add128( z0, more1, 0, z1, &z0, &z1 );
549     mul64To128( a0, b1, &more1, &more2 );
550     add128( more1, more2, 0, z2, &more1, &z2 );
551     add128( z0, z1, 0, more1, &z0, &z1 );
552     *z3Ptr = z3;
553     *z2Ptr = z2;
554     *z1Ptr = z1;
555     *z0Ptr = z0;
556 
557 }
558 
559 /*
560 -------------------------------------------------------------------------------
561 Returns an approximation to the 64-bit integer quotient obtained by dividing
562 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
563 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
564 toward zero, the approximation returned lies between q and q + 2 inclusive.
565 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
566 unsigned integer is returned.
567 -------------------------------------------------------------------------------
568 */
569 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
570 {
571     bits64 b0, b1;
572     bits64 rem0, rem1, term0, term1;
573     bits64 z;
574 
575     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
576     b0 = b>>32;
577     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
578     mul64To128( b, z, &term0, &term1 );
579     sub128( a0, a1, term0, term1, &rem0, &rem1 );
580     while ( ( (sbits64) rem0 ) < 0 ) {
581         z -= LIT64( 0x100000000 );
582         b1 = b<<32;
583         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
584     }
585     rem0 = ( rem0<<32 ) | ( rem1>>32 );
586     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
587     return z;
588 
589 }
590 
591 #ifndef SOFTFLOAT_FOR_GCC /* Not used */
592 /*
593 -------------------------------------------------------------------------------
594 Returns an approximation to the square root of the 32-bit significand given
595 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
596 `aExp' (the least significant bit) is 1, the integer returned approximates
597 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
598 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
599 case, the approximation returned lies strictly within +/-2 of the exact
600 value.
601 -------------------------------------------------------------------------------
602 */
603 static bits32 estimateSqrt32( int16 aExp, bits32 a )
604 {
605     static const bits16 sqrtOddAdjustments[] = {
606         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
607         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
608     };
609     static const bits16 sqrtEvenAdjustments[] = {
610         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
611         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
612     };
613     int8 index;
614     bits32 z;
615 
616     index = ( a>>27 ) & 15;
617     if ( aExp & 1 ) {
618         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
619         z = ( ( a / z )<<14 ) + ( z<<15 );
620         a >>= 1;
621     }
622     else {
623         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
624         z = a / z + z;
625         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
626         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
627     }
628     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
629 
630 }
631 #endif
632 
633 /*
634 -------------------------------------------------------------------------------
635 Returns the number of leading 0 bits before the most-significant 1 bit of
636 `a'.  If `a' is zero, 32 is returned.
637 -------------------------------------------------------------------------------
638 */
639 static int8 countLeadingZeros32( bits32 a )
640 {
641     static const int8 countLeadingZerosHigh[] = {
642         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
643         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
644         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
645         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
646         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
647         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
648         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
649         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
650         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
654         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
657         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
658     };
659     int8 shiftCount;
660 
661     shiftCount = 0;
662     if ( a < 0x10000 ) {
663         shiftCount += 16;
664         a <<= 16;
665     }
666     if ( a < 0x1000000 ) {
667         shiftCount += 8;
668         a <<= 8;
669     }
670     shiftCount += countLeadingZerosHigh[ a>>24 ];
671     return shiftCount;
672 
673 }
674 
675 /*
676 -------------------------------------------------------------------------------
677 Returns the number of leading 0 bits before the most-significant 1 bit of
678 `a'.  If `a' is zero, 64 is returned.
679 -------------------------------------------------------------------------------
680 */
681 static int8 countLeadingZeros64( bits64 a )
682 {
683     int8 shiftCount;
684 
685     shiftCount = 0;
686     if ( a < ( (bits64) 1 )<<32 ) {
687         shiftCount += 32;
688     }
689     else {
690         a >>= 32;
691     }
692     shiftCount += countLeadingZeros32( a );
693     return shiftCount;
694 
695 }
696 
697 /*
698 -------------------------------------------------------------------------------
699 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
700 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
701 Otherwise, returns 0.
702 -------------------------------------------------------------------------------
703 */
704 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
705 {
706 
707     return ( a0 == b0 ) && ( a1 == b1 );
708 
709 }
710 
711 /*
712 -------------------------------------------------------------------------------
713 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
714 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
715 Otherwise, returns 0.
716 -------------------------------------------------------------------------------
717 */
718 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
719 {
720 
721     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
722 
723 }
724 
725 /*
726 -------------------------------------------------------------------------------
727 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
728 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
729 returns 0.
730 -------------------------------------------------------------------------------
731 */
732 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
733 {
734 
735     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
736 
737 }
738 
739 /*
740 -------------------------------------------------------------------------------
741 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
742 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
743 Otherwise, returns 0.
744 -------------------------------------------------------------------------------
745 */
746 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
747 {
748 
749     return ( a0 != b0 ) || ( a1 != b1 );
750 
751 }
752 
753 #endif /* !NO_IEEE */
754