xref: /openbsd-src/sys/lib/libkern/softfloat.c (revision 37ecf0c705093ea06613ee0f53a55e96cc9d149e)
1*37ecf0c7Smiod /*	$OpenBSD: softfloat.c,v 1.6 2014/07/01 20:21:17 miod Exp $	*/
2433075b6Spvalchev /*	$NetBSD: softfloat.c,v 1.1 2001/04/26 03:10:47 ross Exp $	*/
3433075b6Spvalchev 
4433075b6Spvalchev /*
5433075b6Spvalchev  * This version hacked for use with gcc -msoft-float by bjh21.
6433075b6Spvalchev  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
7433075b6Spvalchev  *  itself).
8433075b6Spvalchev  */
9433075b6Spvalchev 
10433075b6Spvalchev /*
11433075b6Spvalchev  * Things you may want to define:
12433075b6Spvalchev  *
13433075b6Spvalchev  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
14433075b6Spvalchev  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
15433075b6Spvalchev  *   properly renamed.
16433075b6Spvalchev  */
17433075b6Spvalchev 
18433075b6Spvalchev /*
19433075b6Spvalchev ===============================================================================
20433075b6Spvalchev 
21433075b6Spvalchev This C source file is part of the SoftFloat IEC/IEEE Floating-point
22433075b6Spvalchev Arithmetic Package, Release 2a.
23433075b6Spvalchev 
24433075b6Spvalchev Written by John R. Hauser.  This work was made possible in part by the
25433075b6Spvalchev International Computer Science Institute, located at Suite 600, 1947 Center
26433075b6Spvalchev Street, Berkeley, California 94704.  Funding was partially provided by the
27433075b6Spvalchev National Science Foundation under grant MIP-9311980.  The original version
28433075b6Spvalchev of this code was written as part of a project to build a fixed-point vector
29433075b6Spvalchev processor in collaboration with the University of California at Berkeley,
30433075b6Spvalchev overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
31433075b6Spvalchev is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
32433075b6Spvalchev arithmetic/SoftFloat.html'.
33433075b6Spvalchev 
34433075b6Spvalchev THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
35433075b6Spvalchev effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
36433075b6Spvalchev WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
37433075b6Spvalchev RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
38433075b6Spvalchev RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
39433075b6Spvalchev THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
40433075b6Spvalchev (possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
41433075b6Spvalchev COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
42433075b6Spvalchev ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
43433075b6Spvalchev 
44433075b6Spvalchev Derivative works are acceptable, even for commercial purposes, so long as
45433075b6Spvalchev (1) they include prominent notice that the work is derivative, and (2) they
46433075b6Spvalchev include prominent notice akin to these four paragraphs for those parts of
47433075b6Spvalchev this code that are retained.
48433075b6Spvalchev 
49433075b6Spvalchev ===============================================================================
50433075b6Spvalchev */
51433075b6Spvalchev 
52433075b6Spvalchev #ifdef SOFTFLOAT_FOR_GCC
53433075b6Spvalchev #include "softfloat-for-gcc.h"
54433075b6Spvalchev #endif
55433075b6Spvalchev 
56433075b6Spvalchev #include "milieu.h"
57433075b6Spvalchev #include "softfloat.h"
58433075b6Spvalchev 
59*37ecf0c7Smiod float32 normalizeRoundAndPackFloat32(flag, int16, bits32);
60*37ecf0c7Smiod float64 normalizeRoundAndPackFloat64(flag, int16, bits64);
61*37ecf0c7Smiod 
/*
 * Hooks that convert a float64 between the form in which it is stored in
 * memory and the form SoftFloat operates on.  Each hook defaults to the
 * identity unless a definition has already been supplied.
 */
#if !defined(FLOAT64_DEMANGLE)
#define FLOAT64_DEMANGLE(a)	(a)
#endif
#if !defined(FLOAT64_MANGLE)
#define FLOAT64_MANGLE(a)	(a)
#endif
72433075b6Spvalchev 
73433075b6Spvalchev /*
74433075b6Spvalchev -------------------------------------------------------------------------------
75433075b6Spvalchev Floating-point rounding mode, extended double-precision rounding precision,
76433075b6Spvalchev and exception flags.
77433075b6Spvalchev -------------------------------------------------------------------------------
78433075b6Spvalchev */
79433075b6Spvalchev 
/*
 * XXX: A mutable global like this one may become a problem with
 *	options-MULTIPROCESSOR or threads someday; right now it is not.
 *	All other dynamic global variables have been removed. [ross]
 *
 * Only present when FLOATX80 is defined; the initial value is 80.
 */
#if defined(FLOATX80)
int8 floatx80_rounding_precision = 80;
#endif
88433075b6Spvalchev 
89433075b6Spvalchev /*
90433075b6Spvalchev -------------------------------------------------------------------------------
91433075b6Spvalchev Primitive arithmetic functions, including multi-word arithmetic, and
92433075b6Spvalchev division and square root approximations.  (Can be specialized to target if
93433075b6Spvalchev desired.)
94433075b6Spvalchev -------------------------------------------------------------------------------
95433075b6Spvalchev */
96433075b6Spvalchev #include "softfloat-macros.h"
97433075b6Spvalchev 
98433075b6Spvalchev /*
99433075b6Spvalchev -------------------------------------------------------------------------------
100433075b6Spvalchev Functions and definitions to determine:  (1) whether tininess for underflow
101433075b6Spvalchev is detected before or after rounding by default, (2) what (if anything)
102433075b6Spvalchev happens when exceptions are raised, (3) how signaling NaNs are distinguished
103433075b6Spvalchev from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
104433075b6Spvalchev are propagated from function inputs to output.  These details are target-
105433075b6Spvalchev specific.
106433075b6Spvalchev -------------------------------------------------------------------------------
107433075b6Spvalchev */
108433075b6Spvalchev #include "softfloat-specialize.h"
109433075b6Spvalchev 
110433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not used */
111433075b6Spvalchev /*
112433075b6Spvalchev -------------------------------------------------------------------------------
113433075b6Spvalchev Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
114433075b6Spvalchev and 7, and returns the properly rounded 32-bit integer corresponding to the
115433075b6Spvalchev input.  If `zSign' is 1, the input is negated before being converted to an
116433075b6Spvalchev integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
117433075b6Spvalchev is simply rounded to an integer, with the inexact exception raised if the
118433075b6Spvalchev input cannot be represented exactly as an integer.  However, if the fixed-
119433075b6Spvalchev point input is too large, the invalid exception is raised and the largest
120433075b6Spvalchev positive or negative integer is returned.
121433075b6Spvalchev -------------------------------------------------------------------------------
122433075b6Spvalchev */
roundAndPackInt32(flag zSign,bits64 absZ)123433075b6Spvalchev static int32 roundAndPackInt32( flag zSign, bits64 absZ )
124433075b6Spvalchev {
125433075b6Spvalchev     int8 roundingMode;
126433075b6Spvalchev     flag roundNearestEven;
127433075b6Spvalchev     int8 roundIncrement, roundBits;
128433075b6Spvalchev     int32 z;
129433075b6Spvalchev 
130433075b6Spvalchev     roundingMode = float_rounding_mode();
131433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
132433075b6Spvalchev     roundIncrement = 0x40;
133433075b6Spvalchev     if ( ! roundNearestEven ) {
134433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
135433075b6Spvalchev             roundIncrement = 0;
136433075b6Spvalchev         }
137433075b6Spvalchev         else {
138433075b6Spvalchev             roundIncrement = 0x7F;
139433075b6Spvalchev             if ( zSign ) {
140433075b6Spvalchev                 if ( roundingMode == float_round_up ) roundIncrement = 0;
141433075b6Spvalchev             }
142433075b6Spvalchev             else {
143433075b6Spvalchev                 if ( roundingMode == float_round_down ) roundIncrement = 0;
144433075b6Spvalchev             }
145433075b6Spvalchev         }
146433075b6Spvalchev     }
147433075b6Spvalchev     roundBits = absZ & 0x7F;
148433075b6Spvalchev     absZ = ( absZ + roundIncrement )>>7;
149433075b6Spvalchev     absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
150433075b6Spvalchev     z = absZ;
151433075b6Spvalchev     if ( zSign ) z = - z;
152433075b6Spvalchev     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
153433075b6Spvalchev         float_raise( float_flag_invalid );
154433075b6Spvalchev         return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
155433075b6Spvalchev     }
156433075b6Spvalchev     if ( roundBits ) float_set_inexact();
157433075b6Spvalchev     return z;
158433075b6Spvalchev 
159433075b6Spvalchev }
160433075b6Spvalchev 
161433075b6Spvalchev /*
162433075b6Spvalchev -------------------------------------------------------------------------------
163433075b6Spvalchev Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
164433075b6Spvalchev `absZ1', with binary point between bits 63 and 64 (between the input words),
165433075b6Spvalchev and returns the properly rounded 64-bit integer corresponding to the input.
166433075b6Spvalchev If `zSign' is 1, the input is negated before being converted to an integer.
167433075b6Spvalchev Ordinarily, the fixed-point input is simply rounded to an integer, with
168433075b6Spvalchev the inexact exception raised if the input cannot be represented exactly as
169433075b6Spvalchev an integer.  However, if the fixed-point input is too large, the invalid
170433075b6Spvalchev exception is raised and the largest positive or negative integer is
171433075b6Spvalchev returned.
172433075b6Spvalchev -------------------------------------------------------------------------------
173433075b6Spvalchev */
roundAndPackInt64(flag zSign,bits64 absZ0,bits64 absZ1)174433075b6Spvalchev static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
175433075b6Spvalchev {
176433075b6Spvalchev     int8 roundingMode;
177433075b6Spvalchev     flag roundNearestEven, increment;
178433075b6Spvalchev     int64 z;
179433075b6Spvalchev 
180433075b6Spvalchev     roundingMode = float_rounding_mode();
181433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
182433075b6Spvalchev     increment = ( (sbits64) absZ1 < 0 );
183433075b6Spvalchev     if ( ! roundNearestEven ) {
184433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
185433075b6Spvalchev             increment = 0;
186433075b6Spvalchev         }
187433075b6Spvalchev         else {
188433075b6Spvalchev             if ( zSign ) {
189433075b6Spvalchev                 increment = ( roundingMode == float_round_down ) && absZ1;
190433075b6Spvalchev             }
191433075b6Spvalchev             else {
192433075b6Spvalchev                 increment = ( roundingMode == float_round_up ) && absZ1;
193433075b6Spvalchev             }
194433075b6Spvalchev         }
195433075b6Spvalchev     }
196433075b6Spvalchev     if ( increment ) {
197433075b6Spvalchev         ++absZ0;
198433075b6Spvalchev         if ( absZ0 == 0 ) goto overflow;
199433075b6Spvalchev         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
200433075b6Spvalchev     }
201433075b6Spvalchev     z = absZ0;
202433075b6Spvalchev     if ( zSign ) z = - z;
203433075b6Spvalchev     if ( z && ( ( z < 0 ) ^ zSign ) ) {
204433075b6Spvalchev  overflow:
205433075b6Spvalchev         float_raise( float_flag_invalid );
206433075b6Spvalchev         return
207433075b6Spvalchev               zSign ? (sbits64) LIT64( 0x8000000000000000 )
208433075b6Spvalchev             : LIT64( 0x7FFFFFFFFFFFFFFF );
209433075b6Spvalchev     }
210433075b6Spvalchev     if ( absZ1 ) float_set_inexact();
211433075b6Spvalchev     return z;
212433075b6Spvalchev 
213433075b6Spvalchev }
214a657e0a7Smartynas 
215a657e0a7Smartynas #ifdef __alpha__
216a657e0a7Smartynas /*
217a657e0a7Smartynas -------------------------------------------------------------------------------
218a657e0a7Smartynas Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
219a657e0a7Smartynas `absZ1', with binary point between bits 63 and 64 (between the input words),
220a657e0a7Smartynas and returns the properly rounded 64-bit integer corresponding to the input.
221a657e0a7Smartynas If `zSign' is 1, the input is negated before being converted to an integer.
222a657e0a7Smartynas Ordinarily, the fixed-point input is simply rounded to an integer, with
223a657e0a7Smartynas the inexact exception raised if the input cannot be represented exactly as
224a657e0a7Smartynas an integer.
225a657e0a7Smartynas -------------------------------------------------------------------------------
226a657e0a7Smartynas */
roundAndPackInt64NoOverflow(flag zSign,bits64 absZ0,bits64 absZ1)227a657e0a7Smartynas static int64 roundAndPackInt64NoOverflow( flag zSign, bits64 absZ0,
228a657e0a7Smartynas     bits64 absZ1 )
229a657e0a7Smartynas {
230a657e0a7Smartynas     int8 roundingMode;
231a657e0a7Smartynas     flag roundNearestEven, increment;
232a657e0a7Smartynas     int64 z;
233a657e0a7Smartynas 
234a657e0a7Smartynas     roundingMode = float_rounding_mode();
235a657e0a7Smartynas     roundNearestEven = ( roundingMode == float_round_nearest_even );
236a657e0a7Smartynas     increment = ( (sbits64) absZ1 < 0 );
237a657e0a7Smartynas     if ( ! roundNearestEven ) {
238a657e0a7Smartynas         if ( roundingMode == float_round_to_zero ) {
239a657e0a7Smartynas             increment = 0;
240a657e0a7Smartynas         }
241a657e0a7Smartynas         else {
242a657e0a7Smartynas             if ( zSign ) {
243a657e0a7Smartynas                 increment = ( roundingMode == float_round_down ) && absZ1;
244a657e0a7Smartynas             }
245a657e0a7Smartynas             else {
246a657e0a7Smartynas                 increment = ( roundingMode == float_round_up ) && absZ1;
247a657e0a7Smartynas             }
248a657e0a7Smartynas         }
249a657e0a7Smartynas     }
250a657e0a7Smartynas     if ( increment ) {
251a657e0a7Smartynas         ++absZ0;
252a657e0a7Smartynas         absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
253a657e0a7Smartynas     }
254a657e0a7Smartynas     z = absZ0;
255a657e0a7Smartynas     if ( zSign ) z = - z;
256a657e0a7Smartynas     if ( absZ1 ) float_set_inexact();
257a657e0a7Smartynas     return z;
258a657e0a7Smartynas 
259a657e0a7Smartynas }
260a657e0a7Smartynas #endif /* __alpha__ */
261433075b6Spvalchev #endif
262433075b6Spvalchev 
263433075b6Spvalchev /*
264433075b6Spvalchev -------------------------------------------------------------------------------
265433075b6Spvalchev Returns the fraction bits of the single-precision floating-point value `a'.
266433075b6Spvalchev -------------------------------------------------------------------------------
267433075b6Spvalchev */
extractFloat32Frac(float32 a)268433075b6Spvalchev INLINE bits32 extractFloat32Frac( float32 a )
269433075b6Spvalchev {
270433075b6Spvalchev 
271433075b6Spvalchev     return a & 0x007FFFFF;
272433075b6Spvalchev 
273433075b6Spvalchev }
274433075b6Spvalchev 
275433075b6Spvalchev /*
276433075b6Spvalchev -------------------------------------------------------------------------------
277433075b6Spvalchev Returns the exponent bits of the single-precision floating-point value `a'.
278433075b6Spvalchev -------------------------------------------------------------------------------
279433075b6Spvalchev */
extractFloat32Exp(float32 a)280433075b6Spvalchev INLINE int16 extractFloat32Exp( float32 a )
281433075b6Spvalchev {
282433075b6Spvalchev 
283433075b6Spvalchev     return ( a>>23 ) & 0xFF;
284433075b6Spvalchev 
285433075b6Spvalchev }
286433075b6Spvalchev 
287433075b6Spvalchev /*
288433075b6Spvalchev -------------------------------------------------------------------------------
289433075b6Spvalchev Returns the sign bit of the single-precision floating-point value `a'.
290433075b6Spvalchev -------------------------------------------------------------------------------
291433075b6Spvalchev */
extractFloat32Sign(float32 a)292433075b6Spvalchev INLINE flag extractFloat32Sign( float32 a )
293433075b6Spvalchev {
294433075b6Spvalchev 
295433075b6Spvalchev     return a>>31;
296433075b6Spvalchev 
297433075b6Spvalchev }
298433075b6Spvalchev 
299433075b6Spvalchev /*
300433075b6Spvalchev -------------------------------------------------------------------------------
301433075b6Spvalchev Normalizes the subnormal single-precision floating-point value represented
302433075b6Spvalchev by the denormalized significand `aSig'.  The normalized exponent and
303433075b6Spvalchev significand are stored at the locations pointed to by `zExpPtr' and
304433075b6Spvalchev `zSigPtr', respectively.
305433075b6Spvalchev -------------------------------------------------------------------------------
306433075b6Spvalchev */
307433075b6Spvalchev static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)308433075b6Spvalchev  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
309433075b6Spvalchev {
310433075b6Spvalchev     int8 shiftCount;
311433075b6Spvalchev 
312433075b6Spvalchev     shiftCount = countLeadingZeros32( aSig ) - 8;
313433075b6Spvalchev     *zSigPtr = aSig<<shiftCount;
314433075b6Spvalchev     *zExpPtr = 1 - shiftCount;
315433075b6Spvalchev 
316433075b6Spvalchev }
317433075b6Spvalchev 
318433075b6Spvalchev /*
319433075b6Spvalchev -------------------------------------------------------------------------------
320433075b6Spvalchev Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
321433075b6Spvalchev single-precision floating-point value, returning the result.  After being
322433075b6Spvalchev shifted into the proper positions, the three fields are simply added
323433075b6Spvalchev together to form the result.  This means that any integer portion of `zSig'
324433075b6Spvalchev will be added into the exponent.  Since a properly normalized significand
325433075b6Spvalchev will have an integer portion equal to 1, the `zExp' input should be 1 less
326433075b6Spvalchev than the desired result exponent whenever `zSig' is a complete, normalized
327433075b6Spvalchev significand.
328433075b6Spvalchev -------------------------------------------------------------------------------
329433075b6Spvalchev */
packFloat32(flag zSign,int16 zExp,bits32 zSig)330433075b6Spvalchev INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
331433075b6Spvalchev {
332433075b6Spvalchev 
333433075b6Spvalchev     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
334433075b6Spvalchev 
335433075b6Spvalchev }
336433075b6Spvalchev 
337433075b6Spvalchev /*
338433075b6Spvalchev -------------------------------------------------------------------------------
339433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
340433075b6Spvalchev and significand `zSig', and returns the proper single-precision floating-
341433075b6Spvalchev point value corresponding to the abstract input.  Ordinarily, the abstract
342433075b6Spvalchev value is simply rounded and packed into the single-precision format, with
343433075b6Spvalchev the inexact exception raised if the abstract input cannot be represented
344433075b6Spvalchev exactly.  However, if the abstract value is too large, the overflow and
345433075b6Spvalchev inexact exceptions are raised and an infinity or maximal finite value is
346433075b6Spvalchev returned.  If the abstract value is too small, the input value is rounded to
347433075b6Spvalchev a subnormal number, and the underflow and inexact exceptions are raised if
348433075b6Spvalchev the abstract input cannot be represented exactly as a subnormal single-
349433075b6Spvalchev precision floating-point number.
350433075b6Spvalchev     The input significand `zSig' has its binary point between bits 30
351433075b6Spvalchev and 29, which is 7 bits to the left of the usual location.  This shifted
352433075b6Spvalchev significand must be normalized or smaller.  If `zSig' is not normalized,
353433075b6Spvalchev `zExp' must be 0; in that case, the result returned is a subnormal number,
354433075b6Spvalchev and it must not require rounding.  In the usual case that `zSig' is
355433075b6Spvalchev normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
356433075b6Spvalchev The handling of underflow and overflow follows the IEC/IEEE Standard for
357433075b6Spvalchev Binary Floating-Point Arithmetic.
358433075b6Spvalchev -------------------------------------------------------------------------------
359433075b6Spvalchev */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)360433075b6Spvalchev static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
361433075b6Spvalchev {
362433075b6Spvalchev     int8 roundingMode;
363433075b6Spvalchev     flag roundNearestEven;
364433075b6Spvalchev     int8 roundIncrement, roundBits;
365433075b6Spvalchev     flag isTiny;
366433075b6Spvalchev 
367433075b6Spvalchev     roundingMode = float_rounding_mode();
368433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
369433075b6Spvalchev     roundIncrement = 0x40;
370433075b6Spvalchev     if ( ! roundNearestEven ) {
371433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
372433075b6Spvalchev             roundIncrement = 0;
373433075b6Spvalchev         }
374433075b6Spvalchev         else {
375433075b6Spvalchev             roundIncrement = 0x7F;
376433075b6Spvalchev             if ( zSign ) {
377433075b6Spvalchev                 if ( roundingMode == float_round_up ) roundIncrement = 0;
378433075b6Spvalchev             }
379433075b6Spvalchev             else {
380433075b6Spvalchev                 if ( roundingMode == float_round_down ) roundIncrement = 0;
381433075b6Spvalchev             }
382433075b6Spvalchev         }
383433075b6Spvalchev     }
384433075b6Spvalchev     roundBits = zSig & 0x7F;
385433075b6Spvalchev     if ( 0xFD <= (bits16) zExp ) {
386433075b6Spvalchev         if (    ( 0xFD < zExp )
387433075b6Spvalchev              || (    ( zExp == 0xFD )
388433075b6Spvalchev                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
389433075b6Spvalchev            ) {
390433075b6Spvalchev             float_raise( float_flag_overflow | float_flag_inexact );
391433075b6Spvalchev             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
392433075b6Spvalchev         }
393433075b6Spvalchev         if ( zExp < 0 ) {
394433075b6Spvalchev             isTiny =
395433075b6Spvalchev                    ( float_detect_tininess == float_tininess_before_rounding )
396433075b6Spvalchev                 || ( zExp < -1 )
397433075b6Spvalchev                 || ( zSig + roundIncrement < 0x80000000 );
398433075b6Spvalchev             shift32RightJamming( zSig, - zExp, &zSig );
399433075b6Spvalchev             zExp = 0;
400433075b6Spvalchev             roundBits = zSig & 0x7F;
401433075b6Spvalchev             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
402433075b6Spvalchev         }
403433075b6Spvalchev     }
404433075b6Spvalchev     if ( roundBits ) float_set_inexact();
405433075b6Spvalchev     zSig = ( zSig + roundIncrement )>>7;
406433075b6Spvalchev     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
407433075b6Spvalchev     if ( zSig == 0 ) zExp = 0;
408433075b6Spvalchev     return packFloat32( zSign, zExp, zSig );
409433075b6Spvalchev 
410433075b6Spvalchev }
411433075b6Spvalchev 
412433075b6Spvalchev /*
413433075b6Spvalchev -------------------------------------------------------------------------------
414433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
415433075b6Spvalchev and significand `zSig', and returns the proper single-precision floating-
416433075b6Spvalchev point value corresponding to the abstract input.  This routine is just like
417433075b6Spvalchev `roundAndPackFloat32' except that `zSig' does not have to be normalized.
418433075b6Spvalchev Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
419433075b6Spvalchev floating-point exponent.
420433075b6Spvalchev -------------------------------------------------------------------------------
421433075b6Spvalchev */
422*37ecf0c7Smiod float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)423433075b6Spvalchev  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
424433075b6Spvalchev {
425433075b6Spvalchev     int8 shiftCount;
426433075b6Spvalchev 
427433075b6Spvalchev     shiftCount = countLeadingZeros32( zSig ) - 1;
428433075b6Spvalchev     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
429433075b6Spvalchev 
430433075b6Spvalchev }
431433075b6Spvalchev 
432433075b6Spvalchev /*
433433075b6Spvalchev -------------------------------------------------------------------------------
434433075b6Spvalchev Returns the fraction bits of the double-precision floating-point value `a'.
435433075b6Spvalchev -------------------------------------------------------------------------------
436433075b6Spvalchev */
extractFloat64Frac(float64 a)437433075b6Spvalchev INLINE bits64 extractFloat64Frac( float64 a )
438433075b6Spvalchev {
439433075b6Spvalchev 
440433075b6Spvalchev     return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
441433075b6Spvalchev 
442433075b6Spvalchev }
443433075b6Spvalchev 
444433075b6Spvalchev /*
445433075b6Spvalchev -------------------------------------------------------------------------------
446433075b6Spvalchev Returns the exponent bits of the double-precision floating-point value `a'.
447433075b6Spvalchev -------------------------------------------------------------------------------
448433075b6Spvalchev */
extractFloat64Exp(float64 a)449433075b6Spvalchev INLINE int16 extractFloat64Exp( float64 a )
450433075b6Spvalchev {
451433075b6Spvalchev 
452433075b6Spvalchev     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
453433075b6Spvalchev 
454433075b6Spvalchev }
455433075b6Spvalchev 
456433075b6Spvalchev /*
457433075b6Spvalchev -------------------------------------------------------------------------------
458433075b6Spvalchev Returns the sign bit of the double-precision floating-point value `a'.
459433075b6Spvalchev -------------------------------------------------------------------------------
460433075b6Spvalchev */
extractFloat64Sign(float64 a)461433075b6Spvalchev INLINE flag extractFloat64Sign( float64 a )
462433075b6Spvalchev {
463433075b6Spvalchev 
464433075b6Spvalchev     return FLOAT64_DEMANGLE(a)>>63;
465433075b6Spvalchev 
466433075b6Spvalchev }
467433075b6Spvalchev 
468433075b6Spvalchev /*
469433075b6Spvalchev -------------------------------------------------------------------------------
470433075b6Spvalchev Normalizes the subnormal double-precision floating-point value represented
471433075b6Spvalchev by the denormalized significand `aSig'.  The normalized exponent and
472433075b6Spvalchev significand are stored at the locations pointed to by `zExpPtr' and
473433075b6Spvalchev `zSigPtr', respectively.
474433075b6Spvalchev -------------------------------------------------------------------------------
475433075b6Spvalchev */
476433075b6Spvalchev static void
normalizeFloat64Subnormal(bits64 aSig,int16 * zExpPtr,bits64 * zSigPtr)477433075b6Spvalchev  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
478433075b6Spvalchev {
479433075b6Spvalchev     int8 shiftCount;
480433075b6Spvalchev 
481433075b6Spvalchev     shiftCount = countLeadingZeros64( aSig ) - 11;
482433075b6Spvalchev     *zSigPtr = aSig<<shiftCount;
483433075b6Spvalchev     *zExpPtr = 1 - shiftCount;
484433075b6Spvalchev 
485433075b6Spvalchev }
486433075b6Spvalchev 
487433075b6Spvalchev /*
488433075b6Spvalchev -------------------------------------------------------------------------------
489433075b6Spvalchev Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
490433075b6Spvalchev double-precision floating-point value, returning the result.  After being
491433075b6Spvalchev shifted into the proper positions, the three fields are simply added
492433075b6Spvalchev together to form the result.  This means that any integer portion of `zSig'
493433075b6Spvalchev will be added into the exponent.  Since a properly normalized significand
494433075b6Spvalchev will have an integer portion equal to 1, the `zExp' input should be 1 less
495433075b6Spvalchev than the desired result exponent whenever `zSig' is a complete, normalized
496433075b6Spvalchev significand.
497433075b6Spvalchev -------------------------------------------------------------------------------
498433075b6Spvalchev */
packFloat64(flag zSign,int16 zExp,bits64 zSig)499433075b6Spvalchev INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
500433075b6Spvalchev {
501433075b6Spvalchev 
502433075b6Spvalchev     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
503433075b6Spvalchev 			   ( ( (bits64) zExp )<<52 ) + zSig );
504433075b6Spvalchev 
505433075b6Spvalchev }
506433075b6Spvalchev 
507433075b6Spvalchev /*
508433075b6Spvalchev -------------------------------------------------------------------------------
509433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
510433075b6Spvalchev and significand `zSig', and returns the proper double-precision floating-
511433075b6Spvalchev point value corresponding to the abstract input.  Ordinarily, the abstract
512433075b6Spvalchev value is simply rounded and packed into the double-precision format, with
513433075b6Spvalchev the inexact exception raised if the abstract input cannot be represented
514433075b6Spvalchev exactly.  However, if the abstract value is too large, the overflow and
515433075b6Spvalchev inexact exceptions are raised and an infinity or maximal finite value is
516433075b6Spvalchev returned.  If the abstract value is too small, the input value is rounded to
517433075b6Spvalchev a subnormal number, and the underflow and inexact exceptions are raised if
518433075b6Spvalchev the abstract input cannot be represented exactly as a subnormal double-
519433075b6Spvalchev precision floating-point number.
520433075b6Spvalchev     The input significand `zSig' has its binary point between bits 62
521433075b6Spvalchev and 61, which is 10 bits to the left of the usual location.  This shifted
522433075b6Spvalchev significand must be normalized or smaller.  If `zSig' is not normalized,
523433075b6Spvalchev `zExp' must be 0; in that case, the result returned is a subnormal number,
524433075b6Spvalchev and it must not require rounding.  In the usual case that `zSig' is
525433075b6Spvalchev normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
526433075b6Spvalchev The handling of underflow and overflow follows the IEC/IEEE Standard for
527433075b6Spvalchev Binary Floating-Point Arithmetic.
528433075b6Spvalchev -------------------------------------------------------------------------------
529433075b6Spvalchev */
roundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)530433075b6Spvalchev static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
531433075b6Spvalchev {
532433075b6Spvalchev     int8 roundingMode;
533433075b6Spvalchev     flag roundNearestEven;
534433075b6Spvalchev     int16 roundIncrement, roundBits;
535433075b6Spvalchev     flag isTiny;
536433075b6Spvalchev 
537433075b6Spvalchev     roundingMode = float_rounding_mode();
538433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
539433075b6Spvalchev     roundIncrement = 0x200;
540433075b6Spvalchev     if ( ! roundNearestEven ) {
541433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
542433075b6Spvalchev             roundIncrement = 0;
543433075b6Spvalchev         }
544433075b6Spvalchev         else {
545433075b6Spvalchev             roundIncrement = 0x3FF;
546433075b6Spvalchev             if ( zSign ) {
547433075b6Spvalchev                 if ( roundingMode == float_round_up ) roundIncrement = 0;
548433075b6Spvalchev             }
549433075b6Spvalchev             else {
550433075b6Spvalchev                 if ( roundingMode == float_round_down ) roundIncrement = 0;
551433075b6Spvalchev             }
552433075b6Spvalchev         }
553433075b6Spvalchev     }
554433075b6Spvalchev     roundBits = zSig & 0x3FF;
555433075b6Spvalchev     if ( 0x7FD <= (bits16) zExp ) {
556433075b6Spvalchev         if (    ( 0x7FD < zExp )
557433075b6Spvalchev              || (    ( zExp == 0x7FD )
558433075b6Spvalchev                   && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
559433075b6Spvalchev            ) {
560433075b6Spvalchev             float_raise( float_flag_overflow | float_flag_inexact );
561433075b6Spvalchev             return FLOAT64_MANGLE(
562433075b6Spvalchev 		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
563433075b6Spvalchev 		( roundIncrement == 0 ));
564433075b6Spvalchev         }
565433075b6Spvalchev         if ( zExp < 0 ) {
566433075b6Spvalchev             isTiny =
567433075b6Spvalchev                    ( float_detect_tininess == float_tininess_before_rounding )
568433075b6Spvalchev                 || ( zExp < -1 )
569433075b6Spvalchev                 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
570433075b6Spvalchev             shift64RightJamming( zSig, - zExp, &zSig );
571433075b6Spvalchev             zExp = 0;
572433075b6Spvalchev             roundBits = zSig & 0x3FF;
573433075b6Spvalchev             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
574433075b6Spvalchev         }
575433075b6Spvalchev     }
576433075b6Spvalchev     if ( roundBits ) float_set_inexact();
577433075b6Spvalchev     zSig = ( zSig + roundIncrement )>>10;
578433075b6Spvalchev     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
579433075b6Spvalchev     if ( zSig == 0 ) zExp = 0;
580433075b6Spvalchev     return packFloat64( zSign, zExp, zSig );
581433075b6Spvalchev 
582433075b6Spvalchev }
583433075b6Spvalchev 
584433075b6Spvalchev /*
585433075b6Spvalchev -------------------------------------------------------------------------------
586433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
587433075b6Spvalchev and significand `zSig', and returns the proper double-precision floating-
588433075b6Spvalchev point value corresponding to the abstract input.  This routine is just like
589433075b6Spvalchev `roundAndPackFloat64' except that `zSig' does not have to be normalized.
590433075b6Spvalchev Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
591433075b6Spvalchev floating-point exponent.
592433075b6Spvalchev -------------------------------------------------------------------------------
593433075b6Spvalchev */
594*37ecf0c7Smiod float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits64 zSig)595433075b6Spvalchev  normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
596433075b6Spvalchev {
597433075b6Spvalchev     int8 shiftCount;
598433075b6Spvalchev 
599433075b6Spvalchev     shiftCount = countLeadingZeros64( zSig ) - 1;
600433075b6Spvalchev     return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
601433075b6Spvalchev 
602433075b6Spvalchev }
603433075b6Spvalchev 
604433075b6Spvalchev #ifdef FLOATX80
605433075b6Spvalchev 
606433075b6Spvalchev /*
607433075b6Spvalchev -------------------------------------------------------------------------------
608433075b6Spvalchev Returns the fraction bits of the extended double-precision floating-point
609433075b6Spvalchev value `a'.
610433075b6Spvalchev -------------------------------------------------------------------------------
611433075b6Spvalchev */
extractFloatx80Frac(floatx80 a)612433075b6Spvalchev INLINE bits64 extractFloatx80Frac( floatx80 a )
613433075b6Spvalchev {
614433075b6Spvalchev 
615433075b6Spvalchev     return a.low;
616433075b6Spvalchev 
617433075b6Spvalchev }
618433075b6Spvalchev 
619433075b6Spvalchev /*
620433075b6Spvalchev -------------------------------------------------------------------------------
621433075b6Spvalchev Returns the exponent bits of the extended double-precision floating-point
622433075b6Spvalchev value `a'.
623433075b6Spvalchev -------------------------------------------------------------------------------
624433075b6Spvalchev */
extractFloatx80Exp(floatx80 a)625433075b6Spvalchev INLINE int32 extractFloatx80Exp( floatx80 a )
626433075b6Spvalchev {
627433075b6Spvalchev 
628433075b6Spvalchev     return a.high & 0x7FFF;
629433075b6Spvalchev 
630433075b6Spvalchev }
631433075b6Spvalchev 
632433075b6Spvalchev /*
633433075b6Spvalchev -------------------------------------------------------------------------------
634433075b6Spvalchev Returns the sign bit of the extended double-precision floating-point value
635433075b6Spvalchev `a'.
636433075b6Spvalchev -------------------------------------------------------------------------------
637433075b6Spvalchev */
extractFloatx80Sign(floatx80 a)638433075b6Spvalchev INLINE flag extractFloatx80Sign( floatx80 a )
639433075b6Spvalchev {
640433075b6Spvalchev 
641433075b6Spvalchev     return a.high>>15;
642433075b6Spvalchev 
643433075b6Spvalchev }
644433075b6Spvalchev 
645433075b6Spvalchev /*
646433075b6Spvalchev -------------------------------------------------------------------------------
647433075b6Spvalchev Normalizes the subnormal extended double-precision floating-point value
648433075b6Spvalchev represented by the denormalized significand `aSig'.  The normalized exponent
649433075b6Spvalchev and significand are stored at the locations pointed to by `zExpPtr' and
650433075b6Spvalchev `zSigPtr', respectively.
651433075b6Spvalchev -------------------------------------------------------------------------------
652433075b6Spvalchev */
653433075b6Spvalchev static void
normalizeFloatx80Subnormal(bits64 aSig,int32 * zExpPtr,bits64 * zSigPtr)654433075b6Spvalchev  normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
655433075b6Spvalchev {
656433075b6Spvalchev     int8 shiftCount;
657433075b6Spvalchev 
658433075b6Spvalchev     shiftCount = countLeadingZeros64( aSig );
659433075b6Spvalchev     *zSigPtr = aSig<<shiftCount;
660433075b6Spvalchev     *zExpPtr = 1 - shiftCount;
661433075b6Spvalchev 
662433075b6Spvalchev }
663433075b6Spvalchev 
664433075b6Spvalchev /*
665433075b6Spvalchev -------------------------------------------------------------------------------
666433075b6Spvalchev Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
667433075b6Spvalchev extended double-precision floating-point value, returning the result.
668433075b6Spvalchev -------------------------------------------------------------------------------
669433075b6Spvalchev */
packFloatx80(flag zSign,int32 zExp,bits64 zSig)670433075b6Spvalchev INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
671433075b6Spvalchev {
672433075b6Spvalchev     floatx80 z;
673433075b6Spvalchev 
674433075b6Spvalchev     z.low = zSig;
675433075b6Spvalchev     z.high = ( ( (bits16) zSign )<<15 ) + zExp;
676433075b6Spvalchev     return z;
677433075b6Spvalchev 
678433075b6Spvalchev }
679433075b6Spvalchev 
680433075b6Spvalchev /*
681433075b6Spvalchev -------------------------------------------------------------------------------
682433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
683433075b6Spvalchev and extended significand formed by the concatenation of `zSig0' and `zSig1',
684433075b6Spvalchev and returns the proper extended double-precision floating-point value
685433075b6Spvalchev corresponding to the abstract input.  Ordinarily, the abstract value is
686433075b6Spvalchev rounded and packed into the extended double-precision format, with the
687433075b6Spvalchev inexact exception raised if the abstract input cannot be represented
688433075b6Spvalchev exactly.  However, if the abstract value is too large, the overflow and
689433075b6Spvalchev inexact exceptions are raised and an infinity or maximal finite value is
690433075b6Spvalchev returned.  If the abstract value is too small, the input value is rounded to
691433075b6Spvalchev a subnormal number, and the underflow and inexact exceptions are raised if
692433075b6Spvalchev the abstract input cannot be represented exactly as a subnormal extended
693433075b6Spvalchev double-precision floating-point number.
694433075b6Spvalchev     If `roundingPrecision' is 32 or 64, the result is rounded to the same
695433075b6Spvalchev number of bits as single or double precision, respectively.  Otherwise, the
696433075b6Spvalchev result is rounded to the full precision of the extended double-precision
697433075b6Spvalchev format.
698433075b6Spvalchev     The input significand must be normalized or smaller.  If the input
699433075b6Spvalchev significand is not normalized, `zExp' must be 0; in that case, the result
700433075b6Spvalchev returned is a subnormal number, and it must not require rounding.  The
701433075b6Spvalchev handling of underflow and overflow follows the IEC/IEEE Standard for Binary
702433075b6Spvalchev Floating-Point Arithmetic.
703433075b6Spvalchev -------------------------------------------------------------------------------
704433075b6Spvalchev */
/*
 * Implementation notes: precisions 64 and 32 are rounded inside `zSig0'
 * using a mask/increment pair; every other precision is rounded at the
 * `precision80' label using `zSig1' as the discarded part.  Both paths share
 * the `overflow' exit, which lives inside the full-precision code.
 */
705433075b6Spvalchev static floatx80
roundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)706433075b6Spvalchev  roundAndPackFloatx80(
707433075b6Spvalchev      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
708433075b6Spvalchev  )
709433075b6Spvalchev {
710433075b6Spvalchev     int8 roundingMode;
711433075b6Spvalchev     flag roundNearestEven, increment, isTiny;
712433075b6Spvalchev     int64 roundIncrement, roundMask, roundBits;
713433075b6Spvalchev 
714433075b6Spvalchev     roundingMode = float_rounding_mode();
715433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Only 64 and 32 take the reduced-precision path that follows; 80 and
       any other value are handled at `precision80'. */
716433075b6Spvalchev     if ( roundingPrecision == 80 ) goto precision80;
717433075b6Spvalchev     if ( roundingPrecision == 64 ) {
        /* The low 11 bits of zSig0 are discarded; 0x400 is half of that
           range. */
718433075b6Spvalchev         roundIncrement = LIT64( 0x0000000000000400 );
719433075b6Spvalchev         roundMask = LIT64( 0x00000000000007FF );
720433075b6Spvalchev     }
721433075b6Spvalchev     else if ( roundingPrecision == 32 ) {
        /* The low 40 bits of zSig0 are discarded; the increment is bit 39,
           half of that range. */
722433075b6Spvalchev         roundIncrement = LIT64( 0x0000008000000000 );
723433075b6Spvalchev         roundMask = LIT64( 0x000000FFFFFFFFFF );
724433075b6Spvalchev     }
725433075b6Spvalchev     else {
726433075b6Spvalchev         goto precision80;
727433075b6Spvalchev     }
    /* Fold any nonzero bits of zSig1 into bit 0 of zSig0 so that they still
       count among the discarded bits. */
728433075b6Spvalchev     zSig0 |= ( zSig1 != 0 );
    /* Directed modes: round-to-zero adds nothing; the others add the whole
       mask unless the direction points toward zero for this sign. */
729433075b6Spvalchev     if ( ! roundNearestEven ) {
730433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
731433075b6Spvalchev             roundIncrement = 0;
732433075b6Spvalchev         }
733433075b6Spvalchev         else {
734433075b6Spvalchev             roundIncrement = roundMask;
735433075b6Spvalchev             if ( zSign ) {
736433075b6Spvalchev                 if ( roundingMode == float_round_up ) roundIncrement = 0;
737433075b6Spvalchev             }
738433075b6Spvalchev             else {
739433075b6Spvalchev                 if ( roundingMode == float_round_down ) roundIncrement = 0;
740433075b6Spvalchev             }
741433075b6Spvalchev         }
742433075b6Spvalchev     }
743433075b6Spvalchev     roundBits = zSig0 & roundMask;
    /* One test for both unusual ranges: zExp <= 0 wraps to a large value
       after the cast and zExp >= 0x7FFE stays at or above 0x7FFD
       (assumes bits32 is an unsigned 32-bit type). */
744433075b6Spvalchev     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
        /* Overflow: exponent beyond 0x7FFE, or equal to it while adding the
           increment wraps zSig0 around. */
745433075b6Spvalchev         if (    ( 0x7FFE < zExp )
746433075b6Spvalchev              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
747433075b6Spvalchev            ) {
            /* roundMask keeps its reduced-precision value across this jump;
               the overflow code uses ~roundMask as the significand of the
               largest finite result. */
748433075b6Spvalchev             goto overflow;
749433075b6Spvalchev         }
750433075b6Spvalchev         if ( zExp <= 0 ) {
            /* Subnormal result.  Tininess is decided before the significand
               is shifted: always when detecting before rounding, otherwise
               when zExp < 0 or the increment does not carry out of zSig0. */
751433075b6Spvalchev             isTiny =
752433075b6Spvalchev                    ( float_detect_tininess == float_tininess_before_rounding )
753433075b6Spvalchev                 || ( zExp < 0 )
754433075b6Spvalchev                 || ( zSig0 <= zSig0 + roundIncrement );
            /* NOTE(review): shift64RightJamming is defined elsewhere;
               presumably a right shift that keeps lost bits sticky. */
755433075b6Spvalchev             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
756433075b6Spvalchev             zExp = 0;
757433075b6Spvalchev             roundBits = zSig0 & roundMask;
758433075b6Spvalchev             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
759433075b6Spvalchev             if ( roundBits ) float_set_inexact();
760433075b6Spvalchev             zSig0 += roundIncrement;
            /* Bit 63 set after rounding: the exponent field becomes 1. */
761433075b6Spvalchev             if ( (sbits64) zSig0 < 0 ) zExp = 1;
            /* roundIncrement now holds the weight of the lowest kept bit.
               On an exact tie in nearest-even mode that bit joins the mask
               so it is cleared together with the discarded bits. */
762433075b6Spvalchev             roundIncrement = roundMask + 1;
763433075b6Spvalchev             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
764433075b6Spvalchev                 roundMask |= roundIncrement;
765433075b6Spvalchev             }
766433075b6Spvalchev             zSig0 &= ~ roundMask;
767433075b6Spvalchev             return packFloatx80( zSign, zExp, zSig0 );
768433075b6Spvalchev         }
769433075b6Spvalchev     }
    /* Ordinary reduced-precision case. */
770433075b6Spvalchev     if ( roundBits ) float_set_inexact();
771433075b6Spvalchev     zSig0 += roundIncrement;
    /* A sum smaller than the addend means the addition wrapped: bump the
       exponent and restart the significand at 0x8000000000000000. */
772433075b6Spvalchev     if ( zSig0 < roundIncrement ) {
773433075b6Spvalchev         ++zExp;
774433075b6Spvalchev         zSig0 = LIT64( 0x8000000000000000 );
775433075b6Spvalchev     }
    /* Same tie-to-even handling as in the subnormal branch above. */
776433075b6Spvalchev     roundIncrement = roundMask + 1;
777433075b6Spvalchev     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
778433075b6Spvalchev         roundMask |= roundIncrement;
779433075b6Spvalchev     }
780433075b6Spvalchev     zSig0 &= ~ roundMask;
    /* A zero significand is packed with a zero exponent. */
781433075b6Spvalchev     if ( zSig0 == 0 ) zExp = 0;
782433075b6Spvalchev     return packFloatx80( zSign, zExp, zSig0 );
    /* Full-precision path: all 64 bits of zSig0 are kept and zSig1 holds
       the bits below them. */
783433075b6Spvalchev  precision80:
    /* Round-to-nearest increments when the top bit of zSig1 is set. */
784433075b6Spvalchev     increment = ( (sbits64) zSig1 < 0 );
    /* Directed modes increment only when rounding away from zero for this
       sign and zSig1 is nonzero; round-to-zero never increments. */
785433075b6Spvalchev     if ( ! roundNearestEven ) {
786433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
787433075b6Spvalchev             increment = 0;
788433075b6Spvalchev         }
789433075b6Spvalchev         else {
790433075b6Spvalchev             if ( zSign ) {
791433075b6Spvalchev                 increment = ( roundingMode == float_round_down ) && zSig1;
792433075b6Spvalchev             }
793433075b6Spvalchev             else {
794433075b6Spvalchev                 increment = ( roundingMode == float_round_up ) && zSig1;
795433075b6Spvalchev             }
796433075b6Spvalchev         }
797433075b6Spvalchev     }
    /* Same combined range test as in the reduced-precision path. */
798433075b6Spvalchev     if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
        /* Overflow: exponent beyond 0x7FFE, or equal to it with an all-ones
           significand that is about to be incremented. */
799433075b6Spvalchev         if (    ( 0x7FFE < zExp )
800433075b6Spvalchev              || (    ( zExp == 0x7FFE )
801433075b6Spvalchev                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
802433075b6Spvalchev                   && increment
803433075b6Spvalchev                 )
804433075b6Spvalchev            ) {
            /* Full precision: ~roundMask below becomes all ones. */
805433075b6Spvalchev             roundMask = 0;
            /* Also entered by `goto overflow' from the reduced-precision
               path, with roundMask still holding that path's mask. */
806433075b6Spvalchev  overflow:
807433075b6Spvalchev             float_raise( float_flag_overflow | float_flag_inexact );
            /* Modes that round toward zero for this sign return exponent
               0x7FFE with significand ~roundMask; all others return
               exponent 0x7FFF with significand 0x8000000000000000. */
808433075b6Spvalchev             if (    ( roundingMode == float_round_to_zero )
809433075b6Spvalchev                  || ( zSign && ( roundingMode == float_round_up ) )
810433075b6Spvalchev                  || ( ! zSign && ( roundingMode == float_round_down ) )
811433075b6Spvalchev                ) {
812433075b6Spvalchev                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
813433075b6Spvalchev             }
814433075b6Spvalchev             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
815433075b6Spvalchev         }
816433075b6Spvalchev         if ( zExp <= 0 ) {
            /* Subnormal result.  Not tiny only when detecting after
               rounding, zExp is exactly 0, and the pending increment will
               carry out of an all-ones zSig0. */
817433075b6Spvalchev             isTiny =
818433075b6Spvalchev                    ( float_detect_tininess == float_tininess_before_rounding )
819433075b6Spvalchev                 || ( zExp < 0 )
820433075b6Spvalchev                 || ! increment
821433075b6Spvalchev                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
            /* NOTE(review): shift64ExtraRightJamming is defined elsewhere;
               presumably shifts zSig0:zSig1 right keeping lost bits sticky
               in zSig1. */
822433075b6Spvalchev             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
823433075b6Spvalchev             zExp = 0;
824433075b6Spvalchev             if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
825433075b6Spvalchev             if ( zSig1 ) float_set_inexact();
            /* The shift changed zSig1, so the rounding decision is redone.
               In round-to-zero mode both directed tests are false. */
826433075b6Spvalchev             if ( roundNearestEven ) {
827433075b6Spvalchev                 increment = ( (sbits64) zSig1 < 0 );
828433075b6Spvalchev             }
829433075b6Spvalchev             else {
830433075b6Spvalchev                 if ( zSign ) {
831433075b6Spvalchev                     increment = ( roundingMode == float_round_down ) && zSig1;
832433075b6Spvalchev                 }
833433075b6Spvalchev                 else {
834433075b6Spvalchev                     increment = ( roundingMode == float_round_up ) && zSig1;
835433075b6Spvalchev                 }
836433075b6Spvalchev             }
837433075b6Spvalchev             if ( increment ) {
838433075b6Spvalchev                 ++zSig0;
                /* zSig1 with nothing below its top bit is an exact tie: in
                   nearest-even mode clear bit 0 of the result. */
839433075b6Spvalchev                 zSig0 &=
840433075b6Spvalchev                     ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
                /* Bit 63 set after rounding: the exponent field becomes 1. */
841433075b6Spvalchev                 if ( (sbits64) zSig0 < 0 ) zExp = 1;
842433075b6Spvalchev             }
843433075b6Spvalchev             return packFloatx80( zSign, zExp, zSig0 );
844433075b6Spvalchev         }
845433075b6Spvalchev     }
    /* Ordinary full-precision case. */
846433075b6Spvalchev     if ( zSig1 ) float_set_inexact();
847433075b6Spvalchev     if ( increment ) {
848433075b6Spvalchev         ++zSig0;
        /* Wrapping to zero is a carry out of bit 63: bump the exponent and
           restart the significand at 0x8000000000000000. */
849433075b6Spvalchev         if ( zSig0 == 0 ) {
850433075b6Spvalchev             ++zExp;
851433075b6Spvalchev             zSig0 = LIT64( 0x8000000000000000 );
852433075b6Spvalchev         }
853433075b6Spvalchev         else {
            /* Exact tie in nearest-even mode: clear bit 0. */
854433075b6Spvalchev             zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
855433075b6Spvalchev         }
856433075b6Spvalchev     }
857433075b6Spvalchev     else {
        /* A zero significand is packed with a zero exponent. */
858433075b6Spvalchev         if ( zSig0 == 0 ) zExp = 0;
859433075b6Spvalchev     }
860433075b6Spvalchev     return packFloatx80( zSign, zExp, zSig0 );
861433075b6Spvalchev 
862433075b6Spvalchev }
863433075b6Spvalchev 
864433075b6Spvalchev /*
865433075b6Spvalchev -------------------------------------------------------------------------------
866433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent
867433075b6Spvalchev `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
868433075b6Spvalchev and returns the proper extended double-precision floating-point value
869433075b6Spvalchev corresponding to the abstract input.  This routine is just like
870433075b6Spvalchev `roundAndPackFloatx80' except that the input significand does not have to be
871433075b6Spvalchev normalized.
872433075b6Spvalchev -------------------------------------------------------------------------------
873433075b6Spvalchev */
874433075b6Spvalchev static floatx80
normalizeRoundAndPackFloatx80(int8 roundingPrecision,flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)875433075b6Spvalchev  normalizeRoundAndPackFloatx80(
876433075b6Spvalchev      int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
877433075b6Spvalchev  )
878433075b6Spvalchev {
879433075b6Spvalchev     int8 shiftCount;
880433075b6Spvalchev 
881433075b6Spvalchev     if ( zSig0 == 0 ) {
882433075b6Spvalchev         zSig0 = zSig1;
883433075b6Spvalchev         zSig1 = 0;
884433075b6Spvalchev         zExp -= 64;
885433075b6Spvalchev     }
886433075b6Spvalchev     shiftCount = countLeadingZeros64( zSig0 );
887433075b6Spvalchev     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
888433075b6Spvalchev     zExp -= shiftCount;
889433075b6Spvalchev     return
890433075b6Spvalchev         roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
891433075b6Spvalchev 
892433075b6Spvalchev }
893433075b6Spvalchev 
894433075b6Spvalchev #endif
895433075b6Spvalchev 
896433075b6Spvalchev #ifdef FLOAT128
897433075b6Spvalchev 
898433075b6Spvalchev /*
899433075b6Spvalchev -------------------------------------------------------------------------------
900433075b6Spvalchev Returns the least-significant 64 fraction bits of the quadruple-precision
901433075b6Spvalchev floating-point value `a'.
902433075b6Spvalchev -------------------------------------------------------------------------------
903433075b6Spvalchev */
extractFloat128Frac1(float128 a)904433075b6Spvalchev INLINE bits64 extractFloat128Frac1( float128 a )
905433075b6Spvalchev {
906433075b6Spvalchev 
907433075b6Spvalchev     return a.low;
908433075b6Spvalchev 
909433075b6Spvalchev }
910433075b6Spvalchev 
911433075b6Spvalchev /*
912433075b6Spvalchev -------------------------------------------------------------------------------
913433075b6Spvalchev Returns the most-significant 48 fraction bits of the quadruple-precision
914433075b6Spvalchev floating-point value `a'.
915433075b6Spvalchev -------------------------------------------------------------------------------
916433075b6Spvalchev */
extractFloat128Frac0(float128 a)917433075b6Spvalchev INLINE bits64 extractFloat128Frac0( float128 a )
918433075b6Spvalchev {
919433075b6Spvalchev 
920433075b6Spvalchev     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
921433075b6Spvalchev 
922433075b6Spvalchev }
923433075b6Spvalchev 
924433075b6Spvalchev /*
925433075b6Spvalchev -------------------------------------------------------------------------------
926433075b6Spvalchev Returns the exponent bits of the quadruple-precision floating-point value
927433075b6Spvalchev `a'.
928433075b6Spvalchev -------------------------------------------------------------------------------
929433075b6Spvalchev */
extractFloat128Exp(float128 a)930433075b6Spvalchev INLINE int32 extractFloat128Exp( float128 a )
931433075b6Spvalchev {
932433075b6Spvalchev 
933433075b6Spvalchev     return ( a.high>>48 ) & 0x7FFF;
934433075b6Spvalchev 
935433075b6Spvalchev }
936433075b6Spvalchev 
937433075b6Spvalchev /*
938433075b6Spvalchev -------------------------------------------------------------------------------
939433075b6Spvalchev Returns the sign bit of the quadruple-precision floating-point value `a'.
940433075b6Spvalchev -------------------------------------------------------------------------------
941433075b6Spvalchev */
extractFloat128Sign(float128 a)942433075b6Spvalchev INLINE flag extractFloat128Sign( float128 a )
943433075b6Spvalchev {
944433075b6Spvalchev 
945433075b6Spvalchev     return a.high>>63;
946433075b6Spvalchev 
947433075b6Spvalchev }
948433075b6Spvalchev 
949433075b6Spvalchev /*
950433075b6Spvalchev -------------------------------------------------------------------------------
951433075b6Spvalchev Normalizes the subnormal quadruple-precision floating-point value
952433075b6Spvalchev represented by the denormalized significand formed by the concatenation of
953433075b6Spvalchev `aSig0' and `aSig1'.  The normalized exponent is stored at the location
954433075b6Spvalchev pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
955433075b6Spvalchev significand are stored at the location pointed to by `zSig0Ptr', and the
956433075b6Spvalchev least significant 64 bits of the normalized significand are stored at the
957433075b6Spvalchev location pointed to by `zSig1Ptr'.
958433075b6Spvalchev -------------------------------------------------------------------------------
959433075b6Spvalchev */
960433075b6Spvalchev static void
normalizeFloat128Subnormal(bits64 aSig0,bits64 aSig1,int32 * zExpPtr,bits64 * zSig0Ptr,bits64 * zSig1Ptr)961433075b6Spvalchev  normalizeFloat128Subnormal(
962433075b6Spvalchev      bits64 aSig0,
963433075b6Spvalchev      bits64 aSig1,
964433075b6Spvalchev      int32 *zExpPtr,
965433075b6Spvalchev      bits64 *zSig0Ptr,
966433075b6Spvalchev      bits64 *zSig1Ptr
967433075b6Spvalchev  )
968433075b6Spvalchev {
969433075b6Spvalchev     int8 shiftCount;
970433075b6Spvalchev 
971433075b6Spvalchev     if ( aSig0 == 0 ) {
972433075b6Spvalchev         shiftCount = countLeadingZeros64( aSig1 ) - 15;
973433075b6Spvalchev         if ( shiftCount < 0 ) {
974433075b6Spvalchev             *zSig0Ptr = aSig1>>( - shiftCount );
975433075b6Spvalchev             *zSig1Ptr = aSig1<<( shiftCount & 63 );
976433075b6Spvalchev         }
977433075b6Spvalchev         else {
978433075b6Spvalchev             *zSig0Ptr = aSig1<<shiftCount;
979433075b6Spvalchev             *zSig1Ptr = 0;
980433075b6Spvalchev         }
981433075b6Spvalchev         *zExpPtr = - shiftCount - 63;
982433075b6Spvalchev     }
983433075b6Spvalchev     else {
984433075b6Spvalchev         shiftCount = countLeadingZeros64( aSig0 ) - 15;
985433075b6Spvalchev         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
986433075b6Spvalchev         *zExpPtr = 1 - shiftCount;
987433075b6Spvalchev     }
988433075b6Spvalchev 
989433075b6Spvalchev }
990433075b6Spvalchev 
991433075b6Spvalchev /*
992433075b6Spvalchev -------------------------------------------------------------------------------
993433075b6Spvalchev Packs the sign `zSign', the exponent `zExp', and the significand formed
994433075b6Spvalchev by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
995433075b6Spvalchev floating-point value, returning the result.  After being shifted into the
996433075b6Spvalchev proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
997433075b6Spvalchev added together to form the most significant 32 bits of the result.  This
998433075b6Spvalchev means that any integer portion of `zSig0' will be added into the exponent.
999433075b6Spvalchev Since a properly normalized significand will have an integer portion equal
1000433075b6Spvalchev to 1, the `zExp' input should be 1 less than the desired result exponent
1001433075b6Spvalchev whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1002433075b6Spvalchev significand.
1003433075b6Spvalchev -------------------------------------------------------------------------------
1004433075b6Spvalchev */
1005433075b6Spvalchev INLINE float128
packFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)1006433075b6Spvalchev  packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1007433075b6Spvalchev {
1008433075b6Spvalchev     float128 z;
1009433075b6Spvalchev 
1010433075b6Spvalchev     z.low = zSig1;
1011433075b6Spvalchev     z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
1012433075b6Spvalchev     return z;
1013433075b6Spvalchev 
1014433075b6Spvalchev }
1015433075b6Spvalchev 
1016433075b6Spvalchev /*
1017433075b6Spvalchev -------------------------------------------------------------------------------
1018433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1019433075b6Spvalchev and extended significand formed by the concatenation of `zSig0', `zSig1',
1020433075b6Spvalchev and `zSig2', and returns the proper quadruple-precision floating-point value
1021433075b6Spvalchev corresponding to the abstract input.  Ordinarily, the abstract value is
1022433075b6Spvalchev simply rounded and packed into the quadruple-precision format, with the
1023433075b6Spvalchev inexact exception raised if the abstract input cannot be represented
1024433075b6Spvalchev exactly.  However, if the abstract value is too large, the overflow and
1025433075b6Spvalchev inexact exceptions are raised and an infinity or maximal finite value is
1026433075b6Spvalchev returned.  If the abstract value is too small, the input value is rounded to
1027433075b6Spvalchev a subnormal number, and the underflow and inexact exceptions are raised if
1028433075b6Spvalchev the abstract input cannot be represented exactly as a subnormal quadruple-
1029433075b6Spvalchev precision floating-point number.
1030433075b6Spvalchev     The input significand must be normalized or smaller.  If the input
1031433075b6Spvalchev significand is not normalized, `zExp' must be 0; in that case, the result
1032433075b6Spvalchev returned is a subnormal number, and it must not require rounding.  In the
1033433075b6Spvalchev usual case that the input significand is normalized, `zExp' must be 1 less
1034433075b6Spvalchev than the ``true'' floating-point exponent.  The handling of underflow and
1035433075b6Spvalchev overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1036433075b6Spvalchev -------------------------------------------------------------------------------
1037433075b6Spvalchev */
1038433075b6Spvalchev static float128
roundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1,bits64 zSig2)1039433075b6Spvalchev  roundAndPackFloat128(
1040433075b6Spvalchev      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
1041433075b6Spvalchev {
1042433075b6Spvalchev     int8 roundingMode;
1043433075b6Spvalchev     flag roundNearestEven, increment, isTiny;
1044433075b6Spvalchev 
1045433075b6Spvalchev     roundingMode = float_rounding_mode();
1046433075b6Spvalchev     roundNearestEven = ( roundingMode == float_round_nearest_even );
1047433075b6Spvalchev     increment = ( (sbits64) zSig2 < 0 );
1048433075b6Spvalchev     if ( ! roundNearestEven ) {
1049433075b6Spvalchev         if ( roundingMode == float_round_to_zero ) {
1050433075b6Spvalchev             increment = 0;
1051433075b6Spvalchev         }
1052433075b6Spvalchev         else {
1053433075b6Spvalchev             if ( zSign ) {
1054433075b6Spvalchev                 increment = ( roundingMode == float_round_down ) && zSig2;
1055433075b6Spvalchev             }
1056433075b6Spvalchev             else {
1057433075b6Spvalchev                 increment = ( roundingMode == float_round_up ) && zSig2;
1058433075b6Spvalchev             }
1059433075b6Spvalchev         }
1060433075b6Spvalchev     }
1061433075b6Spvalchev     if ( 0x7FFD <= (bits32) zExp ) {
1062433075b6Spvalchev         if (    ( 0x7FFD < zExp )
1063433075b6Spvalchev              || (    ( zExp == 0x7FFD )
1064433075b6Spvalchev                   && eq128(
1065433075b6Spvalchev                          LIT64( 0x0001FFFFFFFFFFFF ),
1066433075b6Spvalchev                          LIT64( 0xFFFFFFFFFFFFFFFF ),
1067433075b6Spvalchev                          zSig0,
1068433075b6Spvalchev                          zSig1
1069433075b6Spvalchev                      )
1070433075b6Spvalchev                   && increment
1071433075b6Spvalchev                 )
1072433075b6Spvalchev            ) {
1073433075b6Spvalchev             float_raise( float_flag_overflow | float_flag_inexact );
1074433075b6Spvalchev             if (    ( roundingMode == float_round_to_zero )
1075433075b6Spvalchev                  || ( zSign && ( roundingMode == float_round_up ) )
1076433075b6Spvalchev                  || ( ! zSign && ( roundingMode == float_round_down ) )
1077433075b6Spvalchev                ) {
1078433075b6Spvalchev                 return
1079433075b6Spvalchev                     packFloat128(
1080433075b6Spvalchev                         zSign,
1081433075b6Spvalchev                         0x7FFE,
1082433075b6Spvalchev                         LIT64( 0x0000FFFFFFFFFFFF ),
1083433075b6Spvalchev                         LIT64( 0xFFFFFFFFFFFFFFFF )
1084433075b6Spvalchev                     );
1085433075b6Spvalchev             }
1086433075b6Spvalchev             return packFloat128( zSign, 0x7FFF, 0, 0 );
1087433075b6Spvalchev         }
1088433075b6Spvalchev         if ( zExp < 0 ) {
1089433075b6Spvalchev             isTiny =
1090433075b6Spvalchev                    ( float_detect_tininess == float_tininess_before_rounding )
1091433075b6Spvalchev                 || ( zExp < -1 )
1092433075b6Spvalchev                 || ! increment
1093433075b6Spvalchev                 || lt128(
1094433075b6Spvalchev                        zSig0,
1095433075b6Spvalchev                        zSig1,
1096433075b6Spvalchev                        LIT64( 0x0001FFFFFFFFFFFF ),
1097433075b6Spvalchev                        LIT64( 0xFFFFFFFFFFFFFFFF )
1098433075b6Spvalchev                    );
1099433075b6Spvalchev             shift128ExtraRightJamming(
1100433075b6Spvalchev                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1101433075b6Spvalchev             zExp = 0;
1102433075b6Spvalchev             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1103433075b6Spvalchev             if ( roundNearestEven ) {
1104433075b6Spvalchev                 increment = ( (sbits64) zSig2 < 0 );
1105433075b6Spvalchev             }
1106433075b6Spvalchev             else {
1107433075b6Spvalchev                 if ( zSign ) {
1108433075b6Spvalchev                     increment = ( roundingMode == float_round_down ) && zSig2;
1109433075b6Spvalchev                 }
1110433075b6Spvalchev                 else {
1111433075b6Spvalchev                     increment = ( roundingMode == float_round_up ) && zSig2;
1112433075b6Spvalchev                 }
1113433075b6Spvalchev             }
1114433075b6Spvalchev         }
1115433075b6Spvalchev     }
1116433075b6Spvalchev     if ( zSig2 ) float_set_inexact();
1117433075b6Spvalchev     if ( increment ) {
1118433075b6Spvalchev         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1119433075b6Spvalchev         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1120433075b6Spvalchev     }
1121433075b6Spvalchev     else {
1122433075b6Spvalchev         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1123433075b6Spvalchev     }
1124433075b6Spvalchev     return packFloat128( zSign, zExp, zSig0, zSig1 );
1125433075b6Spvalchev 
1126433075b6Spvalchev }
1127433075b6Spvalchev 
1128433075b6Spvalchev /*
1129433075b6Spvalchev -------------------------------------------------------------------------------
1130433075b6Spvalchev Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1131433075b6Spvalchev and significand formed by the concatenation of `zSig0' and `zSig1', and
1132433075b6Spvalchev returns the proper quadruple-precision floating-point value corresponding
1133433075b6Spvalchev to the abstract input.  This routine is just like `roundAndPackFloat128'
1134433075b6Spvalchev except that the input significand has fewer bits and does not have to be
1135433075b6Spvalchev normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1136433075b6Spvalchev point exponent.
1137433075b6Spvalchev -------------------------------------------------------------------------------
1138433075b6Spvalchev */
1139433075b6Spvalchev static float128
normalizeRoundAndPackFloat128(flag zSign,int32 zExp,bits64 zSig0,bits64 zSig1)1140433075b6Spvalchev  normalizeRoundAndPackFloat128(
1141433075b6Spvalchev      flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1142433075b6Spvalchev {
1143433075b6Spvalchev     int8 shiftCount;
1144433075b6Spvalchev     bits64 zSig2;
1145433075b6Spvalchev 
1146433075b6Spvalchev     if ( zSig0 == 0 ) {
1147433075b6Spvalchev         zSig0 = zSig1;
1148433075b6Spvalchev         zSig1 = 0;
1149433075b6Spvalchev         zExp -= 64;
1150433075b6Spvalchev     }
1151433075b6Spvalchev     shiftCount = countLeadingZeros64( zSig0 ) - 15;
1152433075b6Spvalchev     if ( 0 <= shiftCount ) {
1153433075b6Spvalchev         zSig2 = 0;
1154433075b6Spvalchev         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1155433075b6Spvalchev     }
1156433075b6Spvalchev     else {
1157433075b6Spvalchev         shift128ExtraRightJamming(
1158433075b6Spvalchev             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1159433075b6Spvalchev     }
1160433075b6Spvalchev     zExp -= shiftCount;
1161433075b6Spvalchev     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1162433075b6Spvalchev 
1163433075b6Spvalchev }
1164433075b6Spvalchev 
1165433075b6Spvalchev #endif
1166433075b6Spvalchev 
1167433075b6Spvalchev /*
1168433075b6Spvalchev -------------------------------------------------------------------------------
1169433075b6Spvalchev Returns the result of converting the 32-bit two's complement integer `a'
1170433075b6Spvalchev to the single-precision floating-point format.  The conversion is performed
1171433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1172433075b6Spvalchev -------------------------------------------------------------------------------
1173433075b6Spvalchev */
int32_to_float32(int32 a)1174433075b6Spvalchev float32 int32_to_float32( int32 a )
1175433075b6Spvalchev {
1176433075b6Spvalchev     flag zSign;
1177433075b6Spvalchev 
1178433075b6Spvalchev     if ( a == 0 ) return 0;
1179433075b6Spvalchev     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1180433075b6Spvalchev     zSign = ( a < 0 );
1181433075b6Spvalchev     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1182433075b6Spvalchev 
1183433075b6Spvalchev }
1184433075b6Spvalchev 
1185433075b6Spvalchev /*
1186433075b6Spvalchev -------------------------------------------------------------------------------
1187433075b6Spvalchev Returns the result of converting the 32-bit two's complement integer `a'
1188433075b6Spvalchev to the double-precision floating-point format.  The conversion is performed
1189433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1190433075b6Spvalchev -------------------------------------------------------------------------------
1191433075b6Spvalchev */
int32_to_float64(int32 a)1192433075b6Spvalchev float64 int32_to_float64( int32 a )
1193433075b6Spvalchev {
1194433075b6Spvalchev     flag zSign;
1195433075b6Spvalchev     uint32 absA;
1196433075b6Spvalchev     int8 shiftCount;
1197433075b6Spvalchev     bits64 zSig;
1198433075b6Spvalchev 
1199433075b6Spvalchev     if ( a == 0 ) return 0;
1200433075b6Spvalchev     zSign = ( a < 0 );
1201433075b6Spvalchev     absA = zSign ? - a : a;
1202433075b6Spvalchev     shiftCount = countLeadingZeros32( absA ) + 21;
1203433075b6Spvalchev     zSig = absA;
1204433075b6Spvalchev     return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1205433075b6Spvalchev 
1206433075b6Spvalchev }
1207433075b6Spvalchev 
1208433075b6Spvalchev #ifdef FLOATX80
1209433075b6Spvalchev 
1210433075b6Spvalchev /*
1211433075b6Spvalchev -------------------------------------------------------------------------------
1212433075b6Spvalchev Returns the result of converting the 32-bit two's complement integer `a'
1213433075b6Spvalchev to the extended double-precision floating-point format.  The conversion
1214433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
1215433075b6Spvalchev Arithmetic.
1216433075b6Spvalchev -------------------------------------------------------------------------------
1217433075b6Spvalchev */
int32_to_floatx80(int32 a)1218433075b6Spvalchev floatx80 int32_to_floatx80( int32 a )
1219433075b6Spvalchev {
1220433075b6Spvalchev     flag zSign;
1221433075b6Spvalchev     uint32 absA;
1222433075b6Spvalchev     int8 shiftCount;
1223433075b6Spvalchev     bits64 zSig;
1224433075b6Spvalchev 
1225433075b6Spvalchev     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1226433075b6Spvalchev     zSign = ( a < 0 );
1227433075b6Spvalchev     absA = zSign ? - a : a;
1228433075b6Spvalchev     shiftCount = countLeadingZeros32( absA ) + 32;
1229433075b6Spvalchev     zSig = absA;
1230433075b6Spvalchev     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1231433075b6Spvalchev 
1232433075b6Spvalchev }
1233433075b6Spvalchev 
1234433075b6Spvalchev #endif
1235433075b6Spvalchev 
1236433075b6Spvalchev #ifdef FLOAT128
1237433075b6Spvalchev 
1238433075b6Spvalchev /*
1239433075b6Spvalchev -------------------------------------------------------------------------------
1240433075b6Spvalchev Returns the result of converting the 32-bit two's complement integer `a' to
1241433075b6Spvalchev the quadruple-precision floating-point format.  The conversion is performed
1242433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1243433075b6Spvalchev -------------------------------------------------------------------------------
1244433075b6Spvalchev */
int32_to_float128(int32 a)1245433075b6Spvalchev float128 int32_to_float128( int32 a )
1246433075b6Spvalchev {
1247433075b6Spvalchev     flag zSign;
1248433075b6Spvalchev     uint32 absA;
1249433075b6Spvalchev     int8 shiftCount;
1250433075b6Spvalchev     bits64 zSig0;
1251433075b6Spvalchev 
1252433075b6Spvalchev     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1253433075b6Spvalchev     zSign = ( a < 0 );
1254433075b6Spvalchev     absA = zSign ? - a : a;
1255433075b6Spvalchev     shiftCount = countLeadingZeros32( absA ) + 17;
1256433075b6Spvalchev     zSig0 = absA;
1257433075b6Spvalchev     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1258433075b6Spvalchev 
1259433075b6Spvalchev }
1260433075b6Spvalchev 
1261433075b6Spvalchev #endif
1262433075b6Spvalchev 
1263433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1264433075b6Spvalchev /*
1265433075b6Spvalchev -------------------------------------------------------------------------------
1266433075b6Spvalchev Returns the result of converting the 64-bit two's complement integer `a'
1267433075b6Spvalchev to the single-precision floating-point format.  The conversion is performed
1268433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269433075b6Spvalchev -------------------------------------------------------------------------------
1270433075b6Spvalchev */
int64_to_float32(int64 a)1271433075b6Spvalchev float32 int64_to_float32( int64 a )
1272433075b6Spvalchev {
1273433075b6Spvalchev     flag zSign;
1274433075b6Spvalchev     uint64 absA;
1275433075b6Spvalchev     int8 shiftCount;
1276433075b6Spvalchev 
1277433075b6Spvalchev     if ( a == 0 ) return 0;
1278433075b6Spvalchev     zSign = ( a < 0 );
1279433075b6Spvalchev     absA = zSign ? - a : a;
1280433075b6Spvalchev     shiftCount = countLeadingZeros64( absA ) - 40;
1281433075b6Spvalchev     if ( 0 <= shiftCount ) {
1282433075b6Spvalchev         return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1283433075b6Spvalchev     }
1284433075b6Spvalchev     else {
1285433075b6Spvalchev         shiftCount += 7;
1286433075b6Spvalchev         if ( shiftCount < 0 ) {
1287433075b6Spvalchev             shift64RightJamming( absA, - shiftCount, &absA );
1288433075b6Spvalchev         }
1289433075b6Spvalchev         else {
1290433075b6Spvalchev             absA <<= shiftCount;
1291433075b6Spvalchev         }
1292433075b6Spvalchev         return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1293433075b6Spvalchev     }
1294433075b6Spvalchev 
1295433075b6Spvalchev }
1296433075b6Spvalchev 
1297433075b6Spvalchev /*
1298433075b6Spvalchev -------------------------------------------------------------------------------
1299433075b6Spvalchev Returns the result of converting the 64-bit two's complement integer `a'
1300433075b6Spvalchev to the double-precision floating-point format.  The conversion is performed
1301433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1302433075b6Spvalchev -------------------------------------------------------------------------------
1303433075b6Spvalchev */
int64_to_float64(int64 a)1304433075b6Spvalchev float64 int64_to_float64( int64 a )
1305433075b6Spvalchev {
1306433075b6Spvalchev     flag zSign;
1307433075b6Spvalchev 
1308433075b6Spvalchev     if ( a == 0 ) return 0;
1309433075b6Spvalchev     if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1310433075b6Spvalchev         return packFloat64( 1, 0x43E, 0 );
1311433075b6Spvalchev     }
1312433075b6Spvalchev     zSign = ( a < 0 );
1313433075b6Spvalchev     return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1314433075b6Spvalchev 
1315433075b6Spvalchev }
1316433075b6Spvalchev 
1317433075b6Spvalchev #ifdef FLOATX80
1318433075b6Spvalchev 
1319433075b6Spvalchev /*
1320433075b6Spvalchev -------------------------------------------------------------------------------
1321433075b6Spvalchev Returns the result of converting the 64-bit two's complement integer `a'
1322433075b6Spvalchev to the extended double-precision floating-point format.  The conversion
1323433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
1324433075b6Spvalchev Arithmetic.
1325433075b6Spvalchev -------------------------------------------------------------------------------
1326433075b6Spvalchev */
int64_to_floatx80(int64 a)1327433075b6Spvalchev floatx80 int64_to_floatx80( int64 a )
1328433075b6Spvalchev {
1329433075b6Spvalchev     flag zSign;
1330433075b6Spvalchev     uint64 absA;
1331433075b6Spvalchev     int8 shiftCount;
1332433075b6Spvalchev 
1333433075b6Spvalchev     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1334433075b6Spvalchev     zSign = ( a < 0 );
1335433075b6Spvalchev     absA = zSign ? - a : a;
1336433075b6Spvalchev     shiftCount = countLeadingZeros64( absA );
1337433075b6Spvalchev     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1338433075b6Spvalchev 
1339433075b6Spvalchev }
1340433075b6Spvalchev 
1341433075b6Spvalchev #endif
1342433075b6Spvalchev 
1343433075b6Spvalchev #ifdef FLOAT128
1344433075b6Spvalchev 
1345433075b6Spvalchev /*
1346433075b6Spvalchev -------------------------------------------------------------------------------
1347433075b6Spvalchev Returns the result of converting the 64-bit two's complement integer `a' to
1348433075b6Spvalchev the quadruple-precision floating-point format.  The conversion is performed
1349433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1350433075b6Spvalchev -------------------------------------------------------------------------------
1351433075b6Spvalchev */
int64_to_float128(int64 a)1352433075b6Spvalchev float128 int64_to_float128( int64 a )
1353433075b6Spvalchev {
1354433075b6Spvalchev     flag zSign;
1355433075b6Spvalchev     uint64 absA;
1356433075b6Spvalchev     int8 shiftCount;
1357433075b6Spvalchev     int32 zExp;
1358433075b6Spvalchev     bits64 zSig0, zSig1;
1359433075b6Spvalchev 
1360433075b6Spvalchev     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1361433075b6Spvalchev     zSign = ( a < 0 );
1362433075b6Spvalchev     absA = zSign ? - a : a;
1363433075b6Spvalchev     shiftCount = countLeadingZeros64( absA ) + 49;
1364433075b6Spvalchev     zExp = 0x406E - shiftCount;
1365433075b6Spvalchev     if ( 64 <= shiftCount ) {
1366433075b6Spvalchev         zSig1 = 0;
1367433075b6Spvalchev         zSig0 = absA;
1368433075b6Spvalchev         shiftCount -= 64;
1369433075b6Spvalchev     }
1370433075b6Spvalchev     else {
1371433075b6Spvalchev         zSig1 = absA;
1372433075b6Spvalchev         zSig0 = 0;
1373433075b6Spvalchev     }
1374433075b6Spvalchev     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1375433075b6Spvalchev     return packFloat128( zSign, zExp, zSig0, zSig1 );
1376433075b6Spvalchev 
1377433075b6Spvalchev }
1378433075b6Spvalchev 
1379433075b6Spvalchev #endif
1380433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
1381433075b6Spvalchev 
1382433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1383433075b6Spvalchev /*
1384433075b6Spvalchev -------------------------------------------------------------------------------
1385433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1386433075b6Spvalchev `a' to the 32-bit two's complement integer format.  The conversion is
1387433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
1388433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
1389433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
1390433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
1391433075b6Spvalchev largest integer with the same sign as `a' is returned.
1392433075b6Spvalchev -------------------------------------------------------------------------------
1393433075b6Spvalchev */
float32_to_int32(float32 a)1394433075b6Spvalchev int32 float32_to_int32( float32 a )
1395433075b6Spvalchev {
1396433075b6Spvalchev     flag aSign;
1397433075b6Spvalchev     int16 aExp, shiftCount;
1398433075b6Spvalchev     bits32 aSig;
1399433075b6Spvalchev     bits64 aSig64;
1400433075b6Spvalchev 
1401433075b6Spvalchev     aSig = extractFloat32Frac( a );
1402433075b6Spvalchev     aExp = extractFloat32Exp( a );
1403433075b6Spvalchev     aSign = extractFloat32Sign( a );
1404433075b6Spvalchev     if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1405433075b6Spvalchev     if ( aExp ) aSig |= 0x00800000;
1406433075b6Spvalchev     shiftCount = 0xAF - aExp;
1407433075b6Spvalchev     aSig64 = aSig;
1408433075b6Spvalchev     aSig64 <<= 32;
1409433075b6Spvalchev     if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1410433075b6Spvalchev     return roundAndPackInt32( aSign, aSig64 );
1411433075b6Spvalchev 
1412433075b6Spvalchev }
1413433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
1414433075b6Spvalchev 
1415433075b6Spvalchev /*
1416433075b6Spvalchev -------------------------------------------------------------------------------
1417433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1418433075b6Spvalchev `a' to the 32-bit two's complement integer format.  The conversion is
1419433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
1420433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.
1421433075b6Spvalchev If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1422433075b6Spvalchev the conversion overflows, the largest integer with the same sign as `a' is
1423433075b6Spvalchev returned.
1424433075b6Spvalchev -------------------------------------------------------------------------------
1425433075b6Spvalchev */
float32_to_int32_round_to_zero(float32 a)1426433075b6Spvalchev int32 float32_to_int32_round_to_zero( float32 a )
1427433075b6Spvalchev {
1428433075b6Spvalchev     flag aSign;
1429433075b6Spvalchev     int16 aExp, shiftCount;
1430433075b6Spvalchev     bits32 aSig;
1431433075b6Spvalchev     int32 z;
1432433075b6Spvalchev 
1433433075b6Spvalchev     aSig = extractFloat32Frac( a );
1434433075b6Spvalchev     aExp = extractFloat32Exp( a );
1435433075b6Spvalchev     aSign = extractFloat32Sign( a );
1436433075b6Spvalchev     shiftCount = aExp - 0x9E;
1437433075b6Spvalchev     if ( 0 <= shiftCount ) {
1438433075b6Spvalchev         if ( a != 0xCF000000 ) {
1439433075b6Spvalchev             float_raise( float_flag_invalid );
1440433075b6Spvalchev             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1441433075b6Spvalchev         }
1442433075b6Spvalchev         return (sbits32) 0x80000000;
1443433075b6Spvalchev     }
1444433075b6Spvalchev     else if ( aExp <= 0x7E ) {
1445433075b6Spvalchev         if ( aExp | aSig ) float_set_inexact();
1446433075b6Spvalchev         return 0;
1447433075b6Spvalchev     }
1448433075b6Spvalchev     aSig = ( aSig | 0x00800000 )<<8;
1449433075b6Spvalchev     z = aSig>>( - shiftCount );
1450433075b6Spvalchev     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1451433075b6Spvalchev         float_set_inexact();
1452433075b6Spvalchev     }
1453433075b6Spvalchev     if ( aSign ) z = - z;
1454433075b6Spvalchev     return z;
1455433075b6Spvalchev 
1456433075b6Spvalchev }
1457433075b6Spvalchev 
1458433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1459433075b6Spvalchev /*
1460433075b6Spvalchev -------------------------------------------------------------------------------
1461433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1462433075b6Spvalchev `a' to the 64-bit two's complement integer format.  The conversion is
1463433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
1464433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
1465433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
1466433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
1467433075b6Spvalchev largest integer with the same sign as `a' is returned.
1468433075b6Spvalchev -------------------------------------------------------------------------------
1469433075b6Spvalchev */
float32_to_int64(float32 a)1470433075b6Spvalchev int64 float32_to_int64( float32 a )
1471433075b6Spvalchev {
1472433075b6Spvalchev     flag aSign;
1473433075b6Spvalchev     int16 aExp, shiftCount;
1474433075b6Spvalchev     bits32 aSig;
1475433075b6Spvalchev     bits64 aSig64, aSigExtra;
1476433075b6Spvalchev 
1477433075b6Spvalchev     aSig = extractFloat32Frac( a );
1478433075b6Spvalchev     aExp = extractFloat32Exp( a );
1479433075b6Spvalchev     aSign = extractFloat32Sign( a );
1480433075b6Spvalchev     shiftCount = 0xBE - aExp;
1481433075b6Spvalchev     if ( shiftCount < 0 ) {
1482433075b6Spvalchev         float_raise( float_flag_invalid );
1483433075b6Spvalchev         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1484433075b6Spvalchev             return LIT64( 0x7FFFFFFFFFFFFFFF );
1485433075b6Spvalchev         }
1486433075b6Spvalchev         return (sbits64) LIT64( 0x8000000000000000 );
1487433075b6Spvalchev     }
1488433075b6Spvalchev     if ( aExp ) aSig |= 0x00800000;
1489433075b6Spvalchev     aSig64 = aSig;
1490433075b6Spvalchev     aSig64 <<= 40;
1491433075b6Spvalchev     shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1492433075b6Spvalchev     return roundAndPackInt64( aSign, aSig64, aSigExtra );
1493433075b6Spvalchev 
1494433075b6Spvalchev }
1495433075b6Spvalchev 
1496433075b6Spvalchev /*
1497433075b6Spvalchev -------------------------------------------------------------------------------
1498433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1499433075b6Spvalchev `a' to the 64-bit two's complement integer format.  The conversion is
1500433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
1501433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.  If
1502433075b6Spvalchev `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1503433075b6Spvalchev conversion overflows, the largest integer with the same sign as `a' is
1504433075b6Spvalchev returned.
1505433075b6Spvalchev -------------------------------------------------------------------------------
1506433075b6Spvalchev */
float32_to_int64_round_to_zero(float32 a)1507433075b6Spvalchev int64 float32_to_int64_round_to_zero( float32 a )
1508433075b6Spvalchev {
1509433075b6Spvalchev     flag aSign;
1510433075b6Spvalchev     int16 aExp, shiftCount;
1511433075b6Spvalchev     bits32 aSig;
1512433075b6Spvalchev     bits64 aSig64;
1513433075b6Spvalchev     int64 z;
1514433075b6Spvalchev 
1515433075b6Spvalchev     aSig = extractFloat32Frac( a );
1516433075b6Spvalchev     aExp = extractFloat32Exp( a );
1517433075b6Spvalchev     aSign = extractFloat32Sign( a );
1518433075b6Spvalchev     shiftCount = aExp - 0xBE;
1519433075b6Spvalchev     if ( 0 <= shiftCount ) {
1520433075b6Spvalchev         if ( a != 0xDF000000 ) {
1521433075b6Spvalchev             float_raise( float_flag_invalid );
1522433075b6Spvalchev             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1523433075b6Spvalchev                 return LIT64( 0x7FFFFFFFFFFFFFFF );
1524433075b6Spvalchev             }
1525433075b6Spvalchev         }
1526433075b6Spvalchev         return (sbits64) LIT64( 0x8000000000000000 );
1527433075b6Spvalchev     }
1528433075b6Spvalchev     else if ( aExp <= 0x7E ) {
1529433075b6Spvalchev         if ( aExp | aSig ) float_set_inexact();
1530433075b6Spvalchev         return 0;
1531433075b6Spvalchev     }
1532433075b6Spvalchev     aSig64 = aSig | 0x00800000;
1533433075b6Spvalchev     aSig64 <<= 40;
1534433075b6Spvalchev     z = aSig64>>( - shiftCount );
1535433075b6Spvalchev     if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1536433075b6Spvalchev         float_set_inexact();
1537433075b6Spvalchev     }
1538433075b6Spvalchev     if ( aSign ) z = - z;
1539433075b6Spvalchev     return z;
1540433075b6Spvalchev 
1541433075b6Spvalchev }
1542433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
1543433075b6Spvalchev 
1544433075b6Spvalchev /*
1545433075b6Spvalchev -------------------------------------------------------------------------------
1546433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1547433075b6Spvalchev `a' to the double-precision floating-point format.  The conversion is
1548433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
1549433075b6Spvalchev Arithmetic.
1550433075b6Spvalchev -------------------------------------------------------------------------------
1551433075b6Spvalchev */
float32_to_float64(float32 a)1552433075b6Spvalchev float64 float32_to_float64( float32 a )
1553433075b6Spvalchev {
1554433075b6Spvalchev     flag aSign;
1555433075b6Spvalchev     int16 aExp;
1556433075b6Spvalchev     bits32 aSig;
1557433075b6Spvalchev 
1558433075b6Spvalchev     aSig = extractFloat32Frac( a );
1559433075b6Spvalchev     aExp = extractFloat32Exp( a );
1560433075b6Spvalchev     aSign = extractFloat32Sign( a );
1561433075b6Spvalchev     if ( aExp == 0xFF ) {
1562433075b6Spvalchev         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1563433075b6Spvalchev         return packFloat64( aSign, 0x7FF, 0 );
1564433075b6Spvalchev     }
1565433075b6Spvalchev     if ( aExp == 0 ) {
1566433075b6Spvalchev         if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1567433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1568433075b6Spvalchev         --aExp;
1569433075b6Spvalchev     }
1570433075b6Spvalchev     return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1571433075b6Spvalchev 
1572433075b6Spvalchev }
1573433075b6Spvalchev 
1574433075b6Spvalchev #ifdef FLOATX80
1575433075b6Spvalchev 
1576433075b6Spvalchev /*
1577433075b6Spvalchev -------------------------------------------------------------------------------
1578433075b6Spvalchev Returns the result of converting the single-precision floating-point value
1579433075b6Spvalchev `a' to the extended double-precision floating-point format.  The conversion
1580433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
1581433075b6Spvalchev Arithmetic.
1582433075b6Spvalchev -------------------------------------------------------------------------------
1583433075b6Spvalchev */
float32_to_floatx80(float32 a)1584433075b6Spvalchev floatx80 float32_to_floatx80( float32 a )
1585433075b6Spvalchev {
1586433075b6Spvalchev     flag aSign;
1587433075b6Spvalchev     int16 aExp;
1588433075b6Spvalchev     bits32 aSig;
1589433075b6Spvalchev 
1590433075b6Spvalchev     aSig = extractFloat32Frac( a );
1591433075b6Spvalchev     aExp = extractFloat32Exp( a );
1592433075b6Spvalchev     aSign = extractFloat32Sign( a );
1593433075b6Spvalchev     if ( aExp == 0xFF ) {
1594433075b6Spvalchev         if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1595433075b6Spvalchev         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1596433075b6Spvalchev     }
1597433075b6Spvalchev     if ( aExp == 0 ) {
1598433075b6Spvalchev         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1599433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1600433075b6Spvalchev     }
1601433075b6Spvalchev     aSig |= 0x00800000;
1602433075b6Spvalchev     return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1603433075b6Spvalchev 
1604433075b6Spvalchev }
1605433075b6Spvalchev 
1606433075b6Spvalchev #endif
1607433075b6Spvalchev 
1608433075b6Spvalchev #ifdef FLOAT128
1609433075b6Spvalchev 
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the quadruple-precision floating-point format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*/
float32_to_float128(float32 a)1618433075b6Spvalchev float128 float32_to_float128( float32 a )
1619433075b6Spvalchev {
1620433075b6Spvalchev     flag aSign;
1621433075b6Spvalchev     int16 aExp;
1622433075b6Spvalchev     bits32 aSig;
1623433075b6Spvalchev 
1624433075b6Spvalchev     aSig = extractFloat32Frac( a );
1625433075b6Spvalchev     aExp = extractFloat32Exp( a );
1626433075b6Spvalchev     aSign = extractFloat32Sign( a );
1627433075b6Spvalchev     if ( aExp == 0xFF ) {
1628433075b6Spvalchev         if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1629433075b6Spvalchev         return packFloat128( aSign, 0x7FFF, 0, 0 );
1630433075b6Spvalchev     }
1631433075b6Spvalchev     if ( aExp == 0 ) {
1632433075b6Spvalchev         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1633433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1634433075b6Spvalchev         --aExp;
1635433075b6Spvalchev     }
1636433075b6Spvalchev     return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1637433075b6Spvalchev 
1638433075b6Spvalchev }
1639433075b6Spvalchev 
1640433075b6Spvalchev #endif
1641433075b6Spvalchev 
1642433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1643433075b6Spvalchev /*
1644433075b6Spvalchev -------------------------------------------------------------------------------
1645433075b6Spvalchev Rounds the single-precision floating-point value `a' to an integer, and
1646433075b6Spvalchev returns the result as a single-precision floating-point value.  The
1647433075b6Spvalchev operation is performed according to the IEC/IEEE Standard for Binary
1648433075b6Spvalchev Floating-Point Arithmetic.
1649433075b6Spvalchev -------------------------------------------------------------------------------
1650433075b6Spvalchev */
float32 float32_round_to_int( float32 a )
{
    int16 expField = extractFloat32Exp( a );
    flag sign;
    bits32 unitBit, fracMask;
    int8 mode;
    float32 rounded;

    if ( expField >= 0x96 ) {
        /* No fraction bits remain at this exponent; NaNs are propagated. */
        if ( ( expField == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a );
        }
        return a;
    }
    if ( expField < 0x7F ) {
        /* Magnitude below 1: zeros pass through unchanged. */
        if ( (bits32) ( a<<1 ) == 0 ) return a;
        float_set_inexact();
        sign = extractFloat32Sign( a );
        switch ( float_rounding_mode() ) {
         case float_round_nearest_even:
            /* Only values strictly above one half round away to +/-1. */
            if ( ( expField == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( sign, 0x7F, 0 );
            }
            break;
         case float_round_down:
            return sign ? 0xBF800000 : 0;
         case float_round_up:
            return sign ? 0x80000000 : 0x3F800000;
         default:
            break;
        }
        return packFloat32( sign, 0, 0 );
    }
    /* `unitBit' marks the integer unit; `fracMask' covers the bits below it. */
    unitBit = 1;
    unitBit <<= 0x96 - expField;
    fracMask = unitBit - 1;
    rounded = a;
    mode = float_rounding_mode();
    if ( mode == float_round_nearest_even ) {
        rounded += unitBit>>1;
        /* An exact tie clears the unit bit so the result is even. */
        if ( ! ( rounded & fracMask ) ) rounded &= ~ unitBit;
    }
    else if (    ( mode != float_round_to_zero )
              && ( extractFloat32Sign( rounded ) ^ ( mode == float_round_up ) )
            ) {
        /* Rounding away from zero: bump past the next integer boundary. */
        rounded += fracMask;
    }
    rounded &= ~ fracMask;
    if ( rounded != a ) float_set_inexact();
    return rounded;

}
1702433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
1703433075b6Spvalchev 
1704433075b6Spvalchev /*
1705433075b6Spvalchev -------------------------------------------------------------------------------
1706433075b6Spvalchev Returns the result of adding the absolute values of the single-precision
1707433075b6Spvalchev floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1708433075b6Spvalchev before being returned.  `zSign' is ignored if the result is a NaN.
1709433075b6Spvalchev The addition is performed according to the IEC/IEEE Standard for Binary
1710433075b6Spvalchev Floating-Point Arithmetic.
1711433075b6Spvalchev -------------------------------------------------------------------------------
1712433075b6Spvalchev */
addFloat32Sigs(float32 a,float32 b,flag zSign)1713433075b6Spvalchev static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1714433075b6Spvalchev {
1715433075b6Spvalchev     int16 aExp, bExp, zExp;
1716433075b6Spvalchev     bits32 aSig, bSig, zSig;
1717433075b6Spvalchev     int16 expDiff;
1718433075b6Spvalchev 
1719433075b6Spvalchev     aSig = extractFloat32Frac( a );
1720433075b6Spvalchev     aExp = extractFloat32Exp( a );
1721433075b6Spvalchev     bSig = extractFloat32Frac( b );
1722433075b6Spvalchev     bExp = extractFloat32Exp( b );
1723433075b6Spvalchev     expDiff = aExp - bExp;
1724433075b6Spvalchev     aSig <<= 6;
1725433075b6Spvalchev     bSig <<= 6;
1726433075b6Spvalchev     if ( 0 < expDiff ) {
1727433075b6Spvalchev         if ( aExp == 0xFF ) {
1728433075b6Spvalchev             if ( aSig ) return propagateFloat32NaN( a, b );
1729433075b6Spvalchev             return a;
1730433075b6Spvalchev         }
1731433075b6Spvalchev         if ( bExp == 0 ) {
1732433075b6Spvalchev             --expDiff;
1733433075b6Spvalchev         }
1734433075b6Spvalchev         else {
1735433075b6Spvalchev             bSig |= 0x20000000;
1736433075b6Spvalchev         }
1737433075b6Spvalchev         shift32RightJamming( bSig, expDiff, &bSig );
1738433075b6Spvalchev         zExp = aExp;
1739433075b6Spvalchev     }
1740433075b6Spvalchev     else if ( expDiff < 0 ) {
1741433075b6Spvalchev         if ( bExp == 0xFF ) {
1742433075b6Spvalchev             if ( bSig ) return propagateFloat32NaN( a, b );
1743433075b6Spvalchev             return packFloat32( zSign, 0xFF, 0 );
1744433075b6Spvalchev         }
1745433075b6Spvalchev         if ( aExp == 0 ) {
1746433075b6Spvalchev             ++expDiff;
1747433075b6Spvalchev         }
1748433075b6Spvalchev         else {
1749433075b6Spvalchev             aSig |= 0x20000000;
1750433075b6Spvalchev         }
1751433075b6Spvalchev         shift32RightJamming( aSig, - expDiff, &aSig );
1752433075b6Spvalchev         zExp = bExp;
1753433075b6Spvalchev     }
1754433075b6Spvalchev     else {
1755433075b6Spvalchev         if ( aExp == 0xFF ) {
1756433075b6Spvalchev             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1757433075b6Spvalchev             return a;
1758433075b6Spvalchev         }
1759433075b6Spvalchev         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1760433075b6Spvalchev         zSig = 0x40000000 + aSig + bSig;
1761433075b6Spvalchev         zExp = aExp;
1762433075b6Spvalchev         goto roundAndPack;
1763433075b6Spvalchev     }
1764433075b6Spvalchev     aSig |= 0x20000000;
1765433075b6Spvalchev     zSig = ( aSig + bSig )<<1;
1766433075b6Spvalchev     --zExp;
1767433075b6Spvalchev     if ( (sbits32) zSig < 0 ) {
1768433075b6Spvalchev         zSig = aSig + bSig;
1769433075b6Spvalchev         ++zExp;
1770433075b6Spvalchev     }
1771433075b6Spvalchev  roundAndPack:
1772433075b6Spvalchev     return roundAndPackFloat32( zSign, zExp, zSig );
1773433075b6Spvalchev 
1774433075b6Spvalchev }
1775433075b6Spvalchev 
1776433075b6Spvalchev /*
1777433075b6Spvalchev -------------------------------------------------------------------------------
1778433075b6Spvalchev Returns the result of subtracting the absolute values of the single-
1779433075b6Spvalchev precision floating-point values `a' and `b'.  If `zSign' is 1, the
1780433075b6Spvalchev difference is negated before being returned.  `zSign' is ignored if the
1781433075b6Spvalchev result is a NaN.  The subtraction is performed according to the IEC/IEEE
1782433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
1783433075b6Spvalchev -------------------------------------------------------------------------------
1784433075b6Spvalchev */
subFloat32Sigs(float32 a,float32 b,flag zSign)1785433075b6Spvalchev static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1786433075b6Spvalchev {
1787433075b6Spvalchev     int16 aExp, bExp, zExp;
1788433075b6Spvalchev     bits32 aSig, bSig, zSig;
1789433075b6Spvalchev     int16 expDiff;
1790433075b6Spvalchev 
1791433075b6Spvalchev     aSig = extractFloat32Frac( a );
1792433075b6Spvalchev     aExp = extractFloat32Exp( a );
1793433075b6Spvalchev     bSig = extractFloat32Frac( b );
1794433075b6Spvalchev     bExp = extractFloat32Exp( b );
1795433075b6Spvalchev     expDiff = aExp - bExp;
1796433075b6Spvalchev     aSig <<= 7;
1797433075b6Spvalchev     bSig <<= 7;
1798433075b6Spvalchev     if ( 0 < expDiff ) goto aExpBigger;
1799433075b6Spvalchev     if ( expDiff < 0 ) goto bExpBigger;
1800433075b6Spvalchev     if ( aExp == 0xFF ) {
1801433075b6Spvalchev         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1802433075b6Spvalchev         float_raise( float_flag_invalid );
1803433075b6Spvalchev         return float32_default_nan;
1804433075b6Spvalchev     }
1805433075b6Spvalchev     if ( aExp == 0 ) {
1806433075b6Spvalchev         aExp = 1;
1807433075b6Spvalchev         bExp = 1;
1808433075b6Spvalchev     }
1809433075b6Spvalchev     if ( bSig < aSig ) goto aBigger;
1810433075b6Spvalchev     if ( aSig < bSig ) goto bBigger;
1811433075b6Spvalchev     return packFloat32( float_rounding_mode() == float_round_down, 0, 0 );
1812433075b6Spvalchev  bExpBigger:
1813433075b6Spvalchev     if ( bExp == 0xFF ) {
1814433075b6Spvalchev         if ( bSig ) return propagateFloat32NaN( a, b );
1815433075b6Spvalchev         return packFloat32( zSign ^ 1, 0xFF, 0 );
1816433075b6Spvalchev     }
1817433075b6Spvalchev     if ( aExp == 0 ) {
1818433075b6Spvalchev         ++expDiff;
1819433075b6Spvalchev     }
1820433075b6Spvalchev     else {
1821433075b6Spvalchev         aSig |= 0x40000000;
1822433075b6Spvalchev     }
1823433075b6Spvalchev     shift32RightJamming( aSig, - expDiff, &aSig );
1824433075b6Spvalchev     bSig |= 0x40000000;
1825433075b6Spvalchev  bBigger:
1826433075b6Spvalchev     zSig = bSig - aSig;
1827433075b6Spvalchev     zExp = bExp;
1828433075b6Spvalchev     zSign ^= 1;
1829433075b6Spvalchev     goto normalizeRoundAndPack;
1830433075b6Spvalchev  aExpBigger:
1831433075b6Spvalchev     if ( aExp == 0xFF ) {
1832433075b6Spvalchev         if ( aSig ) return propagateFloat32NaN( a, b );
1833433075b6Spvalchev         return a;
1834433075b6Spvalchev     }
1835433075b6Spvalchev     if ( bExp == 0 ) {
1836433075b6Spvalchev         --expDiff;
1837433075b6Spvalchev     }
1838433075b6Spvalchev     else {
1839433075b6Spvalchev         bSig |= 0x40000000;
1840433075b6Spvalchev     }
1841433075b6Spvalchev     shift32RightJamming( bSig, expDiff, &bSig );
1842433075b6Spvalchev     aSig |= 0x40000000;
1843433075b6Spvalchev  aBigger:
1844433075b6Spvalchev     zSig = aSig - bSig;
1845433075b6Spvalchev     zExp = aExp;
1846433075b6Spvalchev  normalizeRoundAndPack:
1847433075b6Spvalchev     --zExp;
1848433075b6Spvalchev     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1849433075b6Spvalchev 
1850433075b6Spvalchev }
1851433075b6Spvalchev 
1852433075b6Spvalchev /*
1853433075b6Spvalchev -------------------------------------------------------------------------------
1854433075b6Spvalchev Returns the result of adding the single-precision floating-point values `a'
1855433075b6Spvalchev and `b'.  The operation is performed according to the IEC/IEEE Standard for
1856433075b6Spvalchev Binary Floating-Point Arithmetic.
1857433075b6Spvalchev -------------------------------------------------------------------------------
1858433075b6Spvalchev */
float32_add(float32 a,float32 b)1859433075b6Spvalchev float32 float32_add( float32 a, float32 b )
1860433075b6Spvalchev {
1861433075b6Spvalchev     flag aSign, bSign;
1862433075b6Spvalchev 
1863433075b6Spvalchev     aSign = extractFloat32Sign( a );
1864433075b6Spvalchev     bSign = extractFloat32Sign( b );
1865433075b6Spvalchev     if ( aSign == bSign ) {
1866433075b6Spvalchev         return addFloat32Sigs( a, b, aSign );
1867433075b6Spvalchev     }
1868433075b6Spvalchev     else {
1869433075b6Spvalchev         return subFloat32Sigs( a, b, aSign );
1870433075b6Spvalchev     }
1871433075b6Spvalchev 
1872433075b6Spvalchev }
1873433075b6Spvalchev 
1874433075b6Spvalchev /*
1875433075b6Spvalchev -------------------------------------------------------------------------------
1876433075b6Spvalchev Returns the result of subtracting the single-precision floating-point values
1877433075b6Spvalchev `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1878433075b6Spvalchev for Binary Floating-Point Arithmetic.
1879433075b6Spvalchev -------------------------------------------------------------------------------
1880433075b6Spvalchev */
float32_sub(float32 a,float32 b)1881433075b6Spvalchev float32 float32_sub( float32 a, float32 b )
1882433075b6Spvalchev {
1883433075b6Spvalchev     flag aSign, bSign;
1884433075b6Spvalchev 
1885433075b6Spvalchev     aSign = extractFloat32Sign( a );
1886433075b6Spvalchev     bSign = extractFloat32Sign( b );
1887433075b6Spvalchev     if ( aSign == bSign ) {
1888433075b6Spvalchev         return subFloat32Sigs( a, b, aSign );
1889433075b6Spvalchev     }
1890433075b6Spvalchev     else {
1891433075b6Spvalchev         return addFloat32Sigs( a, b, aSign );
1892433075b6Spvalchev     }
1893433075b6Spvalchev 
1894433075b6Spvalchev }
1895433075b6Spvalchev 
1896433075b6Spvalchev /*
1897433075b6Spvalchev -------------------------------------------------------------------------------
1898433075b6Spvalchev Returns the result of multiplying the single-precision floating-point values
1899433075b6Spvalchev `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1900433075b6Spvalchev for Binary Floating-Point Arithmetic.
1901433075b6Spvalchev -------------------------------------------------------------------------------
1902433075b6Spvalchev */
float32_mul(float32 a,float32 b)1903433075b6Spvalchev float32 float32_mul( float32 a, float32 b )
1904433075b6Spvalchev {
1905433075b6Spvalchev     flag aSign, bSign, zSign;
1906433075b6Spvalchev     int16 aExp, bExp, zExp;
1907433075b6Spvalchev     bits32 aSig, bSig;
1908433075b6Spvalchev     bits64 zSig64;
1909433075b6Spvalchev     bits32 zSig;
1910433075b6Spvalchev 
1911433075b6Spvalchev     aSig = extractFloat32Frac( a );
1912433075b6Spvalchev     aExp = extractFloat32Exp( a );
1913433075b6Spvalchev     aSign = extractFloat32Sign( a );
1914433075b6Spvalchev     bSig = extractFloat32Frac( b );
1915433075b6Spvalchev     bExp = extractFloat32Exp( b );
1916433075b6Spvalchev     bSign = extractFloat32Sign( b );
1917433075b6Spvalchev     zSign = aSign ^ bSign;
1918433075b6Spvalchev     if ( aExp == 0xFF ) {
1919433075b6Spvalchev         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1920433075b6Spvalchev             return propagateFloat32NaN( a, b );
1921433075b6Spvalchev         }
1922433075b6Spvalchev         if ( ( bExp | bSig ) == 0 ) {
1923433075b6Spvalchev             float_raise( float_flag_invalid );
1924433075b6Spvalchev             return float32_default_nan;
1925433075b6Spvalchev         }
1926433075b6Spvalchev         return packFloat32( zSign, 0xFF, 0 );
1927433075b6Spvalchev     }
1928433075b6Spvalchev     if ( bExp == 0xFF ) {
1929433075b6Spvalchev         if ( bSig ) return propagateFloat32NaN( a, b );
1930433075b6Spvalchev         if ( ( aExp | aSig ) == 0 ) {
1931433075b6Spvalchev             float_raise( float_flag_invalid );
1932433075b6Spvalchev             return float32_default_nan;
1933433075b6Spvalchev         }
1934433075b6Spvalchev         return packFloat32( zSign, 0xFF, 0 );
1935433075b6Spvalchev     }
1936433075b6Spvalchev     if ( aExp == 0 ) {
1937433075b6Spvalchev         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1938433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1939433075b6Spvalchev     }
1940433075b6Spvalchev     if ( bExp == 0 ) {
1941433075b6Spvalchev         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1942433075b6Spvalchev         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1943433075b6Spvalchev     }
1944433075b6Spvalchev     zExp = aExp + bExp - 0x7F;
1945433075b6Spvalchev     aSig = ( aSig | 0x00800000 )<<7;
1946433075b6Spvalchev     bSig = ( bSig | 0x00800000 )<<8;
1947433075b6Spvalchev     shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1948433075b6Spvalchev     zSig = zSig64;
1949433075b6Spvalchev     if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1950433075b6Spvalchev         zSig <<= 1;
1951433075b6Spvalchev         --zExp;
1952433075b6Spvalchev     }
1953433075b6Spvalchev     return roundAndPackFloat32( zSign, zExp, zSig );
1954433075b6Spvalchev 
1955433075b6Spvalchev }
1956433075b6Spvalchev 
1957433075b6Spvalchev /*
1958433075b6Spvalchev -------------------------------------------------------------------------------
1959433075b6Spvalchev Returns the result of dividing the single-precision floating-point value `a'
1960433075b6Spvalchev by the corresponding value `b'.  The operation is performed according to the
1961433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1962433075b6Spvalchev -------------------------------------------------------------------------------
1963433075b6Spvalchev */
float32_div(float32 a,float32 b)1964433075b6Spvalchev float32 float32_div( float32 a, float32 b )
1965433075b6Spvalchev {
1966433075b6Spvalchev     flag aSign, bSign, zSign;
1967433075b6Spvalchev     int16 aExp, bExp, zExp;
1968433075b6Spvalchev     bits32 aSig, bSig, zSig;
1969433075b6Spvalchev 
1970433075b6Spvalchev     aSig = extractFloat32Frac( a );
1971433075b6Spvalchev     aExp = extractFloat32Exp( a );
1972433075b6Spvalchev     aSign = extractFloat32Sign( a );
1973433075b6Spvalchev     bSig = extractFloat32Frac( b );
1974433075b6Spvalchev     bExp = extractFloat32Exp( b );
1975433075b6Spvalchev     bSign = extractFloat32Sign( b );
1976433075b6Spvalchev     zSign = aSign ^ bSign;
1977433075b6Spvalchev     if ( aExp == 0xFF ) {
1978433075b6Spvalchev         if ( aSig ) return propagateFloat32NaN( a, b );
1979433075b6Spvalchev         if ( bExp == 0xFF ) {
1980433075b6Spvalchev             if ( bSig ) return propagateFloat32NaN( a, b );
1981433075b6Spvalchev             float_raise( float_flag_invalid );
1982433075b6Spvalchev             return float32_default_nan;
1983433075b6Spvalchev         }
1984433075b6Spvalchev         return packFloat32( zSign, 0xFF, 0 );
1985433075b6Spvalchev     }
1986433075b6Spvalchev     if ( bExp == 0xFF ) {
1987433075b6Spvalchev         if ( bSig ) return propagateFloat32NaN( a, b );
1988433075b6Spvalchev         return packFloat32( zSign, 0, 0 );
1989433075b6Spvalchev     }
1990433075b6Spvalchev     if ( bExp == 0 ) {
1991433075b6Spvalchev         if ( bSig == 0 ) {
1992433075b6Spvalchev             if ( ( aExp | aSig ) == 0 ) {
1993433075b6Spvalchev                 float_raise( float_flag_invalid );
1994433075b6Spvalchev                 return float32_default_nan;
1995433075b6Spvalchev             }
1996433075b6Spvalchev             float_raise( float_flag_divbyzero );
1997433075b6Spvalchev             return packFloat32( zSign, 0xFF, 0 );
1998433075b6Spvalchev         }
1999433075b6Spvalchev         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2000433075b6Spvalchev     }
2001433075b6Spvalchev     if ( aExp == 0 ) {
2002433075b6Spvalchev         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2003433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2004433075b6Spvalchev     }
2005433075b6Spvalchev     zExp = aExp - bExp + 0x7D;
2006433075b6Spvalchev     aSig = ( aSig | 0x00800000 )<<7;
2007433075b6Spvalchev     bSig = ( bSig | 0x00800000 )<<8;
2008433075b6Spvalchev     if ( bSig <= ( aSig + aSig ) ) {
2009433075b6Spvalchev         aSig >>= 1;
2010433075b6Spvalchev         ++zExp;
2011433075b6Spvalchev     }
2012433075b6Spvalchev     zSig = ( ( (bits64) aSig )<<32 ) / bSig;
2013433075b6Spvalchev     if ( ( zSig & 0x3F ) == 0 ) {
2014433075b6Spvalchev         zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
2015433075b6Spvalchev     }
2016433075b6Spvalchev     return roundAndPackFloat32( zSign, zExp, zSig );
2017433075b6Spvalchev 
2018433075b6Spvalchev }
2019433075b6Spvalchev 
2020433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2021433075b6Spvalchev /*
2022433075b6Spvalchev -------------------------------------------------------------------------------
2023433075b6Spvalchev Returns the remainder of the single-precision floating-point value `a'
2024433075b6Spvalchev with respect to the corresponding value `b'.  The operation is performed
2025433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2026433075b6Spvalchev -------------------------------------------------------------------------------
2027433075b6Spvalchev */
float32_rem(float32 a,float32 b)2028433075b6Spvalchev float32 float32_rem( float32 a, float32 b )
2029433075b6Spvalchev {
2030433075b6Spvalchev     flag aSign, bSign, zSign;
2031433075b6Spvalchev     int16 aExp, bExp, expDiff;
2032433075b6Spvalchev     bits32 aSig, bSig;
2033433075b6Spvalchev     bits32 q;
2034433075b6Spvalchev     bits64 aSig64, bSig64, q64;
2035433075b6Spvalchev     bits32 alternateASig;
2036433075b6Spvalchev     sbits32 sigMean;
2037433075b6Spvalchev 
2038433075b6Spvalchev     aSig = extractFloat32Frac( a );
2039433075b6Spvalchev     aExp = extractFloat32Exp( a );
2040433075b6Spvalchev     aSign = extractFloat32Sign( a );
2041433075b6Spvalchev     bSig = extractFloat32Frac( b );
2042433075b6Spvalchev     bExp = extractFloat32Exp( b );
2043433075b6Spvalchev     bSign = extractFloat32Sign( b );
2044433075b6Spvalchev     if ( aExp == 0xFF ) {
2045433075b6Spvalchev         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2046433075b6Spvalchev             return propagateFloat32NaN( a, b );
2047433075b6Spvalchev         }
2048433075b6Spvalchev         float_raise( float_flag_invalid );
2049433075b6Spvalchev         return float32_default_nan;
2050433075b6Spvalchev     }
2051433075b6Spvalchev     if ( bExp == 0xFF ) {
2052433075b6Spvalchev         if ( bSig ) return propagateFloat32NaN( a, b );
2053433075b6Spvalchev         return a;
2054433075b6Spvalchev     }
2055433075b6Spvalchev     if ( bExp == 0 ) {
2056433075b6Spvalchev         if ( bSig == 0 ) {
2057433075b6Spvalchev             float_raise( float_flag_invalid );
2058433075b6Spvalchev             return float32_default_nan;
2059433075b6Spvalchev         }
2060433075b6Spvalchev         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2061433075b6Spvalchev     }
2062433075b6Spvalchev     if ( aExp == 0 ) {
2063433075b6Spvalchev         if ( aSig == 0 ) return a;
2064433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2065433075b6Spvalchev     }
2066433075b6Spvalchev     expDiff = aExp - bExp;
2067433075b6Spvalchev     aSig |= 0x00800000;
2068433075b6Spvalchev     bSig |= 0x00800000;
2069433075b6Spvalchev     if ( expDiff < 32 ) {
2070433075b6Spvalchev         aSig <<= 8;
2071433075b6Spvalchev         bSig <<= 8;
2072433075b6Spvalchev         if ( expDiff < 0 ) {
2073433075b6Spvalchev             if ( expDiff < -1 ) return a;
2074433075b6Spvalchev             aSig >>= 1;
2075433075b6Spvalchev         }
2076433075b6Spvalchev         q = ( bSig <= aSig );
2077433075b6Spvalchev         if ( q ) aSig -= bSig;
2078433075b6Spvalchev         if ( 0 < expDiff ) {
2079433075b6Spvalchev             q = ( ( (bits64) aSig )<<32 ) / bSig;
2080433075b6Spvalchev             q >>= 32 - expDiff;
2081433075b6Spvalchev             bSig >>= 2;
2082433075b6Spvalchev             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2083433075b6Spvalchev         }
2084433075b6Spvalchev         else {
2085433075b6Spvalchev             aSig >>= 2;
2086433075b6Spvalchev             bSig >>= 2;
2087433075b6Spvalchev         }
2088433075b6Spvalchev     }
2089433075b6Spvalchev     else {
2090433075b6Spvalchev         if ( bSig <= aSig ) aSig -= bSig;
2091433075b6Spvalchev         aSig64 = ( (bits64) aSig )<<40;
2092433075b6Spvalchev         bSig64 = ( (bits64) bSig )<<40;
2093433075b6Spvalchev         expDiff -= 64;
2094433075b6Spvalchev         while ( 0 < expDiff ) {
2095433075b6Spvalchev             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2096433075b6Spvalchev             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2097433075b6Spvalchev             aSig64 = - ( ( bSig * q64 )<<38 );
2098433075b6Spvalchev             expDiff -= 62;
2099433075b6Spvalchev         }
2100433075b6Spvalchev         expDiff += 64;
2101433075b6Spvalchev         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2102433075b6Spvalchev         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2103433075b6Spvalchev         q = q64>>( 64 - expDiff );
2104433075b6Spvalchev         bSig <<= 6;
2105433075b6Spvalchev         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2106433075b6Spvalchev     }
2107433075b6Spvalchev     do {
2108433075b6Spvalchev         alternateASig = aSig;
2109433075b6Spvalchev         ++q;
2110433075b6Spvalchev         aSig -= bSig;
2111433075b6Spvalchev     } while ( 0 <= (sbits32) aSig );
2112433075b6Spvalchev     sigMean = aSig + alternateASig;
2113433075b6Spvalchev     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2114433075b6Spvalchev         aSig = alternateASig;
2115433075b6Spvalchev     }
2116433075b6Spvalchev     zSign = ( (sbits32) aSig < 0 );
2117433075b6Spvalchev     if ( zSign ) aSig = - aSig;
2118433075b6Spvalchev     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2119433075b6Spvalchev 
2120433075b6Spvalchev }
2121433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
2122433075b6Spvalchev 
2123433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2124433075b6Spvalchev /*
2125433075b6Spvalchev -------------------------------------------------------------------------------
2126433075b6Spvalchev Returns the square root of the single-precision floating-point value `a'.
2127433075b6Spvalchev The operation is performed according to the IEC/IEEE Standard for Binary
2128433075b6Spvalchev Floating-Point Arithmetic.
2129433075b6Spvalchev -------------------------------------------------------------------------------
2130433075b6Spvalchev */
float32_sqrt(float32 a)2131433075b6Spvalchev float32 float32_sqrt( float32 a )
2132433075b6Spvalchev {
2133433075b6Spvalchev     flag aSign;
2134433075b6Spvalchev     int16 aExp, zExp;
2135433075b6Spvalchev     bits32 aSig, zSig;
2136433075b6Spvalchev     bits64 rem, term;
2137433075b6Spvalchev 
2138433075b6Spvalchev     aSig = extractFloat32Frac( a );
2139433075b6Spvalchev     aExp = extractFloat32Exp( a );
2140433075b6Spvalchev     aSign = extractFloat32Sign( a );
2141433075b6Spvalchev     if ( aExp == 0xFF ) {
2142433075b6Spvalchev         if ( aSig ) return propagateFloat32NaN( a, 0 );
2143433075b6Spvalchev         if ( ! aSign ) return a;
2144433075b6Spvalchev         float_raise( float_flag_invalid );
2145433075b6Spvalchev         return float32_default_nan;
2146433075b6Spvalchev     }
2147433075b6Spvalchev     if ( aSign ) {
2148433075b6Spvalchev         if ( ( aExp | aSig ) == 0 ) return a;
2149433075b6Spvalchev         float_raise( float_flag_invalid );
2150433075b6Spvalchev         return float32_default_nan;
2151433075b6Spvalchev     }
2152433075b6Spvalchev     if ( aExp == 0 ) {
2153433075b6Spvalchev         if ( aSig == 0 ) return 0;
2154433075b6Spvalchev         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2155433075b6Spvalchev     }
2156433075b6Spvalchev     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2157433075b6Spvalchev     aSig = ( aSig | 0x00800000 )<<8;
2158433075b6Spvalchev     zSig = estimateSqrt32( aExp, aSig ) + 2;
2159433075b6Spvalchev     if ( ( zSig & 0x7F ) <= 5 ) {
2160433075b6Spvalchev         if ( zSig < 2 ) {
2161433075b6Spvalchev             zSig = 0x7FFFFFFF;
2162433075b6Spvalchev             goto roundAndPack;
2163433075b6Spvalchev         }
2164433075b6Spvalchev         aSig >>= aExp & 1;
2165433075b6Spvalchev         term = ( (bits64) zSig ) * zSig;
2166433075b6Spvalchev         rem = ( ( (bits64) aSig )<<32 ) - term;
2167433075b6Spvalchev         while ( (sbits64) rem < 0 ) {
2168433075b6Spvalchev             --zSig;
2169433075b6Spvalchev             rem += ( ( (bits64) zSig )<<1 ) | 1;
2170433075b6Spvalchev         }
2171433075b6Spvalchev         zSig |= ( rem != 0 );
2172433075b6Spvalchev     }
2173433075b6Spvalchev     shift32RightJamming( zSig, 1, &zSig );
2174433075b6Spvalchev  roundAndPack:
2175433075b6Spvalchev     return roundAndPackFloat32( 0, zExp, zSig );
2176433075b6Spvalchev 
2177433075b6Spvalchev }
2178433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
2179433075b6Spvalchev 
2180433075b6Spvalchev /*
2181433075b6Spvalchev -------------------------------------------------------------------------------
2182433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is equal to
2183433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The comparison is performed
2184433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185433075b6Spvalchev -------------------------------------------------------------------------------
2186433075b6Spvalchev */
float32_eq(float32 a,float32 b)2187433075b6Spvalchev flag float32_eq( float32 a, float32 b )
2188433075b6Spvalchev {
2189433075b6Spvalchev 
2190433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2191433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2192433075b6Spvalchev        ) {
2193433075b6Spvalchev         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2194433075b6Spvalchev             float_raise( float_flag_invalid );
2195433075b6Spvalchev         }
2196433075b6Spvalchev         return 0;
2197433075b6Spvalchev     }
2198433075b6Spvalchev     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2199433075b6Spvalchev 
2200433075b6Spvalchev }
2201433075b6Spvalchev 
2202433075b6Spvalchev /*
2203433075b6Spvalchev -------------------------------------------------------------------------------
2204433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is less than
2205433075b6Spvalchev or equal to the corresponding value `b', and 0 otherwise.  The comparison
2206433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
2207433075b6Spvalchev Arithmetic.
2208433075b6Spvalchev -------------------------------------------------------------------------------
2209433075b6Spvalchev */
float32_le(float32 a,float32 b)2210433075b6Spvalchev flag float32_le( float32 a, float32 b )
2211433075b6Spvalchev {
2212433075b6Spvalchev     flag aSign, bSign;
2213433075b6Spvalchev 
2214433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2215433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2216433075b6Spvalchev        ) {
2217433075b6Spvalchev         float_raise( float_flag_invalid );
2218433075b6Spvalchev         return 0;
2219433075b6Spvalchev     }
2220433075b6Spvalchev     aSign = extractFloat32Sign( a );
2221433075b6Spvalchev     bSign = extractFloat32Sign( b );
2222433075b6Spvalchev     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2223433075b6Spvalchev     return ( a == b ) || ( aSign ^ ( a < b ) );
2224433075b6Spvalchev 
2225433075b6Spvalchev }
2226433075b6Spvalchev 
2227433075b6Spvalchev /*
2228433075b6Spvalchev -------------------------------------------------------------------------------
2229433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is less than
2230433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The comparison is performed
2231433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2232433075b6Spvalchev -------------------------------------------------------------------------------
2233433075b6Spvalchev */
float32_lt(float32 a,float32 b)2234433075b6Spvalchev flag float32_lt( float32 a, float32 b )
2235433075b6Spvalchev {
2236433075b6Spvalchev     flag aSign, bSign;
2237433075b6Spvalchev 
2238433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2239433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2240433075b6Spvalchev        ) {
2241433075b6Spvalchev         float_raise( float_flag_invalid );
2242433075b6Spvalchev         return 0;
2243433075b6Spvalchev     }
2244433075b6Spvalchev     aSign = extractFloat32Sign( a );
2245433075b6Spvalchev     bSign = extractFloat32Sign( b );
2246433075b6Spvalchev     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2247433075b6Spvalchev     return ( a != b ) && ( aSign ^ ( a < b ) );
2248433075b6Spvalchev 
2249433075b6Spvalchev }
2250433075b6Spvalchev 
2251433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2252433075b6Spvalchev /*
2253433075b6Spvalchev -------------------------------------------------------------------------------
2254433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is equal to
2255433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The invalid exception is
2256433075b6Spvalchev raised if either operand is a NaN.  Otherwise, the comparison is performed
2257433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2258433075b6Spvalchev -------------------------------------------------------------------------------
2259433075b6Spvalchev */
float32_eq_signaling(float32 a,float32 b)2260433075b6Spvalchev flag float32_eq_signaling( float32 a, float32 b )
2261433075b6Spvalchev {
2262433075b6Spvalchev 
2263433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2264433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2265433075b6Spvalchev        ) {
2266433075b6Spvalchev         float_raise( float_flag_invalid );
2267433075b6Spvalchev         return 0;
2268433075b6Spvalchev     }
2269433075b6Spvalchev     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2270433075b6Spvalchev 
2271433075b6Spvalchev }
2272433075b6Spvalchev 
2273433075b6Spvalchev /*
2274433075b6Spvalchev -------------------------------------------------------------------------------
2275433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is less than or
2276433075b6Spvalchev equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2277433075b6Spvalchev cause an exception.  Otherwise, the comparison is performed according to the
2278433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2279433075b6Spvalchev -------------------------------------------------------------------------------
2280433075b6Spvalchev */
float32_le_quiet(float32 a,float32 b)2281433075b6Spvalchev flag float32_le_quiet( float32 a, float32 b )
2282433075b6Spvalchev {
2283433075b6Spvalchev     flag aSign, bSign;
2284433075b6Spvalchev 
2285433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2286433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2287433075b6Spvalchev        ) {
2288433075b6Spvalchev         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2289433075b6Spvalchev             float_raise( float_flag_invalid );
2290433075b6Spvalchev         }
2291433075b6Spvalchev         return 0;
2292433075b6Spvalchev     }
2293433075b6Spvalchev     aSign = extractFloat32Sign( a );
2294433075b6Spvalchev     bSign = extractFloat32Sign( b );
2295433075b6Spvalchev     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2296433075b6Spvalchev     return ( a == b ) || ( aSign ^ ( a < b ) );
2297433075b6Spvalchev 
2298433075b6Spvalchev }
2299433075b6Spvalchev 
2300433075b6Spvalchev /*
2301433075b6Spvalchev -------------------------------------------------------------------------------
2302433075b6Spvalchev Returns 1 if the single-precision floating-point value `a' is less than
2303433075b6Spvalchev the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2304433075b6Spvalchev exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2305433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
2306433075b6Spvalchev -------------------------------------------------------------------------------
2307433075b6Spvalchev */
float32_lt_quiet(float32 a,float32 b)2308433075b6Spvalchev flag float32_lt_quiet( float32 a, float32 b )
2309433075b6Spvalchev {
2310433075b6Spvalchev     flag aSign, bSign;
2311433075b6Spvalchev 
2312433075b6Spvalchev     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2313433075b6Spvalchev          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2314433075b6Spvalchev        ) {
2315433075b6Spvalchev         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2316433075b6Spvalchev             float_raise( float_flag_invalid );
2317433075b6Spvalchev         }
2318433075b6Spvalchev         return 0;
2319433075b6Spvalchev     }
2320433075b6Spvalchev     aSign = extractFloat32Sign( a );
2321433075b6Spvalchev     bSign = extractFloat32Sign( b );
2322433075b6Spvalchev     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2323433075b6Spvalchev     return ( a != b ) && ( aSign ^ ( a < b ) );
2324433075b6Spvalchev 
2325433075b6Spvalchev }
2326433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
2327433075b6Spvalchev 
2328433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2329433075b6Spvalchev /*
2330433075b6Spvalchev -------------------------------------------------------------------------------
2331433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2332433075b6Spvalchev `a' to the 32-bit two's complement integer format.  The conversion is
2333433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2334433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
2335433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
2336433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
2337433075b6Spvalchev largest integer with the same sign as `a' is returned.
2338433075b6Spvalchev -------------------------------------------------------------------------------
2339433075b6Spvalchev */
float64_to_int32(float64 a)2340433075b6Spvalchev int32 float64_to_int32( float64 a )
2341433075b6Spvalchev {
2342433075b6Spvalchev     flag aSign;
2343433075b6Spvalchev     int16 aExp, shiftCount;
2344433075b6Spvalchev     bits64 aSig;
2345433075b6Spvalchev 
2346433075b6Spvalchev     aSig = extractFloat64Frac( a );
2347433075b6Spvalchev     aExp = extractFloat64Exp( a );
2348433075b6Spvalchev     aSign = extractFloat64Sign( a );
2349433075b6Spvalchev     if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2350433075b6Spvalchev     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2351433075b6Spvalchev     shiftCount = 0x42C - aExp;
2352433075b6Spvalchev     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2353433075b6Spvalchev     return roundAndPackInt32( aSign, aSig );
2354433075b6Spvalchev 
2355433075b6Spvalchev }
2356433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
2357433075b6Spvalchev 
2358433075b6Spvalchev /*
2359433075b6Spvalchev -------------------------------------------------------------------------------
2360433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2361433075b6Spvalchev `a' to the 32-bit two's complement integer format.  The conversion is
2362433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2363433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.
2364433075b6Spvalchev If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2365433075b6Spvalchev the conversion overflows, the largest integer with the same sign as `a' is
2366433075b6Spvalchev returned.
2367433075b6Spvalchev -------------------------------------------------------------------------------
2368433075b6Spvalchev */
float64_to_int32_round_to_zero(float64 a)2369433075b6Spvalchev int32 float64_to_int32_round_to_zero( float64 a )
2370433075b6Spvalchev {
2371433075b6Spvalchev     flag aSign;
2372433075b6Spvalchev     int16 aExp, shiftCount;
2373433075b6Spvalchev     bits64 aSig, savedASig;
2374433075b6Spvalchev     int32 z;
2375433075b6Spvalchev 
2376433075b6Spvalchev     aSig = extractFloat64Frac( a );
2377433075b6Spvalchev     aExp = extractFloat64Exp( a );
2378433075b6Spvalchev     aSign = extractFloat64Sign( a );
2379433075b6Spvalchev     if ( 0x41E < aExp ) {
2380433075b6Spvalchev         if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2381433075b6Spvalchev         goto invalid;
2382433075b6Spvalchev     }
2383433075b6Spvalchev     else if ( aExp < 0x3FF ) {
2384433075b6Spvalchev         if ( aExp || aSig ) float_set_inexact();
2385433075b6Spvalchev         return 0;
2386433075b6Spvalchev     }
2387433075b6Spvalchev     aSig |= LIT64( 0x0010000000000000 );
2388433075b6Spvalchev     shiftCount = 0x433 - aExp;
2389433075b6Spvalchev     savedASig = aSig;
2390433075b6Spvalchev     aSig >>= shiftCount;
2391433075b6Spvalchev     z = aSig;
2392433075b6Spvalchev     if ( aSign ) z = - z;
2393433075b6Spvalchev     if ( ( z < 0 ) ^ aSign ) {
2394433075b6Spvalchev  invalid:
2395433075b6Spvalchev         float_raise( float_flag_invalid );
2396433075b6Spvalchev         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2397433075b6Spvalchev     }
2398433075b6Spvalchev     if ( ( aSig<<shiftCount ) != savedASig ) {
2399433075b6Spvalchev         float_set_inexact();
2400433075b6Spvalchev     }
2401433075b6Spvalchev     return z;
2402433075b6Spvalchev 
2403433075b6Spvalchev }
2404433075b6Spvalchev 
2405433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2406433075b6Spvalchev /*
2407433075b6Spvalchev -------------------------------------------------------------------------------
2408433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2409433075b6Spvalchev `a' to the 64-bit two's complement integer format.  The conversion is
2410433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2411433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
2412433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
2413433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
2414433075b6Spvalchev largest integer with the same sign as `a' is returned.
2415433075b6Spvalchev -------------------------------------------------------------------------------
2416433075b6Spvalchev */
float64_to_int64(float64 a)2417433075b6Spvalchev int64 float64_to_int64( float64 a )
2418433075b6Spvalchev {
2419433075b6Spvalchev     flag aSign;
2420433075b6Spvalchev     int16 aExp, shiftCount;
2421433075b6Spvalchev     bits64 aSig, aSigExtra;
2422433075b6Spvalchev 
2423433075b6Spvalchev     aSig = extractFloat64Frac( a );
2424433075b6Spvalchev     aExp = extractFloat64Exp( a );
2425433075b6Spvalchev     aSign = extractFloat64Sign( a );
2426433075b6Spvalchev     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2427433075b6Spvalchev     shiftCount = 0x433 - aExp;
2428433075b6Spvalchev     if ( shiftCount <= 0 ) {
2429433075b6Spvalchev         if ( 0x43E < aExp ) {
2430433075b6Spvalchev             float_raise( float_flag_invalid );
2431433075b6Spvalchev             if (    ! aSign
2432433075b6Spvalchev                  || (    ( aExp == 0x7FF )
2433433075b6Spvalchev                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
2434433075b6Spvalchev                ) {
2435433075b6Spvalchev                 return LIT64( 0x7FFFFFFFFFFFFFFF );
2436433075b6Spvalchev             }
2437433075b6Spvalchev             return (sbits64) LIT64( 0x8000000000000000 );
2438433075b6Spvalchev         }
2439433075b6Spvalchev         aSigExtra = 0;
2440433075b6Spvalchev         aSig <<= - shiftCount;
2441433075b6Spvalchev     }
2442433075b6Spvalchev     else {
2443433075b6Spvalchev         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2444433075b6Spvalchev     }
2445433075b6Spvalchev     return roundAndPackInt64( aSign, aSig, aSigExtra );
2446433075b6Spvalchev 
2447433075b6Spvalchev }
2448433075b6Spvalchev 
2449a657e0a7Smartynas #ifdef __alpha__
2450a657e0a7Smartynas /*
2451a657e0a7Smartynas -------------------------------------------------------------------------------
2452a657e0a7Smartynas Returns the result of converting the double-precision floating-point value
2453a657e0a7Smartynas `a' to the 64-bit two's complement integer format.  The conversion is
2454a657e0a7Smartynas performed according to the IEC/IEEE Standard for Binary Floating-Point
2455a657e0a7Smartynas Arithmetic---which means in particular that the conversion is rounded
2456a657e0a7Smartynas according to the current rounding mode.  If `a' is a NaN, the invalid
2457a657e0a7Smartynas exception is raised and zero is returned.
2458a657e0a7Smartynas -------------------------------------------------------------------------------
2459a657e0a7Smartynas */
float64_to_int64_no_overflow(float64 a)2460a657e0a7Smartynas int64 float64_to_int64_no_overflow( float64 a )
2461a657e0a7Smartynas {
2462a657e0a7Smartynas     flag aSign;
2463a657e0a7Smartynas     int16 aExp, shiftCount;
2464a657e0a7Smartynas     bits64 aSig, aSigExtra;
2465a657e0a7Smartynas 
2466a657e0a7Smartynas     aSig = extractFloat64Frac( a );
2467a657e0a7Smartynas     aExp = extractFloat64Exp( a );
2468a657e0a7Smartynas     aSign = extractFloat64Sign( a );
2469a657e0a7Smartynas     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2470a657e0a7Smartynas     shiftCount = 0x433 - aExp;
2471a657e0a7Smartynas     if ( shiftCount <= 0 ) {
2472a657e0a7Smartynas         if ( 0x43E < aExp ) {
2473a657e0a7Smartynas             float_raise( float_flag_invalid );
2474a657e0a7Smartynas             return 0;
2475a657e0a7Smartynas         }
2476a657e0a7Smartynas         aSigExtra = 0;
2477a657e0a7Smartynas         aSig <<= - shiftCount;
2478a657e0a7Smartynas     }
2479a657e0a7Smartynas     else {
2480a657e0a7Smartynas         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2481a657e0a7Smartynas     }
2482a657e0a7Smartynas     return roundAndPackInt64NoOverflow( aSign, aSig, aSigExtra );
2483a657e0a7Smartynas 
2484a657e0a7Smartynas }
2485a657e0a7Smartynas #endif /* __alpha__ */
2486a657e0a7Smartynas 
2487433075b6Spvalchev /*
2488433075b6Spvalchev -------------------------------------------------------------------------------
2489433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2490433075b6Spvalchev `a' to the 64-bit two's complement integer format.  The conversion is
2491433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2492433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.
2493433075b6Spvalchev If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2494433075b6Spvalchev the conversion overflows, the largest integer with the same sign as `a' is
2495433075b6Spvalchev returned.
2496433075b6Spvalchev -------------------------------------------------------------------------------
2497433075b6Spvalchev */
float64_to_int64_round_to_zero(float64 a)2498433075b6Spvalchev int64 float64_to_int64_round_to_zero( float64 a )
2499433075b6Spvalchev {
2500433075b6Spvalchev     flag aSign;
2501433075b6Spvalchev     int16 aExp, shiftCount;
2502433075b6Spvalchev     bits64 aSig;
2503433075b6Spvalchev     int64 z;
2504433075b6Spvalchev 
2505433075b6Spvalchev     aSig = extractFloat64Frac( a );
2506433075b6Spvalchev     aExp = extractFloat64Exp( a );
2507433075b6Spvalchev     aSign = extractFloat64Sign( a );
2508433075b6Spvalchev     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2509433075b6Spvalchev     shiftCount = aExp - 0x433;
2510433075b6Spvalchev     if ( 0 <= shiftCount ) {
2511433075b6Spvalchev         if ( 0x43E <= aExp ) {
2512433075b6Spvalchev             if ( a != LIT64( 0xC3E0000000000000 ) ) {
2513433075b6Spvalchev                 float_raise( float_flag_invalid );
2514433075b6Spvalchev                 if (    ! aSign
2515433075b6Spvalchev                      || (    ( aExp == 0x7FF )
2516433075b6Spvalchev                           && ( aSig != LIT64( 0x0010000000000000 ) ) )
2517433075b6Spvalchev                    ) {
2518433075b6Spvalchev                     return LIT64( 0x7FFFFFFFFFFFFFFF );
2519433075b6Spvalchev                 }
2520433075b6Spvalchev             }
2521433075b6Spvalchev             return (sbits64) LIT64( 0x8000000000000000 );
2522433075b6Spvalchev         }
2523433075b6Spvalchev         z = aSig<<shiftCount;
2524433075b6Spvalchev     }
2525433075b6Spvalchev     else {
2526433075b6Spvalchev         if ( aExp < 0x3FE ) {
2527433075b6Spvalchev             if ( aExp | aSig ) float_set_inexact();
2528433075b6Spvalchev             return 0;
2529433075b6Spvalchev         }
2530433075b6Spvalchev         z = aSig>>( - shiftCount );
2531433075b6Spvalchev         if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2532433075b6Spvalchev             float_set_inexact();
2533433075b6Spvalchev         }
2534433075b6Spvalchev     }
2535433075b6Spvalchev     if ( aSign ) z = - z;
2536433075b6Spvalchev     return z;
2537433075b6Spvalchev 
2538433075b6Spvalchev }
2539433075b6Spvalchev #endif /* !SOFTFLOAT_FOR_GCC */
2540433075b6Spvalchev 
2541433075b6Spvalchev /*
2542433075b6Spvalchev -------------------------------------------------------------------------------
2543433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2544433075b6Spvalchev `a' to the single-precision floating-point format.  The conversion is
2545433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2546433075b6Spvalchev Arithmetic.
2547433075b6Spvalchev -------------------------------------------------------------------------------
2548433075b6Spvalchev */
float64_to_float32(float64 a)2549433075b6Spvalchev float32 float64_to_float32( float64 a )
2550433075b6Spvalchev {
2551433075b6Spvalchev     flag aSign;
2552433075b6Spvalchev     int16 aExp;
2553433075b6Spvalchev     bits64 aSig;
2554433075b6Spvalchev     bits32 zSig;
2555433075b6Spvalchev 
2556433075b6Spvalchev     aSig = extractFloat64Frac( a );
2557433075b6Spvalchev     aExp = extractFloat64Exp( a );
2558433075b6Spvalchev     aSign = extractFloat64Sign( a );
2559433075b6Spvalchev     if ( aExp == 0x7FF ) {
2560433075b6Spvalchev         if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2561433075b6Spvalchev         return packFloat32( aSign, 0xFF, 0 );
2562433075b6Spvalchev     }
2563433075b6Spvalchev     shift64RightJamming( aSig, 22, &aSig );
2564433075b6Spvalchev     zSig = aSig;
2565433075b6Spvalchev     if ( aExp || zSig ) {
2566433075b6Spvalchev         zSig |= 0x40000000;
2567433075b6Spvalchev         aExp -= 0x381;
2568433075b6Spvalchev     }
2569433075b6Spvalchev     return roundAndPackFloat32( aSign, aExp, zSig );
2570433075b6Spvalchev 
2571433075b6Spvalchev }
2572433075b6Spvalchev 
2573433075b6Spvalchev #ifdef FLOATX80
2574433075b6Spvalchev 
2575433075b6Spvalchev /*
2576433075b6Spvalchev -------------------------------------------------------------------------------
2577433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2578433075b6Spvalchev `a' to the extended double-precision floating-point format.  The conversion
2579433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
2580433075b6Spvalchev Arithmetic.
2581433075b6Spvalchev -------------------------------------------------------------------------------
2582433075b6Spvalchev */
float64_to_floatx80(float64 a)2583433075b6Spvalchev floatx80 float64_to_floatx80( float64 a )
2584433075b6Spvalchev {
2585433075b6Spvalchev     flag aSign;
2586433075b6Spvalchev     int16 aExp;
2587433075b6Spvalchev     bits64 aSig;
2588433075b6Spvalchev 
2589433075b6Spvalchev     aSig = extractFloat64Frac( a );
2590433075b6Spvalchev     aExp = extractFloat64Exp( a );
2591433075b6Spvalchev     aSign = extractFloat64Sign( a );
2592433075b6Spvalchev     if ( aExp == 0x7FF ) {
2593433075b6Spvalchev         if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2594433075b6Spvalchev         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2595433075b6Spvalchev     }
2596433075b6Spvalchev     if ( aExp == 0 ) {
2597433075b6Spvalchev         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2598433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2599433075b6Spvalchev     }
2600433075b6Spvalchev     return
2601433075b6Spvalchev         packFloatx80(
2602433075b6Spvalchev             aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2603433075b6Spvalchev 
2604433075b6Spvalchev }
2605433075b6Spvalchev 
2606433075b6Spvalchev #endif
2607433075b6Spvalchev 
2608433075b6Spvalchev #ifdef FLOAT128
2609433075b6Spvalchev 
2610433075b6Spvalchev /*
2611433075b6Spvalchev -------------------------------------------------------------------------------
2612433075b6Spvalchev Returns the result of converting the double-precision floating-point value
2613433075b6Spvalchev `a' to the quadruple-precision floating-point format.  The conversion is
2614433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
2615433075b6Spvalchev Arithmetic.
2616433075b6Spvalchev -------------------------------------------------------------------------------
2617433075b6Spvalchev */
float64_to_float128(float64 a)2618433075b6Spvalchev float128 float64_to_float128( float64 a )
2619433075b6Spvalchev {
2620433075b6Spvalchev     flag aSign;
2621433075b6Spvalchev     int16 aExp;
2622433075b6Spvalchev     bits64 aSig, zSig0, zSig1;
2623433075b6Spvalchev 
2624433075b6Spvalchev     aSig = extractFloat64Frac( a );
2625433075b6Spvalchev     aExp = extractFloat64Exp( a );
2626433075b6Spvalchev     aSign = extractFloat64Sign( a );
2627433075b6Spvalchev     if ( aExp == 0x7FF ) {
2628433075b6Spvalchev         if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2629433075b6Spvalchev         return packFloat128( aSign, 0x7FFF, 0, 0 );
2630433075b6Spvalchev     }
2631433075b6Spvalchev     if ( aExp == 0 ) {
2632433075b6Spvalchev         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2633433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2634433075b6Spvalchev         --aExp;
2635433075b6Spvalchev     }
2636433075b6Spvalchev     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2637433075b6Spvalchev     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2638433075b6Spvalchev 
2639433075b6Spvalchev }
2640433075b6Spvalchev 
2641433075b6Spvalchev #endif
2642433075b6Spvalchev 
2643433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC
2644433075b6Spvalchev /*
2645433075b6Spvalchev -------------------------------------------------------------------------------
2646433075b6Spvalchev Rounds the double-precision floating-point value `a' to an integer, and
2647433075b6Spvalchev returns the result as a double-precision floating-point value.  The
2648433075b6Spvalchev operation is performed according to the IEC/IEEE Standard for Binary
2649433075b6Spvalchev Floating-Point Arithmetic.
2650433075b6Spvalchev -------------------------------------------------------------------------------
2651433075b6Spvalchev */
float64_round_to_int(float64 a)2652433075b6Spvalchev float64 float64_round_to_int( float64 a )
2653433075b6Spvalchev {
2654433075b6Spvalchev     flag aSign;
2655433075b6Spvalchev     int16 aExp;
2656433075b6Spvalchev     bits64 lastBitMask, roundBitsMask;
2657433075b6Spvalchev     int8 roundingMode;
2658433075b6Spvalchev     float64 z;
2659433075b6Spvalchev 
2660433075b6Spvalchev     aExp = extractFloat64Exp( a );
2661433075b6Spvalchev     if ( 0x433 <= aExp ) {
2662433075b6Spvalchev         if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2663433075b6Spvalchev             return propagateFloat64NaN( a, a );
2664433075b6Spvalchev         }
2665433075b6Spvalchev         return a;
2666433075b6Spvalchev     }
2667433075b6Spvalchev     if ( aExp < 0x3FF ) {
2668433075b6Spvalchev         if ( (bits64) ( a<<1 ) == 0 ) return a;
2669433075b6Spvalchev         float_set_inexact();
2670433075b6Spvalchev         aSign = extractFloat64Sign( a );
2671433075b6Spvalchev         switch ( float_rounding_mode() ) {
2672433075b6Spvalchev          case float_round_nearest_even:
2673433075b6Spvalchev             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2674433075b6Spvalchev                 return packFloat64( aSign, 0x3FF, 0 );
2675433075b6Spvalchev             }
2676433075b6Spvalchev             break;
2677433075b6Spvalchev          case float_round_down:
2678433075b6Spvalchev             return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2679433075b6Spvalchev          case float_round_up:
2680433075b6Spvalchev             return
2681433075b6Spvalchev             aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2682433075b6Spvalchev         }
2683433075b6Spvalchev         return packFloat64( aSign, 0, 0 );
2684433075b6Spvalchev     }
2685433075b6Spvalchev     lastBitMask = 1;
2686433075b6Spvalchev     lastBitMask <<= 0x433 - aExp;
2687433075b6Spvalchev     roundBitsMask = lastBitMask - 1;
2688433075b6Spvalchev     z = a;
2689433075b6Spvalchev     roundingMode = float_rounding_mode();
2690433075b6Spvalchev     if ( roundingMode == float_round_nearest_even ) {
2691433075b6Spvalchev         z += lastBitMask>>1;
2692433075b6Spvalchev         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2693433075b6Spvalchev     }
2694433075b6Spvalchev     else if ( roundingMode != float_round_to_zero ) {
2695433075b6Spvalchev         if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2696433075b6Spvalchev             z += roundBitsMask;
2697433075b6Spvalchev         }
2698433075b6Spvalchev     }
2699433075b6Spvalchev     z &= ~ roundBitsMask;
2700433075b6Spvalchev     if ( z != a ) float_set_inexact();
2701433075b6Spvalchev     return z;
2702433075b6Spvalchev 
2703433075b6Spvalchev }
2704433075b6Spvalchev #endif
2705433075b6Spvalchev 
2706433075b6Spvalchev /*
2707433075b6Spvalchev -------------------------------------------------------------------------------
2708433075b6Spvalchev Returns the result of adding the absolute values of the double-precision
2709433075b6Spvalchev floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2710433075b6Spvalchev before being returned.  `zSign' is ignored if the result is a NaN.
2711433075b6Spvalchev The addition is performed according to the IEC/IEEE Standard for Binary
2712433075b6Spvalchev Floating-Point Arithmetic.
2713433075b6Spvalchev -------------------------------------------------------------------------------
2714433075b6Spvalchev */
addFloat64Sigs(float64 a,float64 b,flag zSign)2715433075b6Spvalchev static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2716433075b6Spvalchev {
2717433075b6Spvalchev     int16 aExp, bExp, zExp;
2718433075b6Spvalchev     bits64 aSig, bSig, zSig;
2719433075b6Spvalchev     int16 expDiff;
2720433075b6Spvalchev 
2721433075b6Spvalchev     aSig = extractFloat64Frac( a );
2722433075b6Spvalchev     aExp = extractFloat64Exp( a );
2723433075b6Spvalchev     bSig = extractFloat64Frac( b );
2724433075b6Spvalchev     bExp = extractFloat64Exp( b );
2725433075b6Spvalchev     expDiff = aExp - bExp;
2726433075b6Spvalchev     aSig <<= 9;
2727433075b6Spvalchev     bSig <<= 9;
2728433075b6Spvalchev     if ( 0 < expDiff ) {
2729433075b6Spvalchev         if ( aExp == 0x7FF ) {
2730433075b6Spvalchev             if ( aSig ) return propagateFloat64NaN( a, b );
2731433075b6Spvalchev             return a;
2732433075b6Spvalchev         }
2733433075b6Spvalchev         if ( bExp == 0 ) {
2734433075b6Spvalchev             --expDiff;
2735433075b6Spvalchev         }
2736433075b6Spvalchev         else {
2737433075b6Spvalchev             bSig |= LIT64( 0x2000000000000000 );
2738433075b6Spvalchev         }
2739433075b6Spvalchev         shift64RightJamming( bSig, expDiff, &bSig );
2740433075b6Spvalchev         zExp = aExp;
2741433075b6Spvalchev     }
2742433075b6Spvalchev     else if ( expDiff < 0 ) {
2743433075b6Spvalchev         if ( bExp == 0x7FF ) {
2744433075b6Spvalchev             if ( bSig ) return propagateFloat64NaN( a, b );
2745433075b6Spvalchev             return packFloat64( zSign, 0x7FF, 0 );
2746433075b6Spvalchev         }
2747433075b6Spvalchev         if ( aExp == 0 ) {
2748433075b6Spvalchev             ++expDiff;
2749433075b6Spvalchev         }
2750433075b6Spvalchev         else {
2751433075b6Spvalchev             aSig |= LIT64( 0x2000000000000000 );
2752433075b6Spvalchev         }
2753433075b6Spvalchev         shift64RightJamming( aSig, - expDiff, &aSig );
2754433075b6Spvalchev         zExp = bExp;
2755433075b6Spvalchev     }
2756433075b6Spvalchev     else {
2757433075b6Spvalchev         if ( aExp == 0x7FF ) {
2758433075b6Spvalchev             if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2759433075b6Spvalchev             return a;
2760433075b6Spvalchev         }
2761433075b6Spvalchev         if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2762433075b6Spvalchev         zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2763433075b6Spvalchev         zExp = aExp;
2764433075b6Spvalchev         goto roundAndPack;
2765433075b6Spvalchev     }
2766433075b6Spvalchev     aSig |= LIT64( 0x2000000000000000 );
2767433075b6Spvalchev     zSig = ( aSig + bSig )<<1;
2768433075b6Spvalchev     --zExp;
2769433075b6Spvalchev     if ( (sbits64) zSig < 0 ) {
2770433075b6Spvalchev         zSig = aSig + bSig;
2771433075b6Spvalchev         ++zExp;
2772433075b6Spvalchev     }
2773433075b6Spvalchev  roundAndPack:
2774433075b6Spvalchev     return roundAndPackFloat64( zSign, zExp, zSig );
2775433075b6Spvalchev 
2776433075b6Spvalchev }
2777433075b6Spvalchev 
2778433075b6Spvalchev /*
2779433075b6Spvalchev -------------------------------------------------------------------------------
2780433075b6Spvalchev Returns the result of subtracting the absolute values of the double-
2781433075b6Spvalchev precision floating-point values `a' and `b'.  If `zSign' is 1, the
2782433075b6Spvalchev difference is negated before being returned.  `zSign' is ignored if the
2783433075b6Spvalchev result is a NaN.  The subtraction is performed according to the IEC/IEEE
2784433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
2785433075b6Spvalchev -------------------------------------------------------------------------------
2786433075b6Spvalchev */
subFloat64Sigs(float64 a,float64 b,flag zSign)2787433075b6Spvalchev static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2788433075b6Spvalchev {
2789433075b6Spvalchev     int16 aExp, bExp, zExp;
2790433075b6Spvalchev     bits64 aSig, bSig, zSig;
2791433075b6Spvalchev     int16 expDiff;
2792433075b6Spvalchev 
2793433075b6Spvalchev     aSig = extractFloat64Frac( a );
2794433075b6Spvalchev     aExp = extractFloat64Exp( a );
2795433075b6Spvalchev     bSig = extractFloat64Frac( b );
2796433075b6Spvalchev     bExp = extractFloat64Exp( b );
2797433075b6Spvalchev     expDiff = aExp - bExp;
2798433075b6Spvalchev     aSig <<= 10;
2799433075b6Spvalchev     bSig <<= 10;
2800433075b6Spvalchev     if ( 0 < expDiff ) goto aExpBigger;
2801433075b6Spvalchev     if ( expDiff < 0 ) goto bExpBigger;
2802433075b6Spvalchev     if ( aExp == 0x7FF ) {
2803433075b6Spvalchev         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2804433075b6Spvalchev         float_raise( float_flag_invalid );
2805433075b6Spvalchev         return float64_default_nan;
2806433075b6Spvalchev     }
2807433075b6Spvalchev     if ( aExp == 0 ) {
2808433075b6Spvalchev         aExp = 1;
2809433075b6Spvalchev         bExp = 1;
2810433075b6Spvalchev     }
2811433075b6Spvalchev     if ( bSig < aSig ) goto aBigger;
2812433075b6Spvalchev     if ( aSig < bSig ) goto bBigger;
2813433075b6Spvalchev     return packFloat64( float_rounding_mode() == float_round_down, 0, 0 );
2814433075b6Spvalchev  bExpBigger:
2815433075b6Spvalchev     if ( bExp == 0x7FF ) {
2816433075b6Spvalchev         if ( bSig ) return propagateFloat64NaN( a, b );
2817433075b6Spvalchev         return packFloat64( zSign ^ 1, 0x7FF, 0 );
2818433075b6Spvalchev     }
2819433075b6Spvalchev     if ( aExp == 0 ) {
2820433075b6Spvalchev         ++expDiff;
2821433075b6Spvalchev     }
2822433075b6Spvalchev     else {
2823433075b6Spvalchev         aSig |= LIT64( 0x4000000000000000 );
2824433075b6Spvalchev     }
2825433075b6Spvalchev     shift64RightJamming( aSig, - expDiff, &aSig );
2826433075b6Spvalchev     bSig |= LIT64( 0x4000000000000000 );
2827433075b6Spvalchev  bBigger:
2828433075b6Spvalchev     zSig = bSig - aSig;
2829433075b6Spvalchev     zExp = bExp;
2830433075b6Spvalchev     zSign ^= 1;
2831433075b6Spvalchev     goto normalizeRoundAndPack;
2832433075b6Spvalchev  aExpBigger:
2833433075b6Spvalchev     if ( aExp == 0x7FF ) {
2834433075b6Spvalchev         if ( aSig ) return propagateFloat64NaN( a, b );
2835433075b6Spvalchev         return a;
2836433075b6Spvalchev     }
2837433075b6Spvalchev     if ( bExp == 0 ) {
2838433075b6Spvalchev         --expDiff;
2839433075b6Spvalchev     }
2840433075b6Spvalchev     else {
2841433075b6Spvalchev         bSig |= LIT64( 0x4000000000000000 );
2842433075b6Spvalchev     }
2843433075b6Spvalchev     shift64RightJamming( bSig, expDiff, &bSig );
2844433075b6Spvalchev     aSig |= LIT64( 0x4000000000000000 );
2845433075b6Spvalchev  aBigger:
2846433075b6Spvalchev     zSig = aSig - bSig;
2847433075b6Spvalchev     zExp = aExp;
2848433075b6Spvalchev  normalizeRoundAndPack:
2849433075b6Spvalchev     --zExp;
2850433075b6Spvalchev     return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2851433075b6Spvalchev 
2852433075b6Spvalchev }
2853433075b6Spvalchev 
2854433075b6Spvalchev /*
2855433075b6Spvalchev -------------------------------------------------------------------------------
2856433075b6Spvalchev Returns the result of adding the double-precision floating-point values `a'
2857433075b6Spvalchev and `b'.  The operation is performed according to the IEC/IEEE Standard for
2858433075b6Spvalchev Binary Floating-Point Arithmetic.
2859433075b6Spvalchev -------------------------------------------------------------------------------
2860433075b6Spvalchev */
float64_add(float64 a,float64 b)2861433075b6Spvalchev float64 float64_add( float64 a, float64 b )
2862433075b6Spvalchev {
2863433075b6Spvalchev     flag aSign, bSign;
2864433075b6Spvalchev 
2865433075b6Spvalchev     aSign = extractFloat64Sign( a );
2866433075b6Spvalchev     bSign = extractFloat64Sign( b );
2867433075b6Spvalchev     if ( aSign == bSign ) {
2868433075b6Spvalchev         return addFloat64Sigs( a, b, aSign );
2869433075b6Spvalchev     }
2870433075b6Spvalchev     else {
2871433075b6Spvalchev         return subFloat64Sigs( a, b, aSign );
2872433075b6Spvalchev     }
2873433075b6Spvalchev 
2874433075b6Spvalchev }
2875433075b6Spvalchev 
2876433075b6Spvalchev /*
2877433075b6Spvalchev -------------------------------------------------------------------------------
2878433075b6Spvalchev Returns the result of subtracting the double-precision floating-point values
2879433075b6Spvalchev `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2880433075b6Spvalchev for Binary Floating-Point Arithmetic.
2881433075b6Spvalchev -------------------------------------------------------------------------------
2882433075b6Spvalchev */
float64_sub(float64 a,float64 b)2883433075b6Spvalchev float64 float64_sub( float64 a, float64 b )
2884433075b6Spvalchev {
2885433075b6Spvalchev     flag aSign, bSign;
2886433075b6Spvalchev 
2887433075b6Spvalchev     aSign = extractFloat64Sign( a );
2888433075b6Spvalchev     bSign = extractFloat64Sign( b );
2889433075b6Spvalchev     if ( aSign == bSign ) {
2890433075b6Spvalchev         return subFloat64Sigs( a, b, aSign );
2891433075b6Spvalchev     }
2892433075b6Spvalchev     else {
2893433075b6Spvalchev         return addFloat64Sigs( a, b, aSign );
2894433075b6Spvalchev     }
2895433075b6Spvalchev 
2896433075b6Spvalchev }
2897433075b6Spvalchev 
2898433075b6Spvalchev /*
2899433075b6Spvalchev -------------------------------------------------------------------------------
2900433075b6Spvalchev Returns the result of multiplying the double-precision floating-point values
2901433075b6Spvalchev `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2902433075b6Spvalchev for Binary Floating-Point Arithmetic.
2903433075b6Spvalchev -------------------------------------------------------------------------------
2904433075b6Spvalchev */
float64_mul(float64 a,float64 b)2905433075b6Spvalchev float64 float64_mul( float64 a, float64 b )
2906433075b6Spvalchev {
2907433075b6Spvalchev     flag aSign, bSign, zSign;
2908433075b6Spvalchev     int16 aExp, bExp, zExp;
2909433075b6Spvalchev     bits64 aSig, bSig, zSig0, zSig1;
2910433075b6Spvalchev 
2911433075b6Spvalchev     aSig = extractFloat64Frac( a );
2912433075b6Spvalchev     aExp = extractFloat64Exp( a );
2913433075b6Spvalchev     aSign = extractFloat64Sign( a );
2914433075b6Spvalchev     bSig = extractFloat64Frac( b );
2915433075b6Spvalchev     bExp = extractFloat64Exp( b );
2916433075b6Spvalchev     bSign = extractFloat64Sign( b );
2917433075b6Spvalchev     zSign = aSign ^ bSign;
2918433075b6Spvalchev     if ( aExp == 0x7FF ) {
2919433075b6Spvalchev         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2920433075b6Spvalchev             return propagateFloat64NaN( a, b );
2921433075b6Spvalchev         }
2922433075b6Spvalchev         if ( ( bExp | bSig ) == 0 ) {
2923433075b6Spvalchev             float_raise( float_flag_invalid );
2924433075b6Spvalchev             return float64_default_nan;
2925433075b6Spvalchev         }
2926433075b6Spvalchev         return packFloat64( zSign, 0x7FF, 0 );
2927433075b6Spvalchev     }
2928433075b6Spvalchev     if ( bExp == 0x7FF ) {
2929433075b6Spvalchev         if ( bSig ) return propagateFloat64NaN( a, b );
2930433075b6Spvalchev         if ( ( aExp | aSig ) == 0 ) {
2931433075b6Spvalchev             float_raise( float_flag_invalid );
2932433075b6Spvalchev             return float64_default_nan;
2933433075b6Spvalchev         }
2934433075b6Spvalchev         return packFloat64( zSign, 0x7FF, 0 );
2935433075b6Spvalchev     }
2936433075b6Spvalchev     if ( aExp == 0 ) {
2937433075b6Spvalchev         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2938433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2939433075b6Spvalchev     }
2940433075b6Spvalchev     if ( bExp == 0 ) {
2941433075b6Spvalchev         if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2942433075b6Spvalchev         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2943433075b6Spvalchev     }
2944433075b6Spvalchev     zExp = aExp + bExp - 0x3FF;
2945433075b6Spvalchev     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2946433075b6Spvalchev     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2947433075b6Spvalchev     mul64To128( aSig, bSig, &zSig0, &zSig1 );
2948433075b6Spvalchev     zSig0 |= ( zSig1 != 0 );
2949433075b6Spvalchev     if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2950433075b6Spvalchev         zSig0 <<= 1;
2951433075b6Spvalchev         --zExp;
2952433075b6Spvalchev     }
2953433075b6Spvalchev     return roundAndPackFloat64( zSign, zExp, zSig0 );
2954433075b6Spvalchev 
2955433075b6Spvalchev }
2956433075b6Spvalchev 
2957433075b6Spvalchev /*
2958433075b6Spvalchev -------------------------------------------------------------------------------
2959433075b6Spvalchev Returns the result of dividing the double-precision floating-point value `a'
2960433075b6Spvalchev by the corresponding value `b'.  The operation is performed according to
2961433075b6Spvalchev the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2962433075b6Spvalchev -------------------------------------------------------------------------------
2963433075b6Spvalchev */
float64_div(float64 a,float64 b)2964433075b6Spvalchev float64 float64_div( float64 a, float64 b )
2965433075b6Spvalchev {
2966433075b6Spvalchev     flag aSign, bSign, zSign;
2967433075b6Spvalchev     int16 aExp, bExp, zExp;
2968433075b6Spvalchev     bits64 aSig, bSig, zSig;
2969433075b6Spvalchev     bits64 rem0, rem1;
2970433075b6Spvalchev     bits64 term0, term1;
2971433075b6Spvalchev 
2972433075b6Spvalchev     aSig = extractFloat64Frac( a );
2973433075b6Spvalchev     aExp = extractFloat64Exp( a );
2974433075b6Spvalchev     aSign = extractFloat64Sign( a );
2975433075b6Spvalchev     bSig = extractFloat64Frac( b );
2976433075b6Spvalchev     bExp = extractFloat64Exp( b );
2977433075b6Spvalchev     bSign = extractFloat64Sign( b );
2978433075b6Spvalchev     zSign = aSign ^ bSign;
2979433075b6Spvalchev     if ( aExp == 0x7FF ) {
2980433075b6Spvalchev         if ( aSig ) return propagateFloat64NaN( a, b );
2981433075b6Spvalchev         if ( bExp == 0x7FF ) {
2982433075b6Spvalchev             if ( bSig ) return propagateFloat64NaN( a, b );
2983433075b6Spvalchev             float_raise( float_flag_invalid );
2984433075b6Spvalchev             return float64_default_nan;
2985433075b6Spvalchev         }
2986433075b6Spvalchev         return packFloat64( zSign, 0x7FF, 0 );
2987433075b6Spvalchev     }
2988433075b6Spvalchev     if ( bExp == 0x7FF ) {
2989433075b6Spvalchev         if ( bSig ) return propagateFloat64NaN( a, b );
2990433075b6Spvalchev         return packFloat64( zSign, 0, 0 );
2991433075b6Spvalchev     }
2992433075b6Spvalchev     if ( bExp == 0 ) {
2993433075b6Spvalchev         if ( bSig == 0 ) {
2994433075b6Spvalchev             if ( ( aExp | aSig ) == 0 ) {
2995433075b6Spvalchev                 float_raise( float_flag_invalid );
2996433075b6Spvalchev                 return float64_default_nan;
2997433075b6Spvalchev             }
2998433075b6Spvalchev             float_raise( float_flag_divbyzero );
2999433075b6Spvalchev             return packFloat64( zSign, 0x7FF, 0 );
3000433075b6Spvalchev         }
3001433075b6Spvalchev         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3002433075b6Spvalchev     }
3003433075b6Spvalchev     if ( aExp == 0 ) {
3004433075b6Spvalchev         if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3005433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3006433075b6Spvalchev     }
3007433075b6Spvalchev     zExp = aExp - bExp + 0x3FD;
3008433075b6Spvalchev     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3009433075b6Spvalchev     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3010433075b6Spvalchev     if ( bSig <= ( aSig + aSig ) ) {
3011433075b6Spvalchev         aSig >>= 1;
3012433075b6Spvalchev         ++zExp;
3013433075b6Spvalchev     }
3014433075b6Spvalchev     zSig = estimateDiv128To64( aSig, 0, bSig );
3015433075b6Spvalchev     if ( ( zSig & 0x1FF ) <= 2 ) {
3016433075b6Spvalchev         mul64To128( bSig, zSig, &term0, &term1 );
3017433075b6Spvalchev         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3018433075b6Spvalchev         while ( (sbits64) rem0 < 0 ) {
3019433075b6Spvalchev             --zSig;
3020433075b6Spvalchev             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3021433075b6Spvalchev         }
3022433075b6Spvalchev         zSig |= ( rem1 != 0 );
3023433075b6Spvalchev     }
3024433075b6Spvalchev     return roundAndPackFloat64( zSign, zExp, zSig );
3025433075b6Spvalchev 
3026433075b6Spvalchev }
3027433075b6Spvalchev 
3028433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC
3029433075b6Spvalchev /*
3030433075b6Spvalchev -------------------------------------------------------------------------------
3031433075b6Spvalchev Returns the remainder of the double-precision floating-point value `a'
3032433075b6Spvalchev with respect to the corresponding value `b'.  The operation is performed
3033433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3034433075b6Spvalchev -------------------------------------------------------------------------------
3035433075b6Spvalchev */
float64_rem(float64 a,float64 b)3036433075b6Spvalchev float64 float64_rem( float64 a, float64 b )
3037433075b6Spvalchev {
3038433075b6Spvalchev     flag aSign, bSign, zSign;
3039433075b6Spvalchev     int16 aExp, bExp, expDiff;
3040433075b6Spvalchev     bits64 aSig, bSig;
3041433075b6Spvalchev     bits64 q, alternateASig;
3042433075b6Spvalchev     sbits64 sigMean;
3043433075b6Spvalchev 
3044433075b6Spvalchev     aSig = extractFloat64Frac( a );
3045433075b6Spvalchev     aExp = extractFloat64Exp( a );
3046433075b6Spvalchev     aSign = extractFloat64Sign( a );
3047433075b6Spvalchev     bSig = extractFloat64Frac( b );
3048433075b6Spvalchev     bExp = extractFloat64Exp( b );
3049433075b6Spvalchev     bSign = extractFloat64Sign( b );
3050433075b6Spvalchev     if ( aExp == 0x7FF ) {
3051433075b6Spvalchev         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3052433075b6Spvalchev             return propagateFloat64NaN( a, b );
3053433075b6Spvalchev         }
3054433075b6Spvalchev         float_raise( float_flag_invalid );
3055433075b6Spvalchev         return float64_default_nan;
3056433075b6Spvalchev     }
3057433075b6Spvalchev     if ( bExp == 0x7FF ) {
3058433075b6Spvalchev         if ( bSig ) return propagateFloat64NaN( a, b );
3059433075b6Spvalchev         return a;
3060433075b6Spvalchev     }
3061433075b6Spvalchev     if ( bExp == 0 ) {
3062433075b6Spvalchev         if ( bSig == 0 ) {
3063433075b6Spvalchev             float_raise( float_flag_invalid );
3064433075b6Spvalchev             return float64_default_nan;
3065433075b6Spvalchev         }
3066433075b6Spvalchev         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3067433075b6Spvalchev     }
3068433075b6Spvalchev     if ( aExp == 0 ) {
3069433075b6Spvalchev         if ( aSig == 0 ) return a;
3070433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3071433075b6Spvalchev     }
3072433075b6Spvalchev     expDiff = aExp - bExp;
3073433075b6Spvalchev     aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3074433075b6Spvalchev     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3075433075b6Spvalchev     if ( expDiff < 0 ) {
3076433075b6Spvalchev         if ( expDiff < -1 ) return a;
3077433075b6Spvalchev         aSig >>= 1;
3078433075b6Spvalchev     }
3079433075b6Spvalchev     q = ( bSig <= aSig );
3080433075b6Spvalchev     if ( q ) aSig -= bSig;
3081433075b6Spvalchev     expDiff -= 64;
3082433075b6Spvalchev     while ( 0 < expDiff ) {
3083433075b6Spvalchev         q = estimateDiv128To64( aSig, 0, bSig );
3084433075b6Spvalchev         q = ( 2 < q ) ? q - 2 : 0;
3085433075b6Spvalchev         aSig = - ( ( bSig>>2 ) * q );
3086433075b6Spvalchev         expDiff -= 62;
3087433075b6Spvalchev     }
3088433075b6Spvalchev     expDiff += 64;
3089433075b6Spvalchev     if ( 0 < expDiff ) {
3090433075b6Spvalchev         q = estimateDiv128To64( aSig, 0, bSig );
3091433075b6Spvalchev         q = ( 2 < q ) ? q - 2 : 0;
3092433075b6Spvalchev         q >>= 64 - expDiff;
3093433075b6Spvalchev         bSig >>= 2;
3094433075b6Spvalchev         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3095433075b6Spvalchev     }
3096433075b6Spvalchev     else {
3097433075b6Spvalchev         aSig >>= 2;
3098433075b6Spvalchev         bSig >>= 2;
3099433075b6Spvalchev     }
3100433075b6Spvalchev     do {
3101433075b6Spvalchev         alternateASig = aSig;
3102433075b6Spvalchev         ++q;
3103433075b6Spvalchev         aSig -= bSig;
3104433075b6Spvalchev     } while ( 0 <= (sbits64) aSig );
3105433075b6Spvalchev     sigMean = aSig + alternateASig;
3106433075b6Spvalchev     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3107433075b6Spvalchev         aSig = alternateASig;
3108433075b6Spvalchev     }
3109433075b6Spvalchev     zSign = ( (sbits64) aSig < 0 );
3110433075b6Spvalchev     if ( zSign ) aSig = - aSig;
3111433075b6Spvalchev     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3112433075b6Spvalchev 
3113433075b6Spvalchev }
3114433075b6Spvalchev 
3115433075b6Spvalchev /*
3116433075b6Spvalchev -------------------------------------------------------------------------------
3117433075b6Spvalchev Returns the square root of the double-precision floating-point value `a'.
3118433075b6Spvalchev The operation is performed according to the IEC/IEEE Standard for Binary
3119433075b6Spvalchev Floating-Point Arithmetic.
3120433075b6Spvalchev -------------------------------------------------------------------------------
3121433075b6Spvalchev */
float64_sqrt(float64 a)3122433075b6Spvalchev float64 float64_sqrt( float64 a )
3123433075b6Spvalchev {
3124433075b6Spvalchev     flag aSign;
3125433075b6Spvalchev     int16 aExp, zExp;
3126433075b6Spvalchev     bits64 aSig, zSig, doubleZSig;
3127433075b6Spvalchev     bits64 rem0, rem1, term0, term1;
3128433075b6Spvalchev 
3129433075b6Spvalchev     aSig = extractFloat64Frac( a );
3130433075b6Spvalchev     aExp = extractFloat64Exp( a );
3131433075b6Spvalchev     aSign = extractFloat64Sign( a );
3132433075b6Spvalchev     if ( aExp == 0x7FF ) {
3133433075b6Spvalchev         if ( aSig ) return propagateFloat64NaN( a, a );
3134433075b6Spvalchev         if ( ! aSign ) return a;
3135433075b6Spvalchev         float_raise( float_flag_invalid );
3136433075b6Spvalchev         return float64_default_nan;
3137433075b6Spvalchev     }
3138433075b6Spvalchev     if ( aSign ) {
3139433075b6Spvalchev         if ( ( aExp | aSig ) == 0 ) return a;
3140433075b6Spvalchev         float_raise( float_flag_invalid );
3141433075b6Spvalchev         return float64_default_nan;
3142433075b6Spvalchev     }
3143433075b6Spvalchev     if ( aExp == 0 ) {
3144433075b6Spvalchev         if ( aSig == 0 ) return 0;
3145433075b6Spvalchev         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3146433075b6Spvalchev     }
3147433075b6Spvalchev     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3148433075b6Spvalchev     aSig |= LIT64( 0x0010000000000000 );
3149433075b6Spvalchev     zSig = estimateSqrt32( aExp, aSig>>21 );
3150433075b6Spvalchev     aSig <<= 9 - ( aExp & 1 );
3151433075b6Spvalchev     zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3152433075b6Spvalchev     if ( ( zSig & 0x1FF ) <= 5 ) {
3153433075b6Spvalchev         doubleZSig = zSig<<1;
3154433075b6Spvalchev         mul64To128( zSig, zSig, &term0, &term1 );
3155433075b6Spvalchev         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3156433075b6Spvalchev         while ( (sbits64) rem0 < 0 ) {
3157433075b6Spvalchev             --zSig;
3158433075b6Spvalchev             doubleZSig -= 2;
3159433075b6Spvalchev             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3160433075b6Spvalchev         }
3161433075b6Spvalchev         zSig |= ( ( rem0 | rem1 ) != 0 );
3162433075b6Spvalchev     }
3163433075b6Spvalchev     return roundAndPackFloat64( 0, zExp, zSig );
3164433075b6Spvalchev 
3165433075b6Spvalchev }
3166433075b6Spvalchev #endif
3167433075b6Spvalchev 
3168433075b6Spvalchev /*
3169433075b6Spvalchev -------------------------------------------------------------------------------
3170433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is equal to the
3171433075b6Spvalchev corresponding value `b', and 0 otherwise.  The comparison is performed
3172433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3173433075b6Spvalchev -------------------------------------------------------------------------------
3174433075b6Spvalchev */
float64_eq(float64 a,float64 b)3175433075b6Spvalchev flag float64_eq( float64 a, float64 b )
3176433075b6Spvalchev {
3177433075b6Spvalchev 
3178433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3179433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3180433075b6Spvalchev        ) {
3181433075b6Spvalchev         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3182433075b6Spvalchev             float_raise( float_flag_invalid );
3183433075b6Spvalchev         }
3184433075b6Spvalchev         return 0;
3185433075b6Spvalchev     }
3186433075b6Spvalchev     return ( a == b ) ||
3187433075b6Spvalchev 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3188433075b6Spvalchev 
3189433075b6Spvalchev }
3190433075b6Spvalchev 
3191433075b6Spvalchev /*
3192433075b6Spvalchev -------------------------------------------------------------------------------
3193433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is less than or
3194433075b6Spvalchev equal to the corresponding value `b', and 0 otherwise.  The comparison is
3195433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
3196433075b6Spvalchev Arithmetic.
3197433075b6Spvalchev -------------------------------------------------------------------------------
3198433075b6Spvalchev */
float64_le(float64 a,float64 b)3199433075b6Spvalchev flag float64_le( float64 a, float64 b )
3200433075b6Spvalchev {
3201433075b6Spvalchev     flag aSign, bSign;
3202433075b6Spvalchev 
3203433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3204433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3205433075b6Spvalchev        ) {
3206433075b6Spvalchev         float_raise( float_flag_invalid );
3207433075b6Spvalchev         return 0;
3208433075b6Spvalchev     }
3209433075b6Spvalchev     aSign = extractFloat64Sign( a );
3210433075b6Spvalchev     bSign = extractFloat64Sign( b );
3211433075b6Spvalchev     if ( aSign != bSign )
3212433075b6Spvalchev 	return aSign ||
3213433075b6Spvalchev 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3214433075b6Spvalchev 	      0 );
3215433075b6Spvalchev     return ( a == b ) ||
3216433075b6Spvalchev 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3217433075b6Spvalchev 
3218433075b6Spvalchev }
3219433075b6Spvalchev 
3220433075b6Spvalchev /*
3221433075b6Spvalchev -------------------------------------------------------------------------------
3222433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is less than
3223433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The comparison is performed
3224433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3225433075b6Spvalchev -------------------------------------------------------------------------------
3226433075b6Spvalchev */
float64_lt(float64 a,float64 b)3227433075b6Spvalchev flag float64_lt( float64 a, float64 b )
3228433075b6Spvalchev {
3229433075b6Spvalchev     flag aSign, bSign;
3230433075b6Spvalchev 
3231433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3232433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3233433075b6Spvalchev        ) {
3234433075b6Spvalchev         float_raise( float_flag_invalid );
3235433075b6Spvalchev         return 0;
3236433075b6Spvalchev     }
3237433075b6Spvalchev     aSign = extractFloat64Sign( a );
3238433075b6Spvalchev     bSign = extractFloat64Sign( b );
3239433075b6Spvalchev     if ( aSign != bSign )
3240433075b6Spvalchev 	return aSign &&
3241433075b6Spvalchev 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3242433075b6Spvalchev 	      0 );
3243433075b6Spvalchev     return ( a != b ) &&
3244433075b6Spvalchev 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3245433075b6Spvalchev 
3246433075b6Spvalchev }
3247433075b6Spvalchev 
3248433075b6Spvalchev #ifndef SOFTFLOAT_FOR_GCC
3249433075b6Spvalchev /*
3250433075b6Spvalchev -------------------------------------------------------------------------------
3251433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is equal to the
3252433075b6Spvalchev corresponding value `b', and 0 otherwise.  The invalid exception is raised
3253433075b6Spvalchev if either operand is a NaN.  Otherwise, the comparison is performed
3254433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3255433075b6Spvalchev -------------------------------------------------------------------------------
3256433075b6Spvalchev */
float64_eq_signaling(float64 a,float64 b)3257433075b6Spvalchev flag float64_eq_signaling( float64 a, float64 b )
3258433075b6Spvalchev {
3259433075b6Spvalchev 
3260433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3261433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3262433075b6Spvalchev        ) {
3263433075b6Spvalchev         float_raise( float_flag_invalid );
3264433075b6Spvalchev         return 0;
3265433075b6Spvalchev     }
3266433075b6Spvalchev     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3267433075b6Spvalchev 
3268433075b6Spvalchev }
3269433075b6Spvalchev 
3270433075b6Spvalchev /*
3271433075b6Spvalchev -------------------------------------------------------------------------------
3272433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is less than or
3273433075b6Spvalchev equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3274433075b6Spvalchev cause an exception.  Otherwise, the comparison is performed according to the
3275433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3276433075b6Spvalchev -------------------------------------------------------------------------------
3277433075b6Spvalchev */
float64_le_quiet(float64 a,float64 b)3278433075b6Spvalchev flag float64_le_quiet( float64 a, float64 b )
3279433075b6Spvalchev {
3280433075b6Spvalchev     flag aSign, bSign;
3281433075b6Spvalchev 
3282433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3283433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3284433075b6Spvalchev        ) {
3285433075b6Spvalchev         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3286433075b6Spvalchev             float_raise( float_flag_invalid );
3287433075b6Spvalchev         }
3288433075b6Spvalchev         return 0;
3289433075b6Spvalchev     }
3290433075b6Spvalchev     aSign = extractFloat64Sign( a );
3291433075b6Spvalchev     bSign = extractFloat64Sign( b );
3292433075b6Spvalchev     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3293433075b6Spvalchev     return ( a == b ) || ( aSign ^ ( a < b ) );
3294433075b6Spvalchev 
3295433075b6Spvalchev }
3296433075b6Spvalchev 
3297433075b6Spvalchev /*
3298433075b6Spvalchev -------------------------------------------------------------------------------
3299433075b6Spvalchev Returns 1 if the double-precision floating-point value `a' is less than
3300433075b6Spvalchev the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3301433075b6Spvalchev exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3302433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
3303433075b6Spvalchev -------------------------------------------------------------------------------
3304433075b6Spvalchev */
float64_lt_quiet(float64 a,float64 b)3305433075b6Spvalchev flag float64_lt_quiet( float64 a, float64 b )
3306433075b6Spvalchev {
3307433075b6Spvalchev     flag aSign, bSign;
3308433075b6Spvalchev 
3309433075b6Spvalchev     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3310433075b6Spvalchev          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3311433075b6Spvalchev        ) {
3312433075b6Spvalchev         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3313433075b6Spvalchev             float_raise( float_flag_invalid );
3314433075b6Spvalchev         }
3315433075b6Spvalchev         return 0;
3316433075b6Spvalchev     }
3317433075b6Spvalchev     aSign = extractFloat64Sign( a );
3318433075b6Spvalchev     bSign = extractFloat64Sign( b );
3319433075b6Spvalchev     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3320433075b6Spvalchev     return ( a != b ) && ( aSign ^ ( a < b ) );
3321433075b6Spvalchev 
3322433075b6Spvalchev }
3323433075b6Spvalchev #endif
3324433075b6Spvalchev 
3325433075b6Spvalchev #ifdef FLOATX80
3326433075b6Spvalchev 
3327433075b6Spvalchev /*
3328433075b6Spvalchev -------------------------------------------------------------------------------
3329433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3330433075b6Spvalchev point value `a' to the 32-bit two's complement integer format.  The
3331433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3332433075b6Spvalchev Floating-Point Arithmetic---which means in particular that the conversion
3333433075b6Spvalchev is rounded according to the current rounding mode.  If `a' is a NaN, the
3334433075b6Spvalchev largest positive integer is returned.  Otherwise, if the conversion
3335433075b6Spvalchev overflows, the largest integer with the same sign as `a' is returned.
3336433075b6Spvalchev -------------------------------------------------------------------------------
3337433075b6Spvalchev */
floatx80_to_int32(floatx80 a)3338433075b6Spvalchev int32 floatx80_to_int32( floatx80 a )
3339433075b6Spvalchev {
3340433075b6Spvalchev     flag aSign;
3341433075b6Spvalchev     int32 aExp, shiftCount;
3342433075b6Spvalchev     bits64 aSig;
3343433075b6Spvalchev 
3344433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3345433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3346433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3347433075b6Spvalchev     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3348433075b6Spvalchev     shiftCount = 0x4037 - aExp;
3349433075b6Spvalchev     if ( shiftCount <= 0 ) shiftCount = 1;
3350433075b6Spvalchev     shift64RightJamming( aSig, shiftCount, &aSig );
3351433075b6Spvalchev     return roundAndPackInt32( aSign, aSig );
3352433075b6Spvalchev 
3353433075b6Spvalchev }
3354433075b6Spvalchev 
3355433075b6Spvalchev /*
3356433075b6Spvalchev -------------------------------------------------------------------------------
3357433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3358433075b6Spvalchev point value `a' to the 32-bit two's complement integer format.  The
3359433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3360433075b6Spvalchev Floating-Point Arithmetic, except that the conversion is always rounded
3361433075b6Spvalchev toward zero.  If `a' is a NaN, the largest positive integer is returned.
3362433075b6Spvalchev Otherwise, if the conversion overflows, the largest integer with the same
3363433075b6Spvalchev sign as `a' is returned.
3364433075b6Spvalchev -------------------------------------------------------------------------------
3365433075b6Spvalchev */
floatx80_to_int32_round_to_zero(floatx80 a)3366433075b6Spvalchev int32 floatx80_to_int32_round_to_zero( floatx80 a )
3367433075b6Spvalchev {
3368433075b6Spvalchev     flag aSign;
3369433075b6Spvalchev     int32 aExp, shiftCount;
3370433075b6Spvalchev     bits64 aSig, savedASig;
3371433075b6Spvalchev     int32 z;
3372433075b6Spvalchev 
3373433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3374433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3375433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3376433075b6Spvalchev     if ( 0x401E < aExp ) {
3377433075b6Spvalchev         if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3378433075b6Spvalchev         goto invalid;
3379433075b6Spvalchev     }
3380433075b6Spvalchev     else if ( aExp < 0x3FFF ) {
3381433075b6Spvalchev         if ( aExp || aSig ) float_set_inexact();
3382433075b6Spvalchev         return 0;
3383433075b6Spvalchev     }
3384433075b6Spvalchev     shiftCount = 0x403E - aExp;
3385433075b6Spvalchev     savedASig = aSig;
3386433075b6Spvalchev     aSig >>= shiftCount;
3387433075b6Spvalchev     z = aSig;
3388433075b6Spvalchev     if ( aSign ) z = - z;
3389433075b6Spvalchev     if ( ( z < 0 ) ^ aSign ) {
3390433075b6Spvalchev  invalid:
3391433075b6Spvalchev         float_raise( float_flag_invalid );
3392433075b6Spvalchev         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3393433075b6Spvalchev     }
3394433075b6Spvalchev     if ( ( aSig<<shiftCount ) != savedASig ) {
3395433075b6Spvalchev         float_set_inexact();
3396433075b6Spvalchev     }
3397433075b6Spvalchev     return z;
3398433075b6Spvalchev 
3399433075b6Spvalchev }
3400433075b6Spvalchev 
3401433075b6Spvalchev /*
3402433075b6Spvalchev -------------------------------------------------------------------------------
3403433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3404433075b6Spvalchev point value `a' to the 64-bit two's complement integer format.  The
3405433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3406433075b6Spvalchev Floating-Point Arithmetic---which means in particular that the conversion
3407433075b6Spvalchev is rounded according to the current rounding mode.  If `a' is a NaN,
3408433075b6Spvalchev the largest positive integer is returned.  Otherwise, if the conversion
3409433075b6Spvalchev overflows, the largest integer with the same sign as `a' is returned.
3410433075b6Spvalchev -------------------------------------------------------------------------------
3411433075b6Spvalchev */
floatx80_to_int64(floatx80 a)3412433075b6Spvalchev int64 floatx80_to_int64( floatx80 a )
3413433075b6Spvalchev {
3414433075b6Spvalchev     flag aSign;
3415433075b6Spvalchev     int32 aExp, shiftCount;
3416433075b6Spvalchev     bits64 aSig, aSigExtra;
3417433075b6Spvalchev 
3418433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3419433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3420433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3421433075b6Spvalchev     shiftCount = 0x403E - aExp;
3422433075b6Spvalchev     if ( shiftCount <= 0 ) {
3423433075b6Spvalchev         if ( shiftCount ) {
3424433075b6Spvalchev             float_raise( float_flag_invalid );
3425433075b6Spvalchev             if (    ! aSign
3426433075b6Spvalchev                  || (    ( aExp == 0x7FFF )
3427433075b6Spvalchev                       && ( aSig != LIT64( 0x8000000000000000 ) ) )
3428433075b6Spvalchev                ) {
3429433075b6Spvalchev                 return LIT64( 0x7FFFFFFFFFFFFFFF );
3430433075b6Spvalchev             }
3431433075b6Spvalchev             return (sbits64) LIT64( 0x8000000000000000 );
3432433075b6Spvalchev         }
3433433075b6Spvalchev         aSigExtra = 0;
3434433075b6Spvalchev     }
3435433075b6Spvalchev     else {
3436433075b6Spvalchev         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3437433075b6Spvalchev     }
3438433075b6Spvalchev     return roundAndPackInt64( aSign, aSig, aSigExtra );
3439433075b6Spvalchev 
3440433075b6Spvalchev }
3441433075b6Spvalchev 
3442433075b6Spvalchev /*
3443433075b6Spvalchev -------------------------------------------------------------------------------
3444433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3445433075b6Spvalchev point value `a' to the 64-bit two's complement integer format.  The
3446433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3447433075b6Spvalchev Floating-Point Arithmetic, except that the conversion is always rounded
3448433075b6Spvalchev toward zero.  If `a' is a NaN, the largest positive integer is returned.
3449433075b6Spvalchev Otherwise, if the conversion overflows, the largest integer with the same
3450433075b6Spvalchev sign as `a' is returned.
3451433075b6Spvalchev -------------------------------------------------------------------------------
3452433075b6Spvalchev */
floatx80_to_int64_round_to_zero(floatx80 a)3453433075b6Spvalchev int64 floatx80_to_int64_round_to_zero( floatx80 a )
3454433075b6Spvalchev {
3455433075b6Spvalchev     flag aSign;
3456433075b6Spvalchev     int32 aExp, shiftCount;
3457433075b6Spvalchev     bits64 aSig;
3458433075b6Spvalchev     int64 z;
3459433075b6Spvalchev 
3460433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3461433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3462433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3463433075b6Spvalchev     shiftCount = aExp - 0x403E;
3464433075b6Spvalchev     if ( 0 <= shiftCount ) {
3465433075b6Spvalchev         aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3466433075b6Spvalchev         if ( ( a.high != 0xC03E ) || aSig ) {
3467433075b6Spvalchev             float_raise( float_flag_invalid );
3468433075b6Spvalchev             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3469433075b6Spvalchev                 return LIT64( 0x7FFFFFFFFFFFFFFF );
3470433075b6Spvalchev             }
3471433075b6Spvalchev         }
3472433075b6Spvalchev         return (sbits64) LIT64( 0x8000000000000000 );
3473433075b6Spvalchev     }
3474433075b6Spvalchev     else if ( aExp < 0x3FFF ) {
3475433075b6Spvalchev         if ( aExp | aSig ) float_set_inexact();
3476433075b6Spvalchev         return 0;
3477433075b6Spvalchev     }
3478433075b6Spvalchev     z = aSig>>( - shiftCount );
3479433075b6Spvalchev     if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3480433075b6Spvalchev         float_set_inexact();
3481433075b6Spvalchev     }
3482433075b6Spvalchev     if ( aSign ) z = - z;
3483433075b6Spvalchev     return z;
3484433075b6Spvalchev 
3485433075b6Spvalchev }
3486433075b6Spvalchev 
3487433075b6Spvalchev /*
3488433075b6Spvalchev -------------------------------------------------------------------------------
3489433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3490433075b6Spvalchev point value `a' to the single-precision floating-point format.  The
3491433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3492433075b6Spvalchev Floating-Point Arithmetic.
3493433075b6Spvalchev -------------------------------------------------------------------------------
3494433075b6Spvalchev */
floatx80_to_float32(floatx80 a)3495433075b6Spvalchev float32 floatx80_to_float32( floatx80 a )
3496433075b6Spvalchev {
3497433075b6Spvalchev     flag aSign;
3498433075b6Spvalchev     int32 aExp;
3499433075b6Spvalchev     bits64 aSig;
3500433075b6Spvalchev 
3501433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3502433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3503433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3504433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3505433075b6Spvalchev         if ( (bits64) ( aSig<<1 ) ) {
3506433075b6Spvalchev             return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3507433075b6Spvalchev         }
3508433075b6Spvalchev         return packFloat32( aSign, 0xFF, 0 );
3509433075b6Spvalchev     }
3510433075b6Spvalchev     shift64RightJamming( aSig, 33, &aSig );
3511433075b6Spvalchev     if ( aExp || aSig ) aExp -= 0x3F81;
3512433075b6Spvalchev     return roundAndPackFloat32( aSign, aExp, aSig );
3513433075b6Spvalchev 
3514433075b6Spvalchev }
3515433075b6Spvalchev 
3516433075b6Spvalchev /*
3517433075b6Spvalchev -------------------------------------------------------------------------------
3518433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3519433075b6Spvalchev point value `a' to the double-precision floating-point format.  The
3520433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3521433075b6Spvalchev Floating-Point Arithmetic.
3522433075b6Spvalchev -------------------------------------------------------------------------------
3523433075b6Spvalchev */
floatx80_to_float64(floatx80 a)3524433075b6Spvalchev float64 floatx80_to_float64( floatx80 a )
3525433075b6Spvalchev {
3526433075b6Spvalchev     flag aSign;
3527433075b6Spvalchev     int32 aExp;
3528433075b6Spvalchev     bits64 aSig, zSig;
3529433075b6Spvalchev 
3530433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3531433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3532433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3533433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3534433075b6Spvalchev         if ( (bits64) ( aSig<<1 ) ) {
3535433075b6Spvalchev             return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3536433075b6Spvalchev         }
3537433075b6Spvalchev         return packFloat64( aSign, 0x7FF, 0 );
3538433075b6Spvalchev     }
3539433075b6Spvalchev     shift64RightJamming( aSig, 1, &zSig );
3540433075b6Spvalchev     if ( aExp || aSig ) aExp -= 0x3C01;
3541433075b6Spvalchev     return roundAndPackFloat64( aSign, aExp, zSig );
3542433075b6Spvalchev 
3543433075b6Spvalchev }
3544433075b6Spvalchev 
3545433075b6Spvalchev #ifdef FLOAT128
3546433075b6Spvalchev 
3547433075b6Spvalchev /*
3548433075b6Spvalchev -------------------------------------------------------------------------------
3549433075b6Spvalchev Returns the result of converting the extended double-precision floating-
3550433075b6Spvalchev point value `a' to the quadruple-precision floating-point format.  The
3551433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
3552433075b6Spvalchev Floating-Point Arithmetic.
3553433075b6Spvalchev -------------------------------------------------------------------------------
3554433075b6Spvalchev */
floatx80_to_float128(floatx80 a)3555433075b6Spvalchev float128 floatx80_to_float128( floatx80 a )
3556433075b6Spvalchev {
3557433075b6Spvalchev     flag aSign;
3558433075b6Spvalchev     int16 aExp;
3559433075b6Spvalchev     bits64 aSig, zSig0, zSig1;
3560433075b6Spvalchev 
3561433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3562433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3563433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3564433075b6Spvalchev     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3565433075b6Spvalchev         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3566433075b6Spvalchev     }
3567433075b6Spvalchev     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3568433075b6Spvalchev     return packFloat128( aSign, aExp, zSig0, zSig1 );
3569433075b6Spvalchev 
3570433075b6Spvalchev }
3571433075b6Spvalchev 
3572433075b6Spvalchev #endif
3573433075b6Spvalchev 
3574433075b6Spvalchev /*
3575433075b6Spvalchev -------------------------------------------------------------------------------
3576433075b6Spvalchev Rounds the extended double-precision floating-point value `a' to an integer,
3577433075b6Spvalchev and returns the result as an extended quadruple-precision floating-point
3578433075b6Spvalchev value.  The operation is performed according to the IEC/IEEE Standard for
3579433075b6Spvalchev Binary Floating-Point Arithmetic.
3580433075b6Spvalchev -------------------------------------------------------------------------------
3581433075b6Spvalchev */
floatx80_round_to_int(floatx80 a)3582433075b6Spvalchev floatx80 floatx80_round_to_int( floatx80 a )
3583433075b6Spvalchev {
3584433075b6Spvalchev     flag aSign;
3585433075b6Spvalchev     int32 aExp;
3586433075b6Spvalchev     bits64 lastBitMask, roundBitsMask;
3587433075b6Spvalchev     int8 roundingMode;
3588433075b6Spvalchev     floatx80 z;
3589433075b6Spvalchev 
3590433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3591433075b6Spvalchev     if ( 0x403E <= aExp ) {
3592433075b6Spvalchev         if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3593433075b6Spvalchev             return propagateFloatx80NaN( a, a );
3594433075b6Spvalchev         }
3595433075b6Spvalchev         return a;
3596433075b6Spvalchev     }
3597433075b6Spvalchev     if ( aExp < 0x3FFF ) {
3598433075b6Spvalchev         if (    ( aExp == 0 )
3599433075b6Spvalchev              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3600433075b6Spvalchev             return a;
3601433075b6Spvalchev         }
3602433075b6Spvalchev         float_set_inexact();
3603433075b6Spvalchev         aSign = extractFloatx80Sign( a );
3604433075b6Spvalchev         switch ( float_rounding_mode() ) {
3605433075b6Spvalchev          case float_round_nearest_even:
3606433075b6Spvalchev             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3607433075b6Spvalchev                ) {
3608433075b6Spvalchev                 return
3609433075b6Spvalchev                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3610433075b6Spvalchev             }
3611433075b6Spvalchev             break;
3612433075b6Spvalchev          case float_round_down:
3613433075b6Spvalchev             return
3614433075b6Spvalchev                   aSign ?
3615433075b6Spvalchev                       packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3616433075b6Spvalchev                 : packFloatx80( 0, 0, 0 );
3617433075b6Spvalchev          case float_round_up:
3618433075b6Spvalchev             return
3619433075b6Spvalchev                   aSign ? packFloatx80( 1, 0, 0 )
3620433075b6Spvalchev                 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3621433075b6Spvalchev         }
3622433075b6Spvalchev         return packFloatx80( aSign, 0, 0 );
3623433075b6Spvalchev     }
3624433075b6Spvalchev     lastBitMask = 1;
3625433075b6Spvalchev     lastBitMask <<= 0x403E - aExp;
3626433075b6Spvalchev     roundBitsMask = lastBitMask - 1;
3627433075b6Spvalchev     z = a;
3628433075b6Spvalchev     roundingMode = float_rounding_mode();
3629433075b6Spvalchev     if ( roundingMode == float_round_nearest_even ) {
3630433075b6Spvalchev         z.low += lastBitMask>>1;
3631433075b6Spvalchev         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3632433075b6Spvalchev     }
3633433075b6Spvalchev     else if ( roundingMode != float_round_to_zero ) {
3634433075b6Spvalchev         if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3635433075b6Spvalchev             z.low += roundBitsMask;
3636433075b6Spvalchev         }
3637433075b6Spvalchev     }
3638433075b6Spvalchev     z.low &= ~ roundBitsMask;
3639433075b6Spvalchev     if ( z.low == 0 ) {
3640433075b6Spvalchev         ++z.high;
3641433075b6Spvalchev         z.low = LIT64( 0x8000000000000000 );
3642433075b6Spvalchev     }
3643433075b6Spvalchev     if ( z.low != a.low ) float_set_inexact();
3644433075b6Spvalchev     return z;
3645433075b6Spvalchev 
3646433075b6Spvalchev }
3647433075b6Spvalchev 
3648433075b6Spvalchev /*
3649433075b6Spvalchev -------------------------------------------------------------------------------
3650433075b6Spvalchev Returns the result of adding the absolute values of the extended double-
3651433075b6Spvalchev precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3652433075b6Spvalchev negated before being returned.  `zSign' is ignored if the result is a NaN.
3653433075b6Spvalchev The addition is performed according to the IEC/IEEE Standard for Binary
3654433075b6Spvalchev Floating-Point Arithmetic.
3655433075b6Spvalchev -------------------------------------------------------------------------------
3656433075b6Spvalchev */
addFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)3657433075b6Spvalchev static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3658433075b6Spvalchev {
3659433075b6Spvalchev     int32 aExp, bExp, zExp;
3660433075b6Spvalchev     bits64 aSig, bSig, zSig0, zSig1;
3661433075b6Spvalchev     int32 expDiff;
3662433075b6Spvalchev 
3663433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3664433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3665433075b6Spvalchev     bSig = extractFloatx80Frac( b );
3666433075b6Spvalchev     bExp = extractFloatx80Exp( b );
3667433075b6Spvalchev     expDiff = aExp - bExp;
3668433075b6Spvalchev     if ( 0 < expDiff ) {
3669433075b6Spvalchev         if ( aExp == 0x7FFF ) {
3670433075b6Spvalchev             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3671433075b6Spvalchev             return a;
3672433075b6Spvalchev         }
3673433075b6Spvalchev         if ( bExp == 0 ) --expDiff;
3674433075b6Spvalchev         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3675433075b6Spvalchev         zExp = aExp;
3676433075b6Spvalchev     }
3677433075b6Spvalchev     else if ( expDiff < 0 ) {
3678433075b6Spvalchev         if ( bExp == 0x7FFF ) {
3679433075b6Spvalchev             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3680433075b6Spvalchev             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3681433075b6Spvalchev         }
3682433075b6Spvalchev         if ( aExp == 0 ) ++expDiff;
3683433075b6Spvalchev         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3684433075b6Spvalchev         zExp = bExp;
3685433075b6Spvalchev     }
3686433075b6Spvalchev     else {
3687433075b6Spvalchev         if ( aExp == 0x7FFF ) {
3688433075b6Spvalchev             if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3689433075b6Spvalchev                 return propagateFloatx80NaN( a, b );
3690433075b6Spvalchev             }
3691433075b6Spvalchev             return a;
3692433075b6Spvalchev         }
3693433075b6Spvalchev         zSig1 = 0;
3694433075b6Spvalchev         zSig0 = aSig + bSig;
3695433075b6Spvalchev         if ( aExp == 0 ) {
3696433075b6Spvalchev             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3697433075b6Spvalchev             goto roundAndPack;
3698433075b6Spvalchev         }
3699433075b6Spvalchev         zExp = aExp;
3700433075b6Spvalchev         goto shiftRight1;
3701433075b6Spvalchev     }
3702433075b6Spvalchev     zSig0 = aSig + bSig;
3703433075b6Spvalchev     if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3704433075b6Spvalchev  shiftRight1:
3705433075b6Spvalchev     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3706433075b6Spvalchev     zSig0 |= LIT64( 0x8000000000000000 );
3707433075b6Spvalchev     ++zExp;
3708433075b6Spvalchev  roundAndPack:
3709433075b6Spvalchev     return
3710433075b6Spvalchev         roundAndPackFloatx80(
3711433075b6Spvalchev             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3712433075b6Spvalchev 
3713433075b6Spvalchev }
3714433075b6Spvalchev 
3715433075b6Spvalchev /*
3716433075b6Spvalchev -------------------------------------------------------------------------------
3717433075b6Spvalchev Returns the result of subtracting the absolute values of the extended
3718433075b6Spvalchev double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3719433075b6Spvalchev difference is negated before being returned.  `zSign' is ignored if the
3720433075b6Spvalchev result is a NaN.  The subtraction is performed according to the IEC/IEEE
3721433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
3722433075b6Spvalchev -------------------------------------------------------------------------------
3723433075b6Spvalchev */
subFloatx80Sigs(floatx80 a,floatx80 b,flag zSign)3724433075b6Spvalchev static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3725433075b6Spvalchev {
3726433075b6Spvalchev     int32 aExp, bExp, zExp;
3727433075b6Spvalchev     bits64 aSig, bSig, zSig0, zSig1;
3728433075b6Spvalchev     int32 expDiff;
3729433075b6Spvalchev     floatx80 z;
3730433075b6Spvalchev 
3731433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3732433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3733433075b6Spvalchev     bSig = extractFloatx80Frac( b );
3734433075b6Spvalchev     bExp = extractFloatx80Exp( b );
3735433075b6Spvalchev     expDiff = aExp - bExp;
3736433075b6Spvalchev     if ( 0 < expDiff ) goto aExpBigger;
3737433075b6Spvalchev     if ( expDiff < 0 ) goto bExpBigger;
3738433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3739433075b6Spvalchev         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3740433075b6Spvalchev             return propagateFloatx80NaN( a, b );
3741433075b6Spvalchev         }
3742433075b6Spvalchev         float_raise( float_flag_invalid );
3743433075b6Spvalchev         z.low = floatx80_default_nan_low;
3744433075b6Spvalchev         z.high = floatx80_default_nan_high;
3745433075b6Spvalchev         return z;
3746433075b6Spvalchev     }
3747433075b6Spvalchev     if ( aExp == 0 ) {
3748433075b6Spvalchev         aExp = 1;
3749433075b6Spvalchev         bExp = 1;
3750433075b6Spvalchev     }
3751433075b6Spvalchev     zSig1 = 0;
3752433075b6Spvalchev     if ( bSig < aSig ) goto aBigger;
3753433075b6Spvalchev     if ( aSig < bSig ) goto bBigger;
3754433075b6Spvalchev     return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 );
3755433075b6Spvalchev  bExpBigger:
3756433075b6Spvalchev     if ( bExp == 0x7FFF ) {
3757433075b6Spvalchev         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3758433075b6Spvalchev         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3759433075b6Spvalchev     }
3760433075b6Spvalchev     if ( aExp == 0 ) ++expDiff;
3761433075b6Spvalchev     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3762433075b6Spvalchev  bBigger:
3763433075b6Spvalchev     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3764433075b6Spvalchev     zExp = bExp;
3765433075b6Spvalchev     zSign ^= 1;
3766433075b6Spvalchev     goto normalizeRoundAndPack;
3767433075b6Spvalchev  aExpBigger:
3768433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3769433075b6Spvalchev         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3770433075b6Spvalchev         return a;
3771433075b6Spvalchev     }
3772433075b6Spvalchev     if ( bExp == 0 ) --expDiff;
3773433075b6Spvalchev     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3774433075b6Spvalchev  aBigger:
3775433075b6Spvalchev     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3776433075b6Spvalchev     zExp = aExp;
3777433075b6Spvalchev  normalizeRoundAndPack:
3778433075b6Spvalchev     return
3779433075b6Spvalchev         normalizeRoundAndPackFloatx80(
3780433075b6Spvalchev             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3781433075b6Spvalchev 
3782433075b6Spvalchev }
3783433075b6Spvalchev 
3784433075b6Spvalchev /*
3785433075b6Spvalchev -------------------------------------------------------------------------------
3786433075b6Spvalchev Returns the result of adding the extended double-precision floating-point
3787433075b6Spvalchev values `a' and `b'.  The operation is performed according to the IEC/IEEE
3788433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
3789433075b6Spvalchev -------------------------------------------------------------------------------
3790433075b6Spvalchev */
floatx80_add(floatx80 a,floatx80 b)3791433075b6Spvalchev floatx80 floatx80_add( floatx80 a, floatx80 b )
3792433075b6Spvalchev {
3793433075b6Spvalchev     flag aSign, bSign;
3794433075b6Spvalchev 
3795433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3796433075b6Spvalchev     bSign = extractFloatx80Sign( b );
3797433075b6Spvalchev     if ( aSign == bSign ) {
3798433075b6Spvalchev         return addFloatx80Sigs( a, b, aSign );
3799433075b6Spvalchev     }
3800433075b6Spvalchev     else {
3801433075b6Spvalchev         return subFloatx80Sigs( a, b, aSign );
3802433075b6Spvalchev     }
3803433075b6Spvalchev 
3804433075b6Spvalchev }
3805433075b6Spvalchev 
3806433075b6Spvalchev /*
3807433075b6Spvalchev -------------------------------------------------------------------------------
3808433075b6Spvalchev Returns the result of subtracting the extended double-precision floating-
3809433075b6Spvalchev point values `a' and `b'.  The operation is performed according to the
3810433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3811433075b6Spvalchev -------------------------------------------------------------------------------
3812433075b6Spvalchev */
floatx80_sub(floatx80 a,floatx80 b)3813433075b6Spvalchev floatx80 floatx80_sub( floatx80 a, floatx80 b )
3814433075b6Spvalchev {
3815433075b6Spvalchev     flag aSign, bSign;
3816433075b6Spvalchev 
3817433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3818433075b6Spvalchev     bSign = extractFloatx80Sign( b );
3819433075b6Spvalchev     if ( aSign == bSign ) {
3820433075b6Spvalchev         return subFloatx80Sigs( a, b, aSign );
3821433075b6Spvalchev     }
3822433075b6Spvalchev     else {
3823433075b6Spvalchev         return addFloatx80Sigs( a, b, aSign );
3824433075b6Spvalchev     }
3825433075b6Spvalchev 
3826433075b6Spvalchev }
3827433075b6Spvalchev 
3828433075b6Spvalchev /*
3829433075b6Spvalchev -------------------------------------------------------------------------------
3830433075b6Spvalchev Returns the result of multiplying the extended double-precision floating-
3831433075b6Spvalchev point values `a' and `b'.  The operation is performed according to the
3832433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3833433075b6Spvalchev -------------------------------------------------------------------------------
3834433075b6Spvalchev */
floatx80_mul(floatx80 a,floatx80 b)3835433075b6Spvalchev floatx80 floatx80_mul( floatx80 a, floatx80 b )
3836433075b6Spvalchev {
3837433075b6Spvalchev     flag aSign, bSign, zSign;
3838433075b6Spvalchev     int32 aExp, bExp, zExp;
3839433075b6Spvalchev     bits64 aSig, bSig, zSig0, zSig1;
3840433075b6Spvalchev     floatx80 z;
3841433075b6Spvalchev 
3842433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3843433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3844433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3845433075b6Spvalchev     bSig = extractFloatx80Frac( b );
3846433075b6Spvalchev     bExp = extractFloatx80Exp( b );
3847433075b6Spvalchev     bSign = extractFloatx80Sign( b );
3848433075b6Spvalchev     zSign = aSign ^ bSign;
3849433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3850433075b6Spvalchev         if (    (bits64) ( aSig<<1 )
3851433075b6Spvalchev              || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3852433075b6Spvalchev             return propagateFloatx80NaN( a, b );
3853433075b6Spvalchev         }
3854433075b6Spvalchev         if ( ( bExp | bSig ) == 0 ) goto invalid;
3855433075b6Spvalchev         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3856433075b6Spvalchev     }
3857433075b6Spvalchev     if ( bExp == 0x7FFF ) {
3858433075b6Spvalchev         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3859433075b6Spvalchev         if ( ( aExp | aSig ) == 0 ) {
3860433075b6Spvalchev  invalid:
3861433075b6Spvalchev             float_raise( float_flag_invalid );
3862433075b6Spvalchev             z.low = floatx80_default_nan_low;
3863433075b6Spvalchev             z.high = floatx80_default_nan_high;
3864433075b6Spvalchev             return z;
3865433075b6Spvalchev         }
3866433075b6Spvalchev         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3867433075b6Spvalchev     }
3868433075b6Spvalchev     if ( aExp == 0 ) {
3869433075b6Spvalchev         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3870433075b6Spvalchev         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3871433075b6Spvalchev     }
3872433075b6Spvalchev     if ( bExp == 0 ) {
3873433075b6Spvalchev         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3874433075b6Spvalchev         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3875433075b6Spvalchev     }
3876433075b6Spvalchev     zExp = aExp + bExp - 0x3FFE;
3877433075b6Spvalchev     mul64To128( aSig, bSig, &zSig0, &zSig1 );
3878433075b6Spvalchev     if ( 0 < (sbits64) zSig0 ) {
3879433075b6Spvalchev         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3880433075b6Spvalchev         --zExp;
3881433075b6Spvalchev     }
3882433075b6Spvalchev     return
3883433075b6Spvalchev         roundAndPackFloatx80(
3884433075b6Spvalchev             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3885433075b6Spvalchev 
3886433075b6Spvalchev }
3887433075b6Spvalchev 
3888433075b6Spvalchev /*
3889433075b6Spvalchev -------------------------------------------------------------------------------
3890433075b6Spvalchev Returns the result of dividing the extended double-precision floating-point
3891433075b6Spvalchev value `a' by the corresponding value `b'.  The operation is performed
3892433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3893433075b6Spvalchev -------------------------------------------------------------------------------
3894433075b6Spvalchev */
floatx80_div(floatx80 a,floatx80 b)3895433075b6Spvalchev floatx80 floatx80_div( floatx80 a, floatx80 b )
3896433075b6Spvalchev {
3897433075b6Spvalchev     flag aSign, bSign, zSign;
3898433075b6Spvalchev     int32 aExp, bExp, zExp;
3899433075b6Spvalchev     bits64 aSig, bSig, zSig0, zSig1;
3900433075b6Spvalchev     bits64 rem0, rem1, rem2, term0, term1, term2;
3901433075b6Spvalchev     floatx80 z;
3902433075b6Spvalchev 
3903433075b6Spvalchev     aSig = extractFloatx80Frac( a );
3904433075b6Spvalchev     aExp = extractFloatx80Exp( a );
3905433075b6Spvalchev     aSign = extractFloatx80Sign( a );
3906433075b6Spvalchev     bSig = extractFloatx80Frac( b );
3907433075b6Spvalchev     bExp = extractFloatx80Exp( b );
3908433075b6Spvalchev     bSign = extractFloatx80Sign( b );
3909433075b6Spvalchev     zSign = aSign ^ bSign;
3910433075b6Spvalchev     if ( aExp == 0x7FFF ) {
3911433075b6Spvalchev         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3912433075b6Spvalchev         if ( bExp == 0x7FFF ) {
3913433075b6Spvalchev             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3914433075b6Spvalchev             goto invalid;
3915433075b6Spvalchev         }
3916433075b6Spvalchev         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3917433075b6Spvalchev     }
3918433075b6Spvalchev     if ( bExp == 0x7FFF ) {
3919433075b6Spvalchev         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3920433075b6Spvalchev         return packFloatx80( zSign, 0, 0 );
3921433075b6Spvalchev     }
3922433075b6Spvalchev     if ( bExp == 0 ) {
3923433075b6Spvalchev         if ( bSig == 0 ) {
3924433075b6Spvalchev             if ( ( aExp | aSig ) == 0 ) {
3925433075b6Spvalchev  invalid:
3926433075b6Spvalchev                 float_raise( float_flag_invalid );
3927433075b6Spvalchev                 z.low = floatx80_default_nan_low;
3928433075b6Spvalchev                 z.high = floatx80_default_nan_high;
3929433075b6Spvalchev                 return z;
3930433075b6Spvalchev             }
3931433075b6Spvalchev             float_raise( float_flag_divbyzero );
3932433075b6Spvalchev             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3933433075b6Spvalchev         }
3934433075b6Spvalchev         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3935433075b6Spvalchev     }
3936433075b6Spvalchev     if ( aExp == 0 ) {
3937433075b6Spvalchev         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3938433075b6Spvalchev         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3939433075b6Spvalchev     }
3940433075b6Spvalchev     zExp = aExp - bExp + 0x3FFE;
3941433075b6Spvalchev     rem1 = 0;
3942433075b6Spvalchev     if ( bSig <= aSig ) {
3943433075b6Spvalchev         shift128Right( aSig, 0, 1, &aSig, &rem1 );
3944433075b6Spvalchev         ++zExp;
3945433075b6Spvalchev     }
3946433075b6Spvalchev     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3947433075b6Spvalchev     mul64To128( bSig, zSig0, &term0, &term1 );
3948433075b6Spvalchev     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3949433075b6Spvalchev     while ( (sbits64) rem0 < 0 ) {
3950433075b6Spvalchev         --zSig0;
3951433075b6Spvalchev         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3952433075b6Spvalchev     }
3953433075b6Spvalchev     zSig1 = estimateDiv128To64( rem1, 0, bSig );
3954433075b6Spvalchev     if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3955433075b6Spvalchev         mul64To128( bSig, zSig1, &term1, &term2 );
3956433075b6Spvalchev         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3957433075b6Spvalchev         while ( (sbits64) rem1 < 0 ) {
3958433075b6Spvalchev             --zSig1;
3959433075b6Spvalchev             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3960433075b6Spvalchev         }
3961433075b6Spvalchev         zSig1 |= ( ( rem1 | rem2 ) != 0 );
3962433075b6Spvalchev     }
3963433075b6Spvalchev     return
3964433075b6Spvalchev         roundAndPackFloatx80(
3965433075b6Spvalchev             floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3966433075b6Spvalchev 
3967433075b6Spvalchev }
3968433075b6Spvalchev 
3969433075b6Spvalchev /*
3970433075b6Spvalchev -------------------------------------------------------------------------------
3971433075b6Spvalchev Returns the remainder of the extended double-precision floating-point value
3972433075b6Spvalchev `a' with respect to the corresponding value `b'.  The operation is performed
3973433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3974433075b6Spvalchev -------------------------------------------------------------------------------
3975433075b6Spvalchev */
floatx80 floatx80_rem( floatx80 a, floatx80 b )
{
    flag aSign, zSign;
    int32 aExp, bExp, expDiff;
    bits64 aSig0, aSig1, bSig;
    bits64 q, term0, term1, alternateASig0, alternateASig1;
    floatx80 z;

    /*
     * Remainder of `a' with respect to `b'.  The result is the reduced
     * value of smaller magnitude (either r or r - |b|); on an exact tie
     * the choice is made by the parity of the last quotient bit.
     */
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* A NaN in either operand is propagated. */
        if (    (bits64) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* Otherwise `a' has the maximum exponent and no fraction bits. */
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        /* `b' has the maximum exponent and no fraction bits: `a' is kept. */
        return a;
    }
    if ( bExp == 0 ) {
        /* A zero divisor is an invalid operation. */
        if ( bSig == 0 ) goto invalid;
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /*
         * Test the whole significand, as floatx80_div and floatx80_sqrt
         * do.  An encoding with a zero exponent field but the explicit
         * integer bit set is not zero; it must be normalized and reduced
         * like any other operand instead of being returned untouched.
         */
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    bSig |= LIT64( 0x8000000000000000 );
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /*
         * `a' is returned unchanged when its exponent is more than one
         * below that of `b'.
         */
        if ( expDiff < -1 ) return a;
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /*
     * Reduce in steps of 62 quotient bits.  Each estimate is lowered by 2
     * (clamped at 0) before it is multiplied back and subtracted --
     * presumably so that it never exceeds the true quotient; confirm
     * against estimateDiv128To64().
     */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step: only the top `expDiff' bits of q are used. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        /* term0:term1 becomes the aligned divisor; finish by subtraction. */
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    /*
     * alternateASig = divisor - remainder.  Take it (with the sign
     * flipped) when it is the smaller of the two, or on a tie when the
     * quotient is odd.
     */
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
              && ( q & 1 ) )
       ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
        zSign = ! zSign;
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1 );

 invalid:
    /* Invalid operation: raise the flag and return the default NaN. */
    float_raise( float_flag_invalid );
    z.low = floatx80_default_nan_low;
    z.high = floatx80_default_nan_high;
    return z;

}
4066433075b6Spvalchev 
4067433075b6Spvalchev /*
4068433075b6Spvalchev -------------------------------------------------------------------------------
4069433075b6Spvalchev Returns the square root of the extended double-precision floating-point
4070433075b6Spvalchev value `a'.  The operation is performed according to the IEC/IEEE Standard
4071433075b6Spvalchev for Binary Floating-Point Arithmetic.
4072433075b6Spvalchev -------------------------------------------------------------------------------
4073433075b6Spvalchev */
floatx80 floatx80_sqrt( floatx80 a )
{
    flag signA;
    int32 expA, rootExp;
    bits64 sigA0, sigA1, root0, root1, twiceRoot0;
    bits64 r0, r1, r2, r3, t0, t1, t2, t3;
    floatx80 nan;

    /* Square root of `a', rounded via roundAndPackFloatx80(). */
    sigA0 = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    /* Maximum exponent: propagate a NaN, keep a positive operand. */
    if ( expA == 0x7FFF ) {
        if ( (bits64) ( sigA0<<1 ) ) return propagateFloatx80NaN( a, a );
        if ( signA ) goto invalid;
        return a;
    }
    /* A negative operand is only acceptable when it is zero. */
    if ( signA ) {
        if ( ( expA | sigA0 ) != 0 ) goto invalid;
        return a;
    }
    if ( expA == 0 ) {
        if ( sigA0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( sigA0, &expA, &sigA0 );
    }

    /* Halve the unbiased exponent and seed the root from a 32-bit estimate. */
    rootExp = ( ( expA - 0x3FFF )>>1 ) + 0x3FFF;
    root0 = estimateSqrt32( expA, sigA0>>32 );
    shift128Right( sigA0, 0, 2 + ( expA & 1 ), &sigA0, &sigA1 );
    root0 = estimateDiv128To64( sigA0, sigA1, root0<<32 ) + ( root0<<30 );
    twiceRoot0 = root0<<1;

    /* Remainder r0:r1 = sigA - root0^2; step root0 down while negative. */
    mul64To128( root0, root0, &t0, &t1 );
    sub128( sigA0, sigA1, t0, t1, &r0, &r1 );
    while ( (sbits64) r0 < 0 ) {
        --root0;
        twiceRoot0 -= 2;
        add128( r0, r1, root0>>63, twiceRoot0 | 1, &r0, &r1 );
    }

    /*
     * Low word of the root.  The exact remainder is computed only when
     * the low 62 bits of the estimate are at most 5; the "sticky" bit
     * then records whether anything is left over.
     */
    root1 = estimateDiv128To64( r1, 0, twiceRoot0 );
    if ( ( root1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
        if ( root1 == 0 ) root1 = 1;
        mul64To128( twiceRoot0, root1, &t1, &t2 );
        sub128( r1, 0, t1, t2, &r1, &r2 );
        mul64To128( root1, root1, &t2, &t3 );
        sub192( r1, r2, 0, 0, t2, t3, &r1, &r2, &r3 );
        while ( (sbits64) r1 < 0 ) {
            --root1;
            shortShift128Left( 0, root1, 1, &t2, &t3 );
            t3 |= 1;
            t2 |= twiceRoot0;
            add192( r1, r2, r3, 0, t2, t3, &r1, &r2, &r3 );
        }
        root1 |= ( ( r1 | r2 | r3 ) != 0 );
    }

    /* Assemble the 128-bit significand and round. */
    shortShift128Left( 0, root1, 1, &root0, &root1 );
    root0 |= twiceRoot0;
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, 0, rootExp, root0, root1 );

 invalid:
    /* Invalid operation: raise the flag and return the default NaN. */
    float_raise( float_flag_invalid );
    nan.low = floatx80_default_nan_low;
    nan.high = floatx80_default_nan_high;
    return nan;

}
4137433075b6Spvalchev 
4138433075b6Spvalchev /*
4139433075b6Spvalchev -------------------------------------------------------------------------------
4140433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is
4141433075b6Spvalchev equal to the corresponding value `b', and 0 otherwise.  The comparison is
4142433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-Point
4143433075b6Spvalchev Arithmetic.
4144433075b6Spvalchev -------------------------------------------------------------------------------
4145433075b6Spvalchev */
floatx80_eq(floatx80 a,floatx80 b)4146433075b6Spvalchev flag floatx80_eq( floatx80 a, floatx80 b )
4147433075b6Spvalchev {
4148433075b6Spvalchev 
4149433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4150433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4151433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4152433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4153433075b6Spvalchev        ) {
4154433075b6Spvalchev         if (    floatx80_is_signaling_nan( a )
4155433075b6Spvalchev              || floatx80_is_signaling_nan( b ) ) {
4156433075b6Spvalchev             float_raise( float_flag_invalid );
4157433075b6Spvalchev         }
4158433075b6Spvalchev         return 0;
4159433075b6Spvalchev     }
4160433075b6Spvalchev     return
4161433075b6Spvalchev            ( a.low == b.low )
4162433075b6Spvalchev         && (    ( a.high == b.high )
4163433075b6Spvalchev              || (    ( a.low == 0 )
4164433075b6Spvalchev                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4165433075b6Spvalchev            );
4166433075b6Spvalchev 
4167433075b6Spvalchev }
4168433075b6Spvalchev 
4169433075b6Spvalchev /*
4170433075b6Spvalchev -------------------------------------------------------------------------------
4171433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is
4172433075b6Spvalchev less than or equal to the corresponding value `b', and 0 otherwise.  The
4173433075b6Spvalchev comparison is performed according to the IEC/IEEE Standard for Binary
4174433075b6Spvalchev Floating-Point Arithmetic.
4175433075b6Spvalchev -------------------------------------------------------------------------------
4176433075b6Spvalchev */
floatx80_le(floatx80 a,floatx80 b)4177433075b6Spvalchev flag floatx80_le( floatx80 a, floatx80 b )
4178433075b6Spvalchev {
4179433075b6Spvalchev     flag aSign, bSign;
4180433075b6Spvalchev 
4181433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4182433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4183433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4184433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4185433075b6Spvalchev        ) {
4186433075b6Spvalchev         float_raise( float_flag_invalid );
4187433075b6Spvalchev         return 0;
4188433075b6Spvalchev     }
4189433075b6Spvalchev     aSign = extractFloatx80Sign( a );
4190433075b6Spvalchev     bSign = extractFloatx80Sign( b );
4191433075b6Spvalchev     if ( aSign != bSign ) {
4192433075b6Spvalchev         return
4193433075b6Spvalchev                aSign
4194433075b6Spvalchev             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4195433075b6Spvalchev                  == 0 );
4196433075b6Spvalchev     }
4197433075b6Spvalchev     return
4198433075b6Spvalchev           aSign ? le128( b.high, b.low, a.high, a.low )
4199433075b6Spvalchev         : le128( a.high, a.low, b.high, b.low );
4200433075b6Spvalchev 
4201433075b6Spvalchev }
4202433075b6Spvalchev 
4203433075b6Spvalchev /*
4204433075b6Spvalchev -------------------------------------------------------------------------------
4205433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is
4206433075b6Spvalchev less than the corresponding value `b', and 0 otherwise.  The comparison
4207433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4208433075b6Spvalchev Arithmetic.
4209433075b6Spvalchev -------------------------------------------------------------------------------
4210433075b6Spvalchev */
floatx80_lt(floatx80 a,floatx80 b)4211433075b6Spvalchev flag floatx80_lt( floatx80 a, floatx80 b )
4212433075b6Spvalchev {
4213433075b6Spvalchev     flag aSign, bSign;
4214433075b6Spvalchev 
4215433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4216433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4217433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4218433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4219433075b6Spvalchev        ) {
4220433075b6Spvalchev         float_raise( float_flag_invalid );
4221433075b6Spvalchev         return 0;
4222433075b6Spvalchev     }
4223433075b6Spvalchev     aSign = extractFloatx80Sign( a );
4224433075b6Spvalchev     bSign = extractFloatx80Sign( b );
4225433075b6Spvalchev     if ( aSign != bSign ) {
4226433075b6Spvalchev         return
4227433075b6Spvalchev                aSign
4228433075b6Spvalchev             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4229433075b6Spvalchev                  != 0 );
4230433075b6Spvalchev     }
4231433075b6Spvalchev     return
4232433075b6Spvalchev           aSign ? lt128( b.high, b.low, a.high, a.low )
4233433075b6Spvalchev         : lt128( a.high, a.low, b.high, b.low );
4234433075b6Spvalchev 
4235433075b6Spvalchev }
4236433075b6Spvalchev 
4237433075b6Spvalchev /*
4238433075b6Spvalchev -------------------------------------------------------------------------------
4239433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is equal
4240433075b6Spvalchev to the corresponding value `b', and 0 otherwise.  The invalid exception is
4241433075b6Spvalchev raised if either operand is a NaN.  Otherwise, the comparison is performed
4242433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4243433075b6Spvalchev -------------------------------------------------------------------------------
4244433075b6Spvalchev */
floatx80_eq_signaling(floatx80 a,floatx80 b)4245433075b6Spvalchev flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4246433075b6Spvalchev {
4247433075b6Spvalchev 
4248433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4249433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4250433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4251433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4252433075b6Spvalchev        ) {
4253433075b6Spvalchev         float_raise( float_flag_invalid );
4254433075b6Spvalchev         return 0;
4255433075b6Spvalchev     }
4256433075b6Spvalchev     return
4257433075b6Spvalchev            ( a.low == b.low )
4258433075b6Spvalchev         && (    ( a.high == b.high )
4259433075b6Spvalchev              || (    ( a.low == 0 )
4260433075b6Spvalchev                   && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4261433075b6Spvalchev            );
4262433075b6Spvalchev 
4263433075b6Spvalchev }
4264433075b6Spvalchev 
4265433075b6Spvalchev /*
4266433075b6Spvalchev -------------------------------------------------------------------------------
4267433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is less
4268433075b6Spvalchev than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4269433075b6Spvalchev do not cause an exception.  Otherwise, the comparison is performed according
4270433075b6Spvalchev to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271433075b6Spvalchev -------------------------------------------------------------------------------
4272433075b6Spvalchev */
floatx80_le_quiet(floatx80 a,floatx80 b)4273433075b6Spvalchev flag floatx80_le_quiet( floatx80 a, floatx80 b )
4274433075b6Spvalchev {
4275433075b6Spvalchev     flag aSign, bSign;
4276433075b6Spvalchev 
4277433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4278433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4279433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4280433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4281433075b6Spvalchev        ) {
4282433075b6Spvalchev         if (    floatx80_is_signaling_nan( a )
4283433075b6Spvalchev              || floatx80_is_signaling_nan( b ) ) {
4284433075b6Spvalchev             float_raise( float_flag_invalid );
4285433075b6Spvalchev         }
4286433075b6Spvalchev         return 0;
4287433075b6Spvalchev     }
4288433075b6Spvalchev     aSign = extractFloatx80Sign( a );
4289433075b6Spvalchev     bSign = extractFloatx80Sign( b );
4290433075b6Spvalchev     if ( aSign != bSign ) {
4291433075b6Spvalchev         return
4292433075b6Spvalchev                aSign
4293433075b6Spvalchev             || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4294433075b6Spvalchev                  == 0 );
4295433075b6Spvalchev     }
4296433075b6Spvalchev     return
4297433075b6Spvalchev           aSign ? le128( b.high, b.low, a.high, a.low )
4298433075b6Spvalchev         : le128( a.high, a.low, b.high, b.low );
4299433075b6Spvalchev 
4300433075b6Spvalchev }
4301433075b6Spvalchev 
4302433075b6Spvalchev /*
4303433075b6Spvalchev -------------------------------------------------------------------------------
4304433075b6Spvalchev Returns 1 if the extended double-precision floating-point value `a' is less
4305433075b6Spvalchev than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4306433075b6Spvalchev an exception.  Otherwise, the comparison is performed according to the
4307433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4308433075b6Spvalchev -------------------------------------------------------------------------------
4309433075b6Spvalchev */
floatx80_lt_quiet(floatx80 a,floatx80 b)4310433075b6Spvalchev flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4311433075b6Spvalchev {
4312433075b6Spvalchev     flag aSign, bSign;
4313433075b6Spvalchev 
4314433075b6Spvalchev     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4315433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4316433075b6Spvalchev          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4317433075b6Spvalchev               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4318433075b6Spvalchev        ) {
4319433075b6Spvalchev         if (    floatx80_is_signaling_nan( a )
4320433075b6Spvalchev              || floatx80_is_signaling_nan( b ) ) {
4321433075b6Spvalchev             float_raise( float_flag_invalid );
4322433075b6Spvalchev         }
4323433075b6Spvalchev         return 0;
4324433075b6Spvalchev     }
4325433075b6Spvalchev     aSign = extractFloatx80Sign( a );
4326433075b6Spvalchev     bSign = extractFloatx80Sign( b );
4327433075b6Spvalchev     if ( aSign != bSign ) {
4328433075b6Spvalchev         return
4329433075b6Spvalchev                aSign
4330433075b6Spvalchev             && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4331433075b6Spvalchev                  != 0 );
4332433075b6Spvalchev     }
4333433075b6Spvalchev     return
4334433075b6Spvalchev           aSign ? lt128( b.high, b.low, a.high, a.low )
4335433075b6Spvalchev         : lt128( a.high, a.low, b.high, b.low );
4336433075b6Spvalchev 
4337433075b6Spvalchev }
4338433075b6Spvalchev 
4339433075b6Spvalchev #endif
4340433075b6Spvalchev 
4341433075b6Spvalchev #ifdef FLOAT128
4342433075b6Spvalchev 
4343433075b6Spvalchev /*
4344433075b6Spvalchev -------------------------------------------------------------------------------
4345433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4346433075b6Spvalchev value `a' to the 32-bit two's complement integer format.  The conversion
4347433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4348433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
4349433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
4350433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
4351433075b6Spvalchev largest integer with the same sign as `a' is returned.
4352433075b6Spvalchev -------------------------------------------------------------------------------
4353433075b6Spvalchev */
float128_to_int32(float128 a)4354433075b6Spvalchev int32 float128_to_int32( float128 a )
4355433075b6Spvalchev {
4356433075b6Spvalchev     flag aSign;
4357433075b6Spvalchev     int32 aExp, shiftCount;
4358433075b6Spvalchev     bits64 aSig0, aSig1;
4359433075b6Spvalchev 
4360433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4361433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4362433075b6Spvalchev     aExp = extractFloat128Exp( a );
4363433075b6Spvalchev     aSign = extractFloat128Sign( a );
4364433075b6Spvalchev     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4365433075b6Spvalchev     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4366433075b6Spvalchev     aSig0 |= ( aSig1 != 0 );
4367433075b6Spvalchev     shiftCount = 0x4028 - aExp;
4368433075b6Spvalchev     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4369433075b6Spvalchev     return roundAndPackInt32( aSign, aSig0 );
4370433075b6Spvalchev 
4371433075b6Spvalchev }
4372433075b6Spvalchev 
4373433075b6Spvalchev /*
4374433075b6Spvalchev -------------------------------------------------------------------------------
4375433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4376433075b6Spvalchev value `a' to the 32-bit two's complement integer format.  The conversion
4377433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4378433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.  If
4379433075b6Spvalchev `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4380433075b6Spvalchev conversion overflows, the largest integer with the same sign as `a' is
4381433075b6Spvalchev returned.
4382433075b6Spvalchev -------------------------------------------------------------------------------
4383433075b6Spvalchev */
float128_to_int32_round_to_zero(float128 a)4384433075b6Spvalchev int32 float128_to_int32_round_to_zero( float128 a )
4385433075b6Spvalchev {
4386433075b6Spvalchev     flag aSign;
4387433075b6Spvalchev     int32 aExp, shiftCount;
4388433075b6Spvalchev     bits64 aSig0, aSig1, savedASig;
4389433075b6Spvalchev     int32 z;
4390433075b6Spvalchev 
4391433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4392433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4393433075b6Spvalchev     aExp = extractFloat128Exp( a );
4394433075b6Spvalchev     aSign = extractFloat128Sign( a );
4395433075b6Spvalchev     aSig0 |= ( aSig1 != 0 );
4396433075b6Spvalchev     if ( 0x401E < aExp ) {
4397433075b6Spvalchev         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4398433075b6Spvalchev         goto invalid;
4399433075b6Spvalchev     }
4400433075b6Spvalchev     else if ( aExp < 0x3FFF ) {
4401433075b6Spvalchev         if ( aExp || aSig0 ) float_set_inexact();
4402433075b6Spvalchev         return 0;
4403433075b6Spvalchev     }
4404433075b6Spvalchev     aSig0 |= LIT64( 0x0001000000000000 );
4405433075b6Spvalchev     shiftCount = 0x402F - aExp;
4406433075b6Spvalchev     savedASig = aSig0;
4407433075b6Spvalchev     aSig0 >>= shiftCount;
4408433075b6Spvalchev     z = aSig0;
4409433075b6Spvalchev     if ( aSign ) z = - z;
4410433075b6Spvalchev     if ( ( z < 0 ) ^ aSign ) {
4411433075b6Spvalchev  invalid:
4412433075b6Spvalchev         float_raise( float_flag_invalid );
4413433075b6Spvalchev         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4414433075b6Spvalchev     }
4415433075b6Spvalchev     if ( ( aSig0<<shiftCount ) != savedASig ) {
4416433075b6Spvalchev         float_set_inexact();
4417433075b6Spvalchev     }
4418433075b6Spvalchev     return z;
4419433075b6Spvalchev 
4420433075b6Spvalchev }
4421433075b6Spvalchev 
4422433075b6Spvalchev /*
4423433075b6Spvalchev -------------------------------------------------------------------------------
4424433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4425433075b6Spvalchev value `a' to the 64-bit two's complement integer format.  The conversion
4426433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4427433075b6Spvalchev Arithmetic---which means in particular that the conversion is rounded
4428433075b6Spvalchev according to the current rounding mode.  If `a' is a NaN, the largest
4429433075b6Spvalchev positive integer is returned.  Otherwise, if the conversion overflows, the
4430433075b6Spvalchev largest integer with the same sign as `a' is returned.
4431433075b6Spvalchev -------------------------------------------------------------------------------
4432433075b6Spvalchev */
float128_to_int64(float128 a)4433433075b6Spvalchev int64 float128_to_int64( float128 a )
4434433075b6Spvalchev {
4435433075b6Spvalchev     flag aSign;
4436433075b6Spvalchev     int32 aExp, shiftCount;
4437433075b6Spvalchev     bits64 aSig0, aSig1;
4438433075b6Spvalchev 
4439433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4440433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4441433075b6Spvalchev     aExp = extractFloat128Exp( a );
4442433075b6Spvalchev     aSign = extractFloat128Sign( a );
4443433075b6Spvalchev     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4444433075b6Spvalchev     shiftCount = 0x402F - aExp;
4445433075b6Spvalchev     if ( shiftCount <= 0 ) {
4446433075b6Spvalchev         if ( 0x403E < aExp ) {
4447433075b6Spvalchev             float_raise( float_flag_invalid );
4448433075b6Spvalchev             if (    ! aSign
4449433075b6Spvalchev                  || (    ( aExp == 0x7FFF )
4450433075b6Spvalchev                       && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4451433075b6Spvalchev                     )
4452433075b6Spvalchev                ) {
4453433075b6Spvalchev                 return LIT64( 0x7FFFFFFFFFFFFFFF );
4454433075b6Spvalchev             }
4455433075b6Spvalchev             return (sbits64) LIT64( 0x8000000000000000 );
4456433075b6Spvalchev         }
4457433075b6Spvalchev         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4458433075b6Spvalchev     }
4459433075b6Spvalchev     else {
4460433075b6Spvalchev         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4461433075b6Spvalchev     }
4462433075b6Spvalchev     return roundAndPackInt64( aSign, aSig0, aSig1 );
4463433075b6Spvalchev 
4464433075b6Spvalchev }
4465433075b6Spvalchev 
4466433075b6Spvalchev /*
4467433075b6Spvalchev -------------------------------------------------------------------------------
4468433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4469433075b6Spvalchev value `a' to the 64-bit two's complement integer format.  The conversion
4470433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4471433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.
4472433075b6Spvalchev If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4473433075b6Spvalchev the conversion overflows, the largest integer with the same sign as `a' is
4474433075b6Spvalchev returned.
4475433075b6Spvalchev -------------------------------------------------------------------------------
4476433075b6Spvalchev */
float128_to_int64_round_to_zero(float128 a)4477433075b6Spvalchev int64 float128_to_int64_round_to_zero( float128 a )
4478433075b6Spvalchev {
4479433075b6Spvalchev     flag aSign;
4480433075b6Spvalchev     int32 aExp, shiftCount;
4481433075b6Spvalchev     bits64 aSig0, aSig1;
4482433075b6Spvalchev     int64 z;
4483433075b6Spvalchev 
4484433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4485433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4486433075b6Spvalchev     aExp = extractFloat128Exp( a );
4487433075b6Spvalchev     aSign = extractFloat128Sign( a );
4488433075b6Spvalchev     if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4489433075b6Spvalchev     shiftCount = aExp - 0x402F;
4490433075b6Spvalchev     if ( 0 < shiftCount ) {
4491433075b6Spvalchev         if ( 0x403E <= aExp ) {
4492433075b6Spvalchev             aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4493433075b6Spvalchev             if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4494433075b6Spvalchev                  && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4495433075b6Spvalchev                 if ( aSig1 ) float_set_inexact();
4496433075b6Spvalchev             }
4497433075b6Spvalchev             else {
4498433075b6Spvalchev                 float_raise( float_flag_invalid );
4499433075b6Spvalchev                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4500433075b6Spvalchev                     return LIT64( 0x7FFFFFFFFFFFFFFF );
4501433075b6Spvalchev                 }
4502433075b6Spvalchev             }
4503433075b6Spvalchev             return (sbits64) LIT64( 0x8000000000000000 );
4504433075b6Spvalchev         }
4505433075b6Spvalchev         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4506433075b6Spvalchev         if ( (bits64) ( aSig1<<shiftCount ) ) {
4507433075b6Spvalchev             float_set_inexact();
4508433075b6Spvalchev         }
4509433075b6Spvalchev     }
4510433075b6Spvalchev     else {
4511433075b6Spvalchev         if ( aExp < 0x3FFF ) {
4512433075b6Spvalchev             if ( aExp | aSig0 | aSig1 ) {
4513433075b6Spvalchev                 float_set_inexact();
4514433075b6Spvalchev             }
4515433075b6Spvalchev             return 0;
4516433075b6Spvalchev         }
4517433075b6Spvalchev         z = aSig0>>( - shiftCount );
4518433075b6Spvalchev         if (    aSig1
4519433075b6Spvalchev              || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4520433075b6Spvalchev             float_set_inexact();
4521433075b6Spvalchev         }
4522433075b6Spvalchev     }
4523433075b6Spvalchev     if ( aSign ) z = - z;
4524433075b6Spvalchev     return z;
4525433075b6Spvalchev 
4526433075b6Spvalchev }
4527433075b6Spvalchev 
4528433075b6Spvalchev /*
4529433075b6Spvalchev -------------------------------------------------------------------------------
4530433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4531433075b6Spvalchev value `a' to the single-precision floating-point format.  The conversion
4532433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4533433075b6Spvalchev Arithmetic.
4534433075b6Spvalchev -------------------------------------------------------------------------------
4535433075b6Spvalchev */
float128_to_float32(float128 a)4536433075b6Spvalchev float32 float128_to_float32( float128 a )
4537433075b6Spvalchev {
4538433075b6Spvalchev     flag aSign;
4539433075b6Spvalchev     int32 aExp;
4540433075b6Spvalchev     bits64 aSig0, aSig1;
4541433075b6Spvalchev     bits32 zSig;
4542433075b6Spvalchev 
4543433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4544433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4545433075b6Spvalchev     aExp = extractFloat128Exp( a );
4546433075b6Spvalchev     aSign = extractFloat128Sign( a );
4547433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4548433075b6Spvalchev         if ( aSig0 | aSig1 ) {
4549433075b6Spvalchev             return commonNaNToFloat32( float128ToCommonNaN( a ) );
4550433075b6Spvalchev         }
4551433075b6Spvalchev         return packFloat32( aSign, 0xFF, 0 );
4552433075b6Spvalchev     }
4553433075b6Spvalchev     aSig0 |= ( aSig1 != 0 );
4554433075b6Spvalchev     shift64RightJamming( aSig0, 18, &aSig0 );
4555433075b6Spvalchev     zSig = aSig0;
4556433075b6Spvalchev     if ( aExp || zSig ) {
4557433075b6Spvalchev         zSig |= 0x40000000;
4558433075b6Spvalchev         aExp -= 0x3F81;
4559433075b6Spvalchev     }
4560433075b6Spvalchev     return roundAndPackFloat32( aSign, aExp, zSig );
4561433075b6Spvalchev 
4562433075b6Spvalchev }
4563433075b6Spvalchev 
4564433075b6Spvalchev /*
4565433075b6Spvalchev -------------------------------------------------------------------------------
4566433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4567433075b6Spvalchev value `a' to the double-precision floating-point format.  The conversion
4568433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
4569433075b6Spvalchev Arithmetic.
4570433075b6Spvalchev -------------------------------------------------------------------------------
4571433075b6Spvalchev */
float128_to_float64(float128 a)4572433075b6Spvalchev float64 float128_to_float64( float128 a )
4573433075b6Spvalchev {
4574433075b6Spvalchev     flag aSign;
4575433075b6Spvalchev     int32 aExp;
4576433075b6Spvalchev     bits64 aSig0, aSig1;
4577433075b6Spvalchev 
4578433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4579433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4580433075b6Spvalchev     aExp = extractFloat128Exp( a );
4581433075b6Spvalchev     aSign = extractFloat128Sign( a );
4582433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4583433075b6Spvalchev         if ( aSig0 | aSig1 ) {
4584433075b6Spvalchev             return commonNaNToFloat64( float128ToCommonNaN( a ) );
4585433075b6Spvalchev         }
4586433075b6Spvalchev         return packFloat64( aSign, 0x7FF, 0 );
4587433075b6Spvalchev     }
4588433075b6Spvalchev     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4589433075b6Spvalchev     aSig0 |= ( aSig1 != 0 );
4590433075b6Spvalchev     if ( aExp || aSig0 ) {
4591433075b6Spvalchev         aSig0 |= LIT64( 0x4000000000000000 );
4592433075b6Spvalchev         aExp -= 0x3C01;
4593433075b6Spvalchev     }
4594433075b6Spvalchev     return roundAndPackFloat64( aSign, aExp, aSig0 );
4595433075b6Spvalchev 
4596433075b6Spvalchev }
4597433075b6Spvalchev 
4598433075b6Spvalchev #ifdef FLOATX80
4599433075b6Spvalchev 
4600433075b6Spvalchev /*
4601433075b6Spvalchev -------------------------------------------------------------------------------
4602433075b6Spvalchev Returns the result of converting the quadruple-precision floating-point
4603433075b6Spvalchev value `a' to the extended double-precision floating-point format.  The
4604433075b6Spvalchev conversion is performed according to the IEC/IEEE Standard for Binary
4605433075b6Spvalchev Floating-Point Arithmetic.
4606433075b6Spvalchev -------------------------------------------------------------------------------
4607433075b6Spvalchev */
float128_to_floatx80(float128 a)4608433075b6Spvalchev floatx80 float128_to_floatx80( float128 a )
4609433075b6Spvalchev {
4610433075b6Spvalchev     flag aSign;
4611433075b6Spvalchev     int32 aExp;
4612433075b6Spvalchev     bits64 aSig0, aSig1;
4613433075b6Spvalchev 
4614433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4615433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4616433075b6Spvalchev     aExp = extractFloat128Exp( a );
4617433075b6Spvalchev     aSign = extractFloat128Sign( a );
4618433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4619433075b6Spvalchev         if ( aSig0 | aSig1 ) {
4620433075b6Spvalchev             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4621433075b6Spvalchev         }
4622433075b6Spvalchev         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4623433075b6Spvalchev     }
4624433075b6Spvalchev     if ( aExp == 0 ) {
4625433075b6Spvalchev         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4626433075b6Spvalchev         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4627433075b6Spvalchev     }
4628433075b6Spvalchev     else {
4629433075b6Spvalchev         aSig0 |= LIT64( 0x0001000000000000 );
4630433075b6Spvalchev     }
4631433075b6Spvalchev     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4632433075b6Spvalchev     return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4633433075b6Spvalchev 
4634433075b6Spvalchev }
4635433075b6Spvalchev 
4636433075b6Spvalchev #endif
4637433075b6Spvalchev 
4638433075b6Spvalchev /*
4639433075b6Spvalchev -------------------------------------------------------------------------------
4640433075b6Spvalchev Rounds the quadruple-precision floating-point value `a' to an integer, and
4641433075b6Spvalchev returns the result as a quadruple-precision floating-point value.  The
4642433075b6Spvalchev operation is performed according to the IEC/IEEE Standard for Binary
4643433075b6Spvalchev Floating-Point Arithmetic.
4644433075b6Spvalchev -------------------------------------------------------------------------------
4645433075b6Spvalchev */
float128_round_to_int(float128 a)4646433075b6Spvalchev float128 float128_round_to_int( float128 a )
4647433075b6Spvalchev {
4648433075b6Spvalchev     flag aSign;
4649433075b6Spvalchev     int32 aExp;
4650433075b6Spvalchev     bits64 lastBitMask, roundBitsMask;
4651433075b6Spvalchev     int8 roundingMode;
4652433075b6Spvalchev     float128 z;
4653433075b6Spvalchev 
4654433075b6Spvalchev     aExp = extractFloat128Exp( a );
4655433075b6Spvalchev     if ( 0x402F <= aExp ) {
4656433075b6Spvalchev         if ( 0x406F <= aExp ) {
4657433075b6Spvalchev             if (    ( aExp == 0x7FFF )
4658433075b6Spvalchev                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4659433075b6Spvalchev                ) {
4660433075b6Spvalchev                 return propagateFloat128NaN( a, a );
4661433075b6Spvalchev             }
4662433075b6Spvalchev             return a;
4663433075b6Spvalchev         }
4664433075b6Spvalchev         lastBitMask = 1;
4665433075b6Spvalchev         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4666433075b6Spvalchev         roundBitsMask = lastBitMask - 1;
4667433075b6Spvalchev         z = a;
4668433075b6Spvalchev         roundingMode = float_rounding_mode();
4669433075b6Spvalchev         if ( roundingMode == float_round_nearest_even ) {
4670433075b6Spvalchev             if ( lastBitMask ) {
4671433075b6Spvalchev                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4672433075b6Spvalchev                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4673433075b6Spvalchev             }
4674433075b6Spvalchev             else {
4675433075b6Spvalchev                 if ( (sbits64) z.low < 0 ) {
4676433075b6Spvalchev                     ++z.high;
4677433075b6Spvalchev                     if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4678433075b6Spvalchev                 }
4679433075b6Spvalchev             }
4680433075b6Spvalchev         }
4681433075b6Spvalchev         else if ( roundingMode != float_round_to_zero ) {
4682433075b6Spvalchev             if (   extractFloat128Sign( z )
4683433075b6Spvalchev                  ^ ( roundingMode == float_round_up ) ) {
4684433075b6Spvalchev                 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4685433075b6Spvalchev             }
4686433075b6Spvalchev         }
4687433075b6Spvalchev         z.low &= ~ roundBitsMask;
4688433075b6Spvalchev     }
4689433075b6Spvalchev     else {
4690433075b6Spvalchev         if ( aExp < 0x3FFF ) {
4691433075b6Spvalchev             if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4692433075b6Spvalchev             float_set_inexact();
4693433075b6Spvalchev             aSign = extractFloat128Sign( a );
4694433075b6Spvalchev             switch ( float_rounding_mode() ) {
4695433075b6Spvalchev              case float_round_nearest_even:
4696433075b6Spvalchev                 if (    ( aExp == 0x3FFE )
4697433075b6Spvalchev                      && (   extractFloat128Frac0( a )
4698433075b6Spvalchev                           | extractFloat128Frac1( a ) )
4699433075b6Spvalchev                    ) {
4700433075b6Spvalchev                     return packFloat128( aSign, 0x3FFF, 0, 0 );
4701433075b6Spvalchev                 }
4702433075b6Spvalchev                 break;
4703433075b6Spvalchev              case float_round_down:
4704433075b6Spvalchev                 return
4705433075b6Spvalchev                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4706433075b6Spvalchev                     : packFloat128( 0, 0, 0, 0 );
4707433075b6Spvalchev              case float_round_up:
4708433075b6Spvalchev                 return
4709433075b6Spvalchev                       aSign ? packFloat128( 1, 0, 0, 0 )
4710433075b6Spvalchev                     : packFloat128( 0, 0x3FFF, 0, 0 );
4711433075b6Spvalchev             }
4712433075b6Spvalchev             return packFloat128( aSign, 0, 0, 0 );
4713433075b6Spvalchev         }
4714433075b6Spvalchev         lastBitMask = 1;
4715433075b6Spvalchev         lastBitMask <<= 0x402F - aExp;
4716433075b6Spvalchev         roundBitsMask = lastBitMask - 1;
4717433075b6Spvalchev         z.low = 0;
4718433075b6Spvalchev         z.high = a.high;
4719433075b6Spvalchev         roundingMode = float_rounding_mode();
4720433075b6Spvalchev         if ( roundingMode == float_round_nearest_even ) {
4721433075b6Spvalchev             z.high += lastBitMask>>1;
4722433075b6Spvalchev             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4723433075b6Spvalchev                 z.high &= ~ lastBitMask;
4724433075b6Spvalchev             }
4725433075b6Spvalchev         }
4726433075b6Spvalchev         else if ( roundingMode != float_round_to_zero ) {
4727433075b6Spvalchev             if (   extractFloat128Sign( z )
4728433075b6Spvalchev                  ^ ( roundingMode == float_round_up ) ) {
4729433075b6Spvalchev                 z.high |= ( a.low != 0 );
4730433075b6Spvalchev                 z.high += roundBitsMask;
4731433075b6Spvalchev             }
4732433075b6Spvalchev         }
4733433075b6Spvalchev         z.high &= ~ roundBitsMask;
4734433075b6Spvalchev     }
4735433075b6Spvalchev     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4736433075b6Spvalchev         float_set_inexact();
4737433075b6Spvalchev     }
4738433075b6Spvalchev     return z;
4739433075b6Spvalchev 
4740433075b6Spvalchev }
4741433075b6Spvalchev 
4742433075b6Spvalchev /*
4743433075b6Spvalchev -------------------------------------------------------------------------------
4744433075b6Spvalchev Returns the result of adding the absolute values of the quadruple-precision
4745433075b6Spvalchev floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4746433075b6Spvalchev before being returned.  `zSign' is ignored if the result is a NaN.
4747433075b6Spvalchev The addition is performed according to the IEC/IEEE Standard for Binary
4748433075b6Spvalchev Floating-Point Arithmetic.
4749433075b6Spvalchev -------------------------------------------------------------------------------
4750433075b6Spvalchev */
addFloat128Sigs(float128 a,float128 b,flag zSign)4751433075b6Spvalchev static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4752433075b6Spvalchev {
4753433075b6Spvalchev     int32 aExp, bExp, zExp;
4754433075b6Spvalchev     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4755433075b6Spvalchev     int32 expDiff;
4756433075b6Spvalchev 
4757433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4758433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4759433075b6Spvalchev     aExp = extractFloat128Exp( a );
4760433075b6Spvalchev     bSig1 = extractFloat128Frac1( b );
4761433075b6Spvalchev     bSig0 = extractFloat128Frac0( b );
4762433075b6Spvalchev     bExp = extractFloat128Exp( b );
4763433075b6Spvalchev     expDiff = aExp - bExp;
4764433075b6Spvalchev     if ( 0 < expDiff ) {
4765433075b6Spvalchev         if ( aExp == 0x7FFF ) {
4766433075b6Spvalchev             if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4767433075b6Spvalchev             return a;
4768433075b6Spvalchev         }
4769433075b6Spvalchev         if ( bExp == 0 ) {
4770433075b6Spvalchev             --expDiff;
4771433075b6Spvalchev         }
4772433075b6Spvalchev         else {
4773433075b6Spvalchev             bSig0 |= LIT64( 0x0001000000000000 );
4774433075b6Spvalchev         }
4775433075b6Spvalchev         shift128ExtraRightJamming(
4776433075b6Spvalchev             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4777433075b6Spvalchev         zExp = aExp;
4778433075b6Spvalchev     }
4779433075b6Spvalchev     else if ( expDiff < 0 ) {
4780433075b6Spvalchev         if ( bExp == 0x7FFF ) {
4781433075b6Spvalchev             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4782433075b6Spvalchev             return packFloat128( zSign, 0x7FFF, 0, 0 );
4783433075b6Spvalchev         }
4784433075b6Spvalchev         if ( aExp == 0 ) {
4785433075b6Spvalchev             ++expDiff;
4786433075b6Spvalchev         }
4787433075b6Spvalchev         else {
4788433075b6Spvalchev             aSig0 |= LIT64( 0x0001000000000000 );
4789433075b6Spvalchev         }
4790433075b6Spvalchev         shift128ExtraRightJamming(
4791433075b6Spvalchev             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4792433075b6Spvalchev         zExp = bExp;
4793433075b6Spvalchev     }
4794433075b6Spvalchev     else {
4795433075b6Spvalchev         if ( aExp == 0x7FFF ) {
4796433075b6Spvalchev             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4797433075b6Spvalchev                 return propagateFloat128NaN( a, b );
4798433075b6Spvalchev             }
4799433075b6Spvalchev             return a;
4800433075b6Spvalchev         }
4801433075b6Spvalchev         add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4802433075b6Spvalchev         if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4803433075b6Spvalchev         zSig2 = 0;
4804433075b6Spvalchev         zSig0 |= LIT64( 0x0002000000000000 );
4805433075b6Spvalchev         zExp = aExp;
4806433075b6Spvalchev         goto shiftRight1;
4807433075b6Spvalchev     }
4808433075b6Spvalchev     aSig0 |= LIT64( 0x0001000000000000 );
4809433075b6Spvalchev     add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4810433075b6Spvalchev     --zExp;
4811433075b6Spvalchev     if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4812433075b6Spvalchev     ++zExp;
4813433075b6Spvalchev  shiftRight1:
4814433075b6Spvalchev     shift128ExtraRightJamming(
4815433075b6Spvalchev         zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4816433075b6Spvalchev  roundAndPack:
4817433075b6Spvalchev     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4818433075b6Spvalchev 
4819433075b6Spvalchev }
4820433075b6Spvalchev 
4821433075b6Spvalchev /*
4822433075b6Spvalchev -------------------------------------------------------------------------------
4823433075b6Spvalchev Returns the result of subtracting the absolute values of the quadruple-
4824433075b6Spvalchev precision floating-point values `a' and `b'.  If `zSign' is 1, the
4825433075b6Spvalchev difference is negated before being returned.  `zSign' is ignored if the
4826433075b6Spvalchev result is a NaN.  The subtraction is performed according to the IEC/IEEE
4827433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
4828433075b6Spvalchev -------------------------------------------------------------------------------
4829433075b6Spvalchev */
subFloat128Sigs(float128 a,float128 b,flag zSign)4830433075b6Spvalchev static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4831433075b6Spvalchev {
4832433075b6Spvalchev     int32 aExp, bExp, zExp;
4833433075b6Spvalchev     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4834433075b6Spvalchev     int32 expDiff;
4835433075b6Spvalchev     float128 z;
4836433075b6Spvalchev 
4837433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4838433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4839433075b6Spvalchev     aExp = extractFloat128Exp( a );
4840433075b6Spvalchev     bSig1 = extractFloat128Frac1( b );
4841433075b6Spvalchev     bSig0 = extractFloat128Frac0( b );
4842433075b6Spvalchev     bExp = extractFloat128Exp( b );
4843433075b6Spvalchev     expDiff = aExp - bExp;
4844433075b6Spvalchev     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4845433075b6Spvalchev     shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4846433075b6Spvalchev     if ( 0 < expDiff ) goto aExpBigger;
4847433075b6Spvalchev     if ( expDiff < 0 ) goto bExpBigger;
4848433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4849433075b6Spvalchev         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4850433075b6Spvalchev             return propagateFloat128NaN( a, b );
4851433075b6Spvalchev         }
4852433075b6Spvalchev         float_raise( float_flag_invalid );
4853433075b6Spvalchev         z.low = float128_default_nan_low;
4854433075b6Spvalchev         z.high = float128_default_nan_high;
4855433075b6Spvalchev         return z;
4856433075b6Spvalchev     }
4857433075b6Spvalchev     if ( aExp == 0 ) {
4858433075b6Spvalchev         aExp = 1;
4859433075b6Spvalchev         bExp = 1;
4860433075b6Spvalchev     }
4861433075b6Spvalchev     if ( bSig0 < aSig0 ) goto aBigger;
4862433075b6Spvalchev     if ( aSig0 < bSig0 ) goto bBigger;
4863433075b6Spvalchev     if ( bSig1 < aSig1 ) goto aBigger;
4864433075b6Spvalchev     if ( aSig1 < bSig1 ) goto bBigger;
4865433075b6Spvalchev     return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 );
4866433075b6Spvalchev  bExpBigger:
4867433075b6Spvalchev     if ( bExp == 0x7FFF ) {
4868433075b6Spvalchev         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4869433075b6Spvalchev         return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4870433075b6Spvalchev     }
4871433075b6Spvalchev     if ( aExp == 0 ) {
4872433075b6Spvalchev         ++expDiff;
4873433075b6Spvalchev     }
4874433075b6Spvalchev     else {
4875433075b6Spvalchev         aSig0 |= LIT64( 0x4000000000000000 );
4876433075b6Spvalchev     }
4877433075b6Spvalchev     shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4878433075b6Spvalchev     bSig0 |= LIT64( 0x4000000000000000 );
4879433075b6Spvalchev  bBigger:
4880433075b6Spvalchev     sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4881433075b6Spvalchev     zExp = bExp;
4882433075b6Spvalchev     zSign ^= 1;
4883433075b6Spvalchev     goto normalizeRoundAndPack;
4884433075b6Spvalchev  aExpBigger:
4885433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4886433075b6Spvalchev         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4887433075b6Spvalchev         return a;
4888433075b6Spvalchev     }
4889433075b6Spvalchev     if ( bExp == 0 ) {
4890433075b6Spvalchev         --expDiff;
4891433075b6Spvalchev     }
4892433075b6Spvalchev     else {
4893433075b6Spvalchev         bSig0 |= LIT64( 0x4000000000000000 );
4894433075b6Spvalchev     }
4895433075b6Spvalchev     shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4896433075b6Spvalchev     aSig0 |= LIT64( 0x4000000000000000 );
4897433075b6Spvalchev  aBigger:
4898433075b6Spvalchev     sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4899433075b6Spvalchev     zExp = aExp;
4900433075b6Spvalchev  normalizeRoundAndPack:
4901433075b6Spvalchev     --zExp;
4902433075b6Spvalchev     return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4903433075b6Spvalchev 
4904433075b6Spvalchev }
4905433075b6Spvalchev 
4906433075b6Spvalchev /*
4907433075b6Spvalchev -------------------------------------------------------------------------------
4908433075b6Spvalchev Returns the result of adding the quadruple-precision floating-point values
4909433075b6Spvalchev `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4910433075b6Spvalchev for Binary Floating-Point Arithmetic.
4911433075b6Spvalchev -------------------------------------------------------------------------------
4912433075b6Spvalchev */
float128_add(float128 a,float128 b)4913433075b6Spvalchev float128 float128_add( float128 a, float128 b )
4914433075b6Spvalchev {
4915433075b6Spvalchev     flag aSign, bSign;
4916433075b6Spvalchev 
4917433075b6Spvalchev     aSign = extractFloat128Sign( a );
4918433075b6Spvalchev     bSign = extractFloat128Sign( b );
4919433075b6Spvalchev     if ( aSign == bSign ) {
4920433075b6Spvalchev         return addFloat128Sigs( a, b, aSign );
4921433075b6Spvalchev     }
4922433075b6Spvalchev     else {
4923433075b6Spvalchev         return subFloat128Sigs( a, b, aSign );
4924433075b6Spvalchev     }
4925433075b6Spvalchev 
4926433075b6Spvalchev }
4927433075b6Spvalchev 
4928433075b6Spvalchev /*
4929433075b6Spvalchev -------------------------------------------------------------------------------
4930433075b6Spvalchev Returns the result of subtracting the quadruple-precision floating-point
4931433075b6Spvalchev values `a' and `b'.  The operation is performed according to the IEC/IEEE
4932433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
4933433075b6Spvalchev -------------------------------------------------------------------------------
4934433075b6Spvalchev */
float128_sub(float128 a,float128 b)4935433075b6Spvalchev float128 float128_sub( float128 a, float128 b )
4936433075b6Spvalchev {
4937433075b6Spvalchev     flag aSign, bSign;
4938433075b6Spvalchev 
4939433075b6Spvalchev     aSign = extractFloat128Sign( a );
4940433075b6Spvalchev     bSign = extractFloat128Sign( b );
4941433075b6Spvalchev     if ( aSign == bSign ) {
4942433075b6Spvalchev         return subFloat128Sigs( a, b, aSign );
4943433075b6Spvalchev     }
4944433075b6Spvalchev     else {
4945433075b6Spvalchev         return addFloat128Sigs( a, b, aSign );
4946433075b6Spvalchev     }
4947433075b6Spvalchev 
4948433075b6Spvalchev }
4949433075b6Spvalchev 
4950433075b6Spvalchev /*
4951433075b6Spvalchev -------------------------------------------------------------------------------
4952433075b6Spvalchev Returns the result of multiplying the quadruple-precision floating-point
4953433075b6Spvalchev values `a' and `b'.  The operation is performed according to the IEC/IEEE
4954433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
4955433075b6Spvalchev -------------------------------------------------------------------------------
4956433075b6Spvalchev */
float128_mul(float128 a,float128 b)4957433075b6Spvalchev float128 float128_mul( float128 a, float128 b )
4958433075b6Spvalchev {
4959433075b6Spvalchev     flag aSign, bSign, zSign;
4960433075b6Spvalchev     int32 aExp, bExp, zExp;
4961433075b6Spvalchev     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4962433075b6Spvalchev     float128 z;
4963433075b6Spvalchev 
4964433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
4965433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
4966433075b6Spvalchev     aExp = extractFloat128Exp( a );
4967433075b6Spvalchev     aSign = extractFloat128Sign( a );
4968433075b6Spvalchev     bSig1 = extractFloat128Frac1( b );
4969433075b6Spvalchev     bSig0 = extractFloat128Frac0( b );
4970433075b6Spvalchev     bExp = extractFloat128Exp( b );
4971433075b6Spvalchev     bSign = extractFloat128Sign( b );
4972433075b6Spvalchev     zSign = aSign ^ bSign;
4973433075b6Spvalchev     if ( aExp == 0x7FFF ) {
4974433075b6Spvalchev         if (    ( aSig0 | aSig1 )
4975433075b6Spvalchev              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4976433075b6Spvalchev             return propagateFloat128NaN( a, b );
4977433075b6Spvalchev         }
4978433075b6Spvalchev         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4979433075b6Spvalchev         return packFloat128( zSign, 0x7FFF, 0, 0 );
4980433075b6Spvalchev     }
4981433075b6Spvalchev     if ( bExp == 0x7FFF ) {
4982433075b6Spvalchev         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4983433075b6Spvalchev         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4984433075b6Spvalchev  invalid:
4985433075b6Spvalchev             float_raise( float_flag_invalid );
4986433075b6Spvalchev             z.low = float128_default_nan_low;
4987433075b6Spvalchev             z.high = float128_default_nan_high;
4988433075b6Spvalchev             return z;
4989433075b6Spvalchev         }
4990433075b6Spvalchev         return packFloat128( zSign, 0x7FFF, 0, 0 );
4991433075b6Spvalchev     }
4992433075b6Spvalchev     if ( aExp == 0 ) {
4993433075b6Spvalchev         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4994433075b6Spvalchev         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4995433075b6Spvalchev     }
4996433075b6Spvalchev     if ( bExp == 0 ) {
4997433075b6Spvalchev         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4998433075b6Spvalchev         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4999433075b6Spvalchev     }
5000433075b6Spvalchev     zExp = aExp + bExp - 0x4000;
5001433075b6Spvalchev     aSig0 |= LIT64( 0x0001000000000000 );
5002433075b6Spvalchev     shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5003433075b6Spvalchev     mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5004433075b6Spvalchev     add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5005433075b6Spvalchev     zSig2 |= ( zSig3 != 0 );
5006433075b6Spvalchev     if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5007433075b6Spvalchev         shift128ExtraRightJamming(
5008433075b6Spvalchev             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5009433075b6Spvalchev         ++zExp;
5010433075b6Spvalchev     }
5011433075b6Spvalchev     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5012433075b6Spvalchev 
5013433075b6Spvalchev }
5014433075b6Spvalchev 
5015433075b6Spvalchev /*
5016433075b6Spvalchev -------------------------------------------------------------------------------
5017433075b6Spvalchev Returns the result of dividing the quadruple-precision floating-point value
5018433075b6Spvalchev `a' by the corresponding value `b'.  The operation is performed according to
5019433075b6Spvalchev the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5020433075b6Spvalchev -------------------------------------------------------------------------------
5021433075b6Spvalchev */
float128_div(float128 a,float128 b)5022433075b6Spvalchev float128 float128_div( float128 a, float128 b )
5023433075b6Spvalchev {
5024433075b6Spvalchev     flag aSign, bSign, zSign;
5025433075b6Spvalchev     int32 aExp, bExp, zExp;
5026433075b6Spvalchev     bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5027433075b6Spvalchev     bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5028433075b6Spvalchev     float128 z;
5029433075b6Spvalchev 
5030433075b6Spvalchev     aSig1 = extractFloat128Frac1( a );
5031433075b6Spvalchev     aSig0 = extractFloat128Frac0( a );
5032433075b6Spvalchev     aExp = extractFloat128Exp( a );
5033433075b6Spvalchev     aSign = extractFloat128Sign( a );
5034433075b6Spvalchev     bSig1 = extractFloat128Frac1( b );
5035433075b6Spvalchev     bSig0 = extractFloat128Frac0( b );
5036433075b6Spvalchev     bExp = extractFloat128Exp( b );
5037433075b6Spvalchev     bSign = extractFloat128Sign( b );
5038433075b6Spvalchev     zSign = aSign ^ bSign;
5039433075b6Spvalchev     if ( aExp == 0x7FFF ) {
5040433075b6Spvalchev         if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
5041433075b6Spvalchev         if ( bExp == 0x7FFF ) {
5042433075b6Spvalchev             if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5043433075b6Spvalchev             goto invalid;
5044433075b6Spvalchev         }
5045433075b6Spvalchev         return packFloat128( zSign, 0x7FFF, 0, 0 );
5046433075b6Spvalchev     }
5047433075b6Spvalchev     if ( bExp == 0x7FFF ) {
5048433075b6Spvalchev         if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5049433075b6Spvalchev         return packFloat128( zSign, 0, 0, 0 );
5050433075b6Spvalchev     }
5051433075b6Spvalchev     if ( bExp == 0 ) {
5052433075b6Spvalchev         if ( ( bSig0 | bSig1 ) == 0 ) {
5053433075b6Spvalchev             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5054433075b6Spvalchev  invalid:
5055433075b6Spvalchev                 float_raise( float_flag_invalid );
5056433075b6Spvalchev                 z.low = float128_default_nan_low;
5057433075b6Spvalchev                 z.high = float128_default_nan_high;
5058433075b6Spvalchev                 return z;
5059433075b6Spvalchev             }
5060433075b6Spvalchev             float_raise( float_flag_divbyzero );
5061433075b6Spvalchev             return packFloat128( zSign, 0x7FFF, 0, 0 );
5062433075b6Spvalchev         }
5063433075b6Spvalchev         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5064433075b6Spvalchev     }
5065433075b6Spvalchev     if ( aExp == 0 ) {
5066433075b6Spvalchev         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5067433075b6Spvalchev         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5068433075b6Spvalchev     }
5069433075b6Spvalchev     zExp = aExp - bExp + 0x3FFD;
5070433075b6Spvalchev     shortShift128Left(
5071433075b6Spvalchev         aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5072433075b6Spvalchev     shortShift128Left(
5073433075b6Spvalchev         bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5074433075b6Spvalchev     if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5075433075b6Spvalchev         shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5076433075b6Spvalchev         ++zExp;
5077433075b6Spvalchev     }
5078433075b6Spvalchev     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5079433075b6Spvalchev     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5080433075b6Spvalchev     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5081433075b6Spvalchev     while ( (sbits64) rem0 < 0 ) {
5082433075b6Spvalchev         --zSig0;
5083433075b6Spvalchev         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5084433075b6Spvalchev     }
5085433075b6Spvalchev     zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5086433075b6Spvalchev     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5087433075b6Spvalchev         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5088433075b6Spvalchev         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5089433075b6Spvalchev         while ( (sbits64) rem1 < 0 ) {
5090433075b6Spvalchev             --zSig1;
5091433075b6Spvalchev             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5092433075b6Spvalchev         }
5093433075b6Spvalchev         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5094433075b6Spvalchev     }
5095433075b6Spvalchev     shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5096433075b6Spvalchev     return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5097433075b6Spvalchev 
5098433075b6Spvalchev }
5099433075b6Spvalchev 
5100433075b6Spvalchev /*
5101433075b6Spvalchev -------------------------------------------------------------------------------
5102433075b6Spvalchev Returns the remainder of the quadruple-precision floating-point value `a'
5103433075b6Spvalchev with respect to the corresponding value `b'.  The operation is performed
5104433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5105433075b6Spvalchev -------------------------------------------------------------------------------
5106433075b6Spvalchev */
float128 float128_rem( float128 a, float128 b )
{
    flag aSign, zSign;
    int32 aExp, bExp, expDiff;
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    /*
     * sigMean0 is deliberately unsigned: add128() is handed `bits64 *'
     * result pointers elsewhere in this file, so passing the address of a
     * signed object would be an incompatible pointer type.  Its sign is
     * examined through an explicit cast instead.
     */
    bits64 allZero, alternateASig0, alternateASig1, sigMean0, sigMean1;
    float128 z;

    /* Unpack both operands; only the sign of `a' influences the result. */
    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );

    /* `a' is NaN or infinity. */
    if ( aExp == 0x7FFF ) {
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN( a, b );
        }
        /* infinity rem anything is an invalid operation. */
        goto invalid;
    }
    /* `b' is NaN or infinity; a finite `a' is its own remainder w.r.t. inf. */
    if ( bExp == 0x7FFF ) {
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
        return a;
    }
    /* `b' is zero (invalid) or subnormal (normalize it). */
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise( float_flag_invalid );
            z.low = float128_default_nan_low;
            z.high = float128_default_nan_high;
            return z;
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    /* `a' is zero (returned unchanged) or subnormal (normalize it). */
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /*
     * The exponent of `a' is at least two below that of `b', hence
     * |a| < |b|/2 and `a' already is the remainder.
     */
    if ( expDiff < -1 ) return a;
    /*
     * Make the hidden bits explicit and left-justify both significands;
     * `a' is shifted one bit less when expDiff is -1.
     */
    shortShift128Left(
        aSig0 | LIT64( 0x0001000000000000 ),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /*
     * Consume the exponent difference 61 bits per iteration.  The quotient
     * estimate is lowered by 4 (clamped at 0) before use -- presumably so it
     * never exceeds the true quotient; confirm against estimateDiv128To64.
     */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial step covering the remaining (64 + expDiff) bits. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /*
     * Keep subtracting `b' until the remainder turns negative, remembering
     * the last non-negative value and counting the subtractions in q.
     */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (sbits64) aSig0 );
    /*
     * The sum of the two candidates tells which one is nearer to zero:
     * a negative sum means the negative candidate is larger in magnitude,
     * so the non-negative one is kept.  On an exact tie the candidate that
     * corresponds to an even quotient is kept.
     */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
    if (    ( (sbits64) sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* Convert the chosen remainder to sign-magnitude form and pack it. */
    zSign = ( (sbits64) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );

}
5210433075b6Spvalchev 
5211433075b6Spvalchev /*
5212433075b6Spvalchev -------------------------------------------------------------------------------
5213433075b6Spvalchev Returns the square root of the quadruple-precision floating-point value `a'.
5214433075b6Spvalchev The operation is performed according to the IEC/IEEE Standard for Binary
5215433075b6Spvalchev Floating-Point Arithmetic.
5216433075b6Spvalchev -------------------------------------------------------------------------------
5217433075b6Spvalchev */
float128 float128_sqrt( float128 a )
{
    flag aSign;
    int32 aExp, zExp;
    bits64 aSig0, aSig1, rootHi, rootLo, rootExtra, twiceRootHi;
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
    float128 z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );

    /* NaN operands are propagated. */
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) {
        return propagateFloat128NaN( a, a );
    }
    /* Negative operands: -0 is returned as is, anything else is invalid. */
    if ( aSign ) {
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
            return a;
        }
        float_raise( float_flag_invalid );
        z.low = float128_default_nan_low;
        z.high = float128_default_nan_high;
        return z;
    }
    /* +infinity is its own square root. */
    if ( aExp == 0x7FFF ) {
        return a;
    }
    /* +0 yields +0; subnormals are normalized before proceeding. */
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) {
            return packFloat128( 0, 0, 0, 0 );
        }
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }

    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= LIT64( 0x0001000000000000 );

    /*
     * High 64 bits of the root: seed from estimateSqrt32(), then widen the
     * seed with one estimateDiv128To64() step on the re-aligned significand.
     */
    rootHi = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    rootHi = estimateDiv128To64( aSig0, aSig1, rootHi<<32 ) + ( rootHi<<30 );
    twiceRootHi = rootHi<<1;

    /* Step the estimate down until a - rootHi^2 is non-negative. */
    mul64To128( rootHi, rootHi, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --rootHi;
        twiceRootHi -= 2;
        /* Adds 2*rootHi + 1; rootHi>>63 is the bit lost from twiceRootHi. */
        add128( rem0, rem1, rootHi>>63, twiceRootHi | 1, &rem0, &rem1 );
    }

    /* Low 64 bits of the root, estimated from the remainder. */
    rootLo = estimateDiv128To64( rem1, 0, twiceRootHi );
    if ( ( rootLo & 0x1FFF ) <= 5 ) {
        /*
         * The low bits are close enough to zero that the estimate must be
         * made exact: compute the true remainder and correct rootLo.
         */
        if ( rootLo == 0 ) {
            rootLo = 1;
        }
        mul64To128( twiceRootHi, rootLo, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( rootLo, rootLo, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (sbits64) rem1 < 0 ) {
            --rootLo;
            shortShift128Left( 0, rootLo, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= twiceRootHi;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold a non-zero remainder into the sticky bit. */
        rootLo |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }

    shift128ExtraRightJamming(
        rootHi, rootLo, 0, 14, &rootHi, &rootLo, &rootExtra );
    return roundAndPackFloat128( 0, zExp, rootHi, rootLo, rootExtra );

}
5280433075b6Spvalchev 
5281433075b6Spvalchev /*
5282433075b6Spvalchev -------------------------------------------------------------------------------
5283433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is equal to
5284433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The comparison is performed
5285433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5286433075b6Spvalchev -------------------------------------------------------------------------------
5287433075b6Spvalchev */
float128_eq(float128 a,float128 b)5288433075b6Spvalchev flag float128_eq( float128 a, float128 b )
5289433075b6Spvalchev {
5290433075b6Spvalchev 
5291433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5292433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5293433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5294433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5295433075b6Spvalchev        ) {
5296433075b6Spvalchev         if (    float128_is_signaling_nan( a )
5297433075b6Spvalchev              || float128_is_signaling_nan( b ) ) {
5298433075b6Spvalchev             float_raise( float_flag_invalid );
5299433075b6Spvalchev         }
5300433075b6Spvalchev         return 0;
5301433075b6Spvalchev     }
5302433075b6Spvalchev     return
5303433075b6Spvalchev            ( a.low == b.low )
5304433075b6Spvalchev         && (    ( a.high == b.high )
5305433075b6Spvalchev              || (    ( a.low == 0 )
5306433075b6Spvalchev                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5307433075b6Spvalchev            );
5308433075b6Spvalchev 
5309433075b6Spvalchev }
5310433075b6Spvalchev 
5311433075b6Spvalchev /*
5312433075b6Spvalchev -------------------------------------------------------------------------------
5313433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is less than
5314433075b6Spvalchev or equal to the corresponding value `b', and 0 otherwise.  The comparison
5315433075b6Spvalchev is performed according to the IEC/IEEE Standard for Binary Floating-Point
5316433075b6Spvalchev Arithmetic.
5317433075b6Spvalchev -------------------------------------------------------------------------------
5318433075b6Spvalchev */
float128_le(float128 a,float128 b)5319433075b6Spvalchev flag float128_le( float128 a, float128 b )
5320433075b6Spvalchev {
5321433075b6Spvalchev     flag aSign, bSign;
5322433075b6Spvalchev 
5323433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5324433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5325433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5326433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5327433075b6Spvalchev        ) {
5328433075b6Spvalchev         float_raise( float_flag_invalid );
5329433075b6Spvalchev         return 0;
5330433075b6Spvalchev     }
5331433075b6Spvalchev     aSign = extractFloat128Sign( a );
5332433075b6Spvalchev     bSign = extractFloat128Sign( b );
5333433075b6Spvalchev     if ( aSign != bSign ) {
5334433075b6Spvalchev         return
5335433075b6Spvalchev                aSign
5336433075b6Spvalchev             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5337433075b6Spvalchev                  == 0 );
5338433075b6Spvalchev     }
5339433075b6Spvalchev     return
5340433075b6Spvalchev           aSign ? le128( b.high, b.low, a.high, a.low )
5341433075b6Spvalchev         : le128( a.high, a.low, b.high, b.low );
5342433075b6Spvalchev 
5343433075b6Spvalchev }
5344433075b6Spvalchev 
5345433075b6Spvalchev /*
5346433075b6Spvalchev -------------------------------------------------------------------------------
5347433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is less than
5348433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The comparison is performed
5349433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5350433075b6Spvalchev -------------------------------------------------------------------------------
5351433075b6Spvalchev */
float128_lt(float128 a,float128 b)5352433075b6Spvalchev flag float128_lt( float128 a, float128 b )
5353433075b6Spvalchev {
5354433075b6Spvalchev     flag aSign, bSign;
5355433075b6Spvalchev 
5356433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5357433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5358433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5359433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5360433075b6Spvalchev        ) {
5361433075b6Spvalchev         float_raise( float_flag_invalid );
5362433075b6Spvalchev         return 0;
5363433075b6Spvalchev     }
5364433075b6Spvalchev     aSign = extractFloat128Sign( a );
5365433075b6Spvalchev     bSign = extractFloat128Sign( b );
5366433075b6Spvalchev     if ( aSign != bSign ) {
5367433075b6Spvalchev         return
5368433075b6Spvalchev                aSign
5369433075b6Spvalchev             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5370433075b6Spvalchev                  != 0 );
5371433075b6Spvalchev     }
5372433075b6Spvalchev     return
5373433075b6Spvalchev           aSign ? lt128( b.high, b.low, a.high, a.low )
5374433075b6Spvalchev         : lt128( a.high, a.low, b.high, b.low );
5375433075b6Spvalchev 
5376433075b6Spvalchev }
5377433075b6Spvalchev 
5378433075b6Spvalchev /*
5379433075b6Spvalchev -------------------------------------------------------------------------------
5380433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is equal to
5381433075b6Spvalchev the corresponding value `b', and 0 otherwise.  The invalid exception is
5382433075b6Spvalchev raised if either operand is a NaN.  Otherwise, the comparison is performed
5383433075b6Spvalchev according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5384433075b6Spvalchev -------------------------------------------------------------------------------
5385433075b6Spvalchev */
float128_eq_signaling(float128 a,float128 b)5386433075b6Spvalchev flag float128_eq_signaling( float128 a, float128 b )
5387433075b6Spvalchev {
5388433075b6Spvalchev 
5389433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5390433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5391433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5392433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5393433075b6Spvalchev        ) {
5394433075b6Spvalchev         float_raise( float_flag_invalid );
5395433075b6Spvalchev         return 0;
5396433075b6Spvalchev     }
5397433075b6Spvalchev     return
5398433075b6Spvalchev            ( a.low == b.low )
5399433075b6Spvalchev         && (    ( a.high == b.high )
5400433075b6Spvalchev              || (    ( a.low == 0 )
5401433075b6Spvalchev                   && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5402433075b6Spvalchev            );
5403433075b6Spvalchev 
5404433075b6Spvalchev }
5405433075b6Spvalchev 
5406433075b6Spvalchev /*
5407433075b6Spvalchev -------------------------------------------------------------------------------
5408433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is less than
5409433075b6Spvalchev or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5410433075b6Spvalchev cause an exception.  Otherwise, the comparison is performed according to the
5411433075b6Spvalchev IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5412433075b6Spvalchev -------------------------------------------------------------------------------
5413433075b6Spvalchev */
float128_le_quiet(float128 a,float128 b)5414433075b6Spvalchev flag float128_le_quiet( float128 a, float128 b )
5415433075b6Spvalchev {
5416433075b6Spvalchev     flag aSign, bSign;
5417433075b6Spvalchev 
5418433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5419433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5420433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5421433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5422433075b6Spvalchev        ) {
5423433075b6Spvalchev         if (    float128_is_signaling_nan( a )
5424433075b6Spvalchev              || float128_is_signaling_nan( b ) ) {
5425433075b6Spvalchev             float_raise( float_flag_invalid );
5426433075b6Spvalchev         }
5427433075b6Spvalchev         return 0;
5428433075b6Spvalchev     }
5429433075b6Spvalchev     aSign = extractFloat128Sign( a );
5430433075b6Spvalchev     bSign = extractFloat128Sign( b );
5431433075b6Spvalchev     if ( aSign != bSign ) {
5432433075b6Spvalchev         return
5433433075b6Spvalchev                aSign
5434433075b6Spvalchev             || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5435433075b6Spvalchev                  == 0 );
5436433075b6Spvalchev     }
5437433075b6Spvalchev     return
5438433075b6Spvalchev           aSign ? le128( b.high, b.low, a.high, a.low )
5439433075b6Spvalchev         : le128( a.high, a.low, b.high, b.low );
5440433075b6Spvalchev 
5441433075b6Spvalchev }
5442433075b6Spvalchev 
5443433075b6Spvalchev /*
5444433075b6Spvalchev -------------------------------------------------------------------------------
5445433075b6Spvalchev Returns 1 if the quadruple-precision floating-point value `a' is less than
5446433075b6Spvalchev the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5447433075b6Spvalchev exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5448433075b6Spvalchev Standard for Binary Floating-Point Arithmetic.
5449433075b6Spvalchev -------------------------------------------------------------------------------
5450433075b6Spvalchev */
float128_lt_quiet(float128 a,float128 b)5451433075b6Spvalchev flag float128_lt_quiet( float128 a, float128 b )
5452433075b6Spvalchev {
5453433075b6Spvalchev     flag aSign, bSign;
5454433075b6Spvalchev 
5455433075b6Spvalchev     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5456433075b6Spvalchev               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5457433075b6Spvalchev          || (    ( extractFloat128Exp( b ) == 0x7FFF )
5458433075b6Spvalchev               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5459433075b6Spvalchev        ) {
5460433075b6Spvalchev         if (    float128_is_signaling_nan( a )
5461433075b6Spvalchev              || float128_is_signaling_nan( b ) ) {
5462433075b6Spvalchev             float_raise( float_flag_invalid );
5463433075b6Spvalchev         }
5464433075b6Spvalchev         return 0;
5465433075b6Spvalchev     }
5466433075b6Spvalchev     aSign = extractFloat128Sign( a );
5467433075b6Spvalchev     bSign = extractFloat128Sign( b );
5468433075b6Spvalchev     if ( aSign != bSign ) {
5469433075b6Spvalchev         return
5470433075b6Spvalchev                aSign
5471433075b6Spvalchev             && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5472433075b6Spvalchev                  != 0 );
5473433075b6Spvalchev     }
5474433075b6Spvalchev     return
5475433075b6Spvalchev           aSign ? lt128( b.high, b.low, a.high, a.low )
5476433075b6Spvalchev         : lt128( a.high, a.low, b.high, b.low );
5477433075b6Spvalchev 
5478433075b6Spvalchev }
5479433075b6Spvalchev 
5480433075b6Spvalchev #endif
5481433075b6Spvalchev 
5482433075b6Spvalchev 
5483433075b6Spvalchev #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5484433075b6Spvalchev 
5485433075b6Spvalchev /*
5486433075b6Spvalchev  * These two routines are not part of the original softfloat distribution.
5487433075b6Spvalchev  *
5488433075b6Spvalchev  * They are based on the corresponding conversions to integer but return
5489433075b6Spvalchev  * unsigned numbers instead since these functions are required by GCC.
5490433075b6Spvalchev  *
5491433075b6Spvalchev  * Added by Mark Brinicombe <mark@netbsd.org>	27/09/97
5492433075b6Spvalchev  *
5493433075b6Spvalchev  * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5494433075b6Spvalchev  */
5495433075b6Spvalchev 
5496433075b6Spvalchev /*
5497433075b6Spvalchev -------------------------------------------------------------------------------
5498433075b6Spvalchev Returns the result of converting the double-precision floating-point value
5499433075b6Spvalchev `a' to the 32-bit unsigned integer format.  The conversion is
5500433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-point
5501433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.  If
5502433075b6Spvalchev `a' is a NaN, the largest positive integer is returned.  If the conversion
overflows, the largest positive integer is returned.
5504433075b6Spvalchev -------------------------------------------------------------------------------
5505433075b6Spvalchev */
float64_to_uint32_round_to_zero(float64 a)5506433075b6Spvalchev uint32 float64_to_uint32_round_to_zero( float64 a )
5507433075b6Spvalchev {
5508433075b6Spvalchev     flag aSign;
5509433075b6Spvalchev     int16 aExp, shiftCount;
5510433075b6Spvalchev     bits64 aSig, savedASig;
5511433075b6Spvalchev     uint32 z;
5512433075b6Spvalchev 
5513433075b6Spvalchev     aSig = extractFloat64Frac( a );
5514433075b6Spvalchev     aExp = extractFloat64Exp( a );
5515433075b6Spvalchev     aSign = extractFloat64Sign( a );
5516433075b6Spvalchev 
5517433075b6Spvalchev     if (aSign) {
5518433075b6Spvalchev         float_raise( float_flag_invalid );
5519433075b6Spvalchev     	return(0);
5520433075b6Spvalchev     }
5521433075b6Spvalchev 
5522433075b6Spvalchev     if ( 0x41E < aExp ) {
5523433075b6Spvalchev         float_raise( float_flag_invalid );
5524433075b6Spvalchev         return 0xffffffff;
5525433075b6Spvalchev     }
5526433075b6Spvalchev     else if ( aExp < 0x3FF ) {
5527433075b6Spvalchev         if ( aExp || aSig ) float_set_inexact();
5528433075b6Spvalchev         return 0;
5529433075b6Spvalchev     }
5530433075b6Spvalchev     aSig |= LIT64( 0x0010000000000000 );
5531433075b6Spvalchev     shiftCount = 0x433 - aExp;
5532433075b6Spvalchev     savedASig = aSig;
5533433075b6Spvalchev     aSig >>= shiftCount;
5534433075b6Spvalchev     z = aSig;
5535433075b6Spvalchev     if ( ( aSig<<shiftCount ) != savedASig ) {
5536433075b6Spvalchev         float_set_inexact();
5537433075b6Spvalchev     }
5538433075b6Spvalchev     return z;
5539433075b6Spvalchev 
5540433075b6Spvalchev }
5541433075b6Spvalchev 
5542433075b6Spvalchev /*
5543433075b6Spvalchev -------------------------------------------------------------------------------
5544433075b6Spvalchev Returns the result of converting the single-precision floating-point value
5545433075b6Spvalchev `a' to the 32-bit unsigned integer format.  The conversion is
5546433075b6Spvalchev performed according to the IEC/IEEE Standard for Binary Floating-point
5547433075b6Spvalchev Arithmetic, except that the conversion is always rounded toward zero.  If
5548433075b6Spvalchev `a' is a NaN, the largest positive integer is returned.  If the conversion
5549433075b6Spvalchev overflows, the largest positive integer is returned.
5550433075b6Spvalchev -------------------------------------------------------------------------------
5551433075b6Spvalchev */
float32_to_uint32_round_to_zero(float32 a)5552433075b6Spvalchev uint32 float32_to_uint32_round_to_zero( float32 a )
5553433075b6Spvalchev {
5554433075b6Spvalchev     flag aSign;
5555433075b6Spvalchev     int16 aExp, shiftCount;
5556433075b6Spvalchev     bits32 aSig;
5557433075b6Spvalchev     uint32 z;
5558433075b6Spvalchev 
5559433075b6Spvalchev     aSig = extractFloat32Frac( a );
5560433075b6Spvalchev     aExp = extractFloat32Exp( a );
5561433075b6Spvalchev     aSign = extractFloat32Sign( a );
5562433075b6Spvalchev     shiftCount = aExp - 0x9E;
5563433075b6Spvalchev 
5564433075b6Spvalchev     if (aSign) {
5565433075b6Spvalchev         float_raise( float_flag_invalid );
5566433075b6Spvalchev     	return(0);
5567433075b6Spvalchev     }
5568433075b6Spvalchev     if ( 0 < shiftCount ) {
5569433075b6Spvalchev         float_raise( float_flag_invalid );
5570433075b6Spvalchev         return 0xFFFFFFFF;
5571433075b6Spvalchev     }
5572433075b6Spvalchev     else if ( aExp <= 0x7E ) {
5573433075b6Spvalchev         if ( aExp | aSig ) float_set_inexact();
5574433075b6Spvalchev         return 0;
5575433075b6Spvalchev     }
5576433075b6Spvalchev     aSig = ( aSig | 0x800000 )<<8;
5577433075b6Spvalchev     z = aSig>>( - shiftCount );
5578433075b6Spvalchev     if ( aSig<<( shiftCount & 31 ) ) {
5579433075b6Spvalchev         float_set_inexact();
5580433075b6Spvalchev     }
5581433075b6Spvalchev     return z;
5582433075b6Spvalchev 
5583433075b6Spvalchev }
5584433075b6Spvalchev 
5585433075b6Spvalchev #endif
5586