/* xref: /netbsd-src/lib/libc/softfloat/bits64/softfloat-macros (revision 39052f3b3dc965dfbf70c6299a344b01fd23804d) */
/* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */

/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
32936b7f4cSbjh21
33936b7f4cSbjh21/*
34936b7f4cSbjh21-------------------------------------------------------------------------------
35936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'.  If any nonzero
36936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of
37936b7f4cSbjh21the result by setting the least significant bit to 1.  The value of `count'
38936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the
39936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero.
40936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'.
41936b7f4cSbjh21-------------------------------------------------------------------------------
42936b7f4cSbjh21*/
43936b7f4cSbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
44936b7f4cSbjh21{
45936b7f4cSbjh21    bits32 z;
46936b7f4cSbjh21
47936b7f4cSbjh21    if ( count == 0 ) {
48936b7f4cSbjh21        z = a;
49936b7f4cSbjh21    }
50936b7f4cSbjh21    else if ( count < 32 ) {
51936b7f4cSbjh21        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
52936b7f4cSbjh21    }
53936b7f4cSbjh21    else {
54936b7f4cSbjh21        z = ( a != 0 );
55936b7f4cSbjh21    }
56936b7f4cSbjh21    *zPtr = z;
57936b7f4cSbjh21
58936b7f4cSbjh21}
59936b7f4cSbjh21
60936b7f4cSbjh21/*
61936b7f4cSbjh21-------------------------------------------------------------------------------
62936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'.  If any nonzero
63936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of
64936b7f4cSbjh21the result by setting the least significant bit to 1.  The value of `count'
65936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 64, the
66936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero.
67936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'.
68936b7f4cSbjh21-------------------------------------------------------------------------------
69936b7f4cSbjh21*/
70936b7f4cSbjh21INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
71936b7f4cSbjh21{
72936b7f4cSbjh21    bits64 z;
73936b7f4cSbjh21
74936b7f4cSbjh21    if ( count == 0 ) {
75936b7f4cSbjh21        z = a;
76936b7f4cSbjh21    }
77936b7f4cSbjh21    else if ( count < 64 ) {
78936b7f4cSbjh21        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
79936b7f4cSbjh21    }
80936b7f4cSbjh21    else {
81936b7f4cSbjh21        z = ( a != 0 );
82936b7f4cSbjh21    }
83936b7f4cSbjh21    *zPtr = z;
84936b7f4cSbjh21
85936b7f4cSbjh21}
86936b7f4cSbjh21
87936b7f4cSbjh21/*
88936b7f4cSbjh21-------------------------------------------------------------------------------
89936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
90936b7f4cSbjh21_plus_ the number of bits given in `count'.  The shifted result is at most
91936b7f4cSbjh2164 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
92936b7f4cSbjh21bits shifted off form a second 64-bit result as follows:  The _last_ bit
93936b7f4cSbjh21shifted off is the most-significant bit of the extra result, and the other
94936b7f4cSbjh2163 bits of the extra result are all zero if and only if _all_but_the_last_
95936b7f4cSbjh21bits shifted off were all zero.  This extra result is stored in the location
96936b7f4cSbjh21pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
97936b7f4cSbjh21    (This routine makes more sense if `a0' and `a1' are considered to form a
98936b7f4cSbjh21fixed-point value with binary point between `a0' and `a1'.  This fixed-point
99936b7f4cSbjh21value is shifted right by the number of bits given in `count', and the
100936b7f4cSbjh21integer part of the result is returned at the location pointed to by
101936b7f4cSbjh21`z0Ptr'.  The fractional part of the result may be slightly corrupted as
102936b7f4cSbjh21described above, and is returned at the location pointed to by `z1Ptr'.)
103936b7f4cSbjh21-------------------------------------------------------------------------------
104936b7f4cSbjh21*/
105936b7f4cSbjh21INLINE void
106936b7f4cSbjh21 shift64ExtraRightJamming(
107936b7f4cSbjh21     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
108936b7f4cSbjh21{
109936b7f4cSbjh21    bits64 z0, z1;
110936b7f4cSbjh21    int8 negCount = ( - count ) & 63;
111936b7f4cSbjh21
112936b7f4cSbjh21    if ( count == 0 ) {
113936b7f4cSbjh21        z1 = a1;
114936b7f4cSbjh21        z0 = a0;
115936b7f4cSbjh21    }
116936b7f4cSbjh21    else if ( count < 64 ) {
117936b7f4cSbjh21        z1 = ( a0<<negCount ) | ( a1 != 0 );
118936b7f4cSbjh21        z0 = a0>>count;
119936b7f4cSbjh21    }
120936b7f4cSbjh21    else {
121936b7f4cSbjh21        if ( count == 64 ) {
122936b7f4cSbjh21            z1 = a0 | ( a1 != 0 );
123936b7f4cSbjh21        }
124936b7f4cSbjh21        else {
125936b7f4cSbjh21            z1 = ( ( a0 | a1 ) != 0 );
126936b7f4cSbjh21        }
127936b7f4cSbjh21        z0 = 0;
128936b7f4cSbjh21    }
129936b7f4cSbjh21    *z1Ptr = z1;
130936b7f4cSbjh21    *z0Ptr = z0;
131936b7f4cSbjh21
132936b7f4cSbjh21}
133936b7f4cSbjh21
134936b7f4cSbjh21/*
135936b7f4cSbjh21-------------------------------------------------------------------------------
136936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
137936b7f4cSbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
138936b7f4cSbjh21of `count' can be arbitrarily large; in particular, if `count' is greater
139936b7f4cSbjh21than 128, the result will be 0.  The result is broken into two 64-bit pieces
140936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
141936b7f4cSbjh21-------------------------------------------------------------------------------
142936b7f4cSbjh21*/
143936b7f4cSbjh21INLINE void
144936b7f4cSbjh21 shift128Right(
145936b7f4cSbjh21     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
146936b7f4cSbjh21{
147936b7f4cSbjh21    bits64 z0, z1;
148936b7f4cSbjh21    int8 negCount = ( - count ) & 63;
149936b7f4cSbjh21
150936b7f4cSbjh21    if ( count == 0 ) {
151936b7f4cSbjh21        z1 = a1;
152936b7f4cSbjh21        z0 = a0;
153936b7f4cSbjh21    }
154936b7f4cSbjh21    else if ( count < 64 ) {
155936b7f4cSbjh21        z1 = ( a0<<negCount ) | ( a1>>count );
156936b7f4cSbjh21        z0 = a0>>count;
157936b7f4cSbjh21    }
158936b7f4cSbjh21    else {
159936b7f4cSbjh21        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
160936b7f4cSbjh21        z0 = 0;
161936b7f4cSbjh21    }
162936b7f4cSbjh21    *z1Ptr = z1;
163936b7f4cSbjh21    *z0Ptr = z0;
164936b7f4cSbjh21
165936b7f4cSbjh21}
166936b7f4cSbjh21
167936b7f4cSbjh21/*
168936b7f4cSbjh21-------------------------------------------------------------------------------
169936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170936b7f4cSbjh21number of bits given in `count'.  If any nonzero bits are shifted off, they
171936b7f4cSbjh21are ``jammed'' into the least significant bit of the result by setting the
172936b7f4cSbjh21least significant bit to 1.  The value of `count' can be arbitrarily large;
173936b7f4cSbjh21in particular, if `count' is greater than 128, the result will be either
174936b7f4cSbjh210 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175936b7f4cSbjh21nonzero.  The result is broken into two 64-bit pieces which are stored at
176936b7f4cSbjh21the locations pointed to by `z0Ptr' and `z1Ptr'.
177936b7f4cSbjh21-------------------------------------------------------------------------------
178936b7f4cSbjh21*/
179936b7f4cSbjh21INLINE void
180936b7f4cSbjh21 shift128RightJamming(
181936b7f4cSbjh21     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182936b7f4cSbjh21{
183936b7f4cSbjh21    bits64 z0, z1;
184936b7f4cSbjh21    int8 negCount = ( - count ) & 63;
185936b7f4cSbjh21
186936b7f4cSbjh21    if ( count == 0 ) {
187936b7f4cSbjh21        z1 = a1;
188936b7f4cSbjh21        z0 = a0;
189936b7f4cSbjh21    }
190936b7f4cSbjh21    else if ( count < 64 ) {
191936b7f4cSbjh21        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192936b7f4cSbjh21        z0 = a0>>count;
193936b7f4cSbjh21    }
194936b7f4cSbjh21    else {
195936b7f4cSbjh21        if ( count == 64 ) {
196936b7f4cSbjh21            z1 = a0 | ( a1 != 0 );
197936b7f4cSbjh21        }
198936b7f4cSbjh21        else if ( count < 128 ) {
199936b7f4cSbjh21            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200936b7f4cSbjh21        }
201936b7f4cSbjh21        else {
202936b7f4cSbjh21            z1 = ( ( a0 | a1 ) != 0 );
203936b7f4cSbjh21        }
204936b7f4cSbjh21        z0 = 0;
205936b7f4cSbjh21    }
206936b7f4cSbjh21    *z1Ptr = z1;
207936b7f4cSbjh21    *z0Ptr = z0;
208936b7f4cSbjh21
209936b7f4cSbjh21}
210936b7f4cSbjh21
211936b7f4cSbjh21/*
212936b7f4cSbjh21-------------------------------------------------------------------------------
213936b7f4cSbjh21Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
214936b7f4cSbjh21by 64 _plus_ the number of bits given in `count'.  The shifted result is
215936b7f4cSbjh21at most 128 nonzero bits; these are broken into two 64-bit pieces which are
216936b7f4cSbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
217936b7f4cSbjh21off form a third 64-bit result as follows:  The _last_ bit shifted off is
218936b7f4cSbjh21the most-significant bit of the extra result, and the other 63 bits of the
219936b7f4cSbjh21extra result are all zero if and only if _all_but_the_last_ bits shifted off
220936b7f4cSbjh21were all zero.  This extra result is stored in the location pointed to by
221936b7f4cSbjh21`z2Ptr'.  The value of `count' can be arbitrarily large.
222936b7f4cSbjh21    (This routine makes more sense if `a0', `a1', and `a2' are considered
223936b7f4cSbjh21to form a fixed-point value with binary point between `a1' and `a2'.  This
224936b7f4cSbjh21fixed-point value is shifted right by the number of bits given in `count',
225936b7f4cSbjh21and the integer part of the result is returned at the locations pointed to
226936b7f4cSbjh21by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
227936b7f4cSbjh21corrupted as described above, and is returned at the location pointed to by
228936b7f4cSbjh21`z2Ptr'.)
229936b7f4cSbjh21-------------------------------------------------------------------------------
230936b7f4cSbjh21*/
231936b7f4cSbjh21INLINE void
232936b7f4cSbjh21 shift128ExtraRightJamming(
233936b7f4cSbjh21     bits64 a0,
234936b7f4cSbjh21     bits64 a1,
235936b7f4cSbjh21     bits64 a2,
236936b7f4cSbjh21     int16 count,
237936b7f4cSbjh21     bits64 *z0Ptr,
238936b7f4cSbjh21     bits64 *z1Ptr,
239936b7f4cSbjh21     bits64 *z2Ptr
240936b7f4cSbjh21 )
241936b7f4cSbjh21{
242936b7f4cSbjh21    bits64 z0, z1, z2;
243936b7f4cSbjh21    int8 negCount = ( - count ) & 63;
244936b7f4cSbjh21
245936b7f4cSbjh21    if ( count == 0 ) {
246936b7f4cSbjh21        z2 = a2;
247936b7f4cSbjh21        z1 = a1;
248936b7f4cSbjh21        z0 = a0;
249936b7f4cSbjh21    }
250936b7f4cSbjh21    else {
251936b7f4cSbjh21        if ( count < 64 ) {
252936b7f4cSbjh21            z2 = a1<<negCount;
253936b7f4cSbjh21            z1 = ( a0<<negCount ) | ( a1>>count );
254936b7f4cSbjh21            z0 = a0>>count;
255936b7f4cSbjh21        }
256936b7f4cSbjh21        else {
257936b7f4cSbjh21            if ( count == 64 ) {
258936b7f4cSbjh21                z2 = a1;
259936b7f4cSbjh21                z1 = a0;
260936b7f4cSbjh21            }
261936b7f4cSbjh21            else {
262936b7f4cSbjh21                a2 |= a1;
263936b7f4cSbjh21                if ( count < 128 ) {
264936b7f4cSbjh21                    z2 = a0<<negCount;
265936b7f4cSbjh21                    z1 = a0>>( count & 63 );
266936b7f4cSbjh21                }
267936b7f4cSbjh21                else {
268936b7f4cSbjh21                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
269936b7f4cSbjh21                    z1 = 0;
270936b7f4cSbjh21                }
271936b7f4cSbjh21            }
272936b7f4cSbjh21            z0 = 0;
273936b7f4cSbjh21        }
274936b7f4cSbjh21        z2 |= ( a2 != 0 );
275936b7f4cSbjh21    }
276936b7f4cSbjh21    *z2Ptr = z2;
277936b7f4cSbjh21    *z1Ptr = z1;
278936b7f4cSbjh21    *z0Ptr = z0;
279936b7f4cSbjh21
280936b7f4cSbjh21}
281936b7f4cSbjh21
282936b7f4cSbjh21/*
283936b7f4cSbjh21-------------------------------------------------------------------------------
284936b7f4cSbjh21Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
285936b7f4cSbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
286936b7f4cSbjh21of `count' must be less than 64.  The result is broken into two 64-bit
287936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
288936b7f4cSbjh21-------------------------------------------------------------------------------
289936b7f4cSbjh21*/
290936b7f4cSbjh21INLINE void
291936b7f4cSbjh21 shortShift128Left(
292936b7f4cSbjh21     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
293936b7f4cSbjh21{
294936b7f4cSbjh21
295936b7f4cSbjh21    *z1Ptr = a1<<count;
296936b7f4cSbjh21    *z0Ptr =
297936b7f4cSbjh21        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
298936b7f4cSbjh21
299936b7f4cSbjh21}
300936b7f4cSbjh21
301936b7f4cSbjh21/*
302936b7f4cSbjh21-------------------------------------------------------------------------------
303936b7f4cSbjh21Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
304936b7f4cSbjh21by the number of bits given in `count'.  Any bits shifted off are lost.
305936b7f4cSbjh21The value of `count' must be less than 64.  The result is broken into three
306936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr',
307936b7f4cSbjh21`z1Ptr', and `z2Ptr'.
308936b7f4cSbjh21-------------------------------------------------------------------------------
309936b7f4cSbjh21*/
310936b7f4cSbjh21INLINE void
311936b7f4cSbjh21 shortShift192Left(
312936b7f4cSbjh21     bits64 a0,
313936b7f4cSbjh21     bits64 a1,
314936b7f4cSbjh21     bits64 a2,
315936b7f4cSbjh21     int16 count,
316936b7f4cSbjh21     bits64 *z0Ptr,
317936b7f4cSbjh21     bits64 *z1Ptr,
318936b7f4cSbjh21     bits64 *z2Ptr
319936b7f4cSbjh21 )
320936b7f4cSbjh21{
321936b7f4cSbjh21    bits64 z0, z1, z2;
322936b7f4cSbjh21    int8 negCount;
323936b7f4cSbjh21
324936b7f4cSbjh21    z2 = a2<<count;
325936b7f4cSbjh21    z1 = a1<<count;
326936b7f4cSbjh21    z0 = a0<<count;
327936b7f4cSbjh21    if ( 0 < count ) {
328936b7f4cSbjh21        negCount = ( ( - count ) & 63 );
329936b7f4cSbjh21        z1 |= a2>>negCount;
330936b7f4cSbjh21        z0 |= a1>>negCount;
331936b7f4cSbjh21    }
332936b7f4cSbjh21    *z2Ptr = z2;
333936b7f4cSbjh21    *z1Ptr = z1;
334936b7f4cSbjh21    *z0Ptr = z0;
335936b7f4cSbjh21
336936b7f4cSbjh21}
337936b7f4cSbjh21
338936b7f4cSbjh21/*
339936b7f4cSbjh21-------------------------------------------------------------------------------
340936b7f4cSbjh21Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
341936b7f4cSbjh21value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
342936b7f4cSbjh21any carry out is lost.  The result is broken into two 64-bit pieces which
343936b7f4cSbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
344936b7f4cSbjh21-------------------------------------------------------------------------------
345936b7f4cSbjh21*/
346936b7f4cSbjh21INLINE void
347936b7f4cSbjh21 add128(
348936b7f4cSbjh21     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
349936b7f4cSbjh21{
350936b7f4cSbjh21    bits64 z1;
351936b7f4cSbjh21
352936b7f4cSbjh21    z1 = a1 + b1;
353936b7f4cSbjh21    *z1Ptr = z1;
354936b7f4cSbjh21    *z0Ptr = a0 + b0 + ( z1 < a1 );
355936b7f4cSbjh21
356936b7f4cSbjh21}
357936b7f4cSbjh21
358936b7f4cSbjh21/*
359936b7f4cSbjh21-------------------------------------------------------------------------------
360936b7f4cSbjh21Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
361936b7f4cSbjh21192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
362936b7f4cSbjh21modulo 2^192, so any carry out is lost.  The result is broken into three
363936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr',
364936b7f4cSbjh21`z1Ptr', and `z2Ptr'.
365936b7f4cSbjh21-------------------------------------------------------------------------------
366936b7f4cSbjh21*/
367936b7f4cSbjh21INLINE void
368936b7f4cSbjh21 add192(
369936b7f4cSbjh21     bits64 a0,
370936b7f4cSbjh21     bits64 a1,
371936b7f4cSbjh21     bits64 a2,
372936b7f4cSbjh21     bits64 b0,
373936b7f4cSbjh21     bits64 b1,
374936b7f4cSbjh21     bits64 b2,
375936b7f4cSbjh21     bits64 *z0Ptr,
376936b7f4cSbjh21     bits64 *z1Ptr,
377936b7f4cSbjh21     bits64 *z2Ptr
378936b7f4cSbjh21 )
379936b7f4cSbjh21{
380936b7f4cSbjh21    bits64 z0, z1, z2;
381936b7f4cSbjh21    int8 carry0, carry1;
382936b7f4cSbjh21
383936b7f4cSbjh21    z2 = a2 + b2;
384936b7f4cSbjh21    carry1 = ( z2 < a2 );
385936b7f4cSbjh21    z1 = a1 + b1;
386936b7f4cSbjh21    carry0 = ( z1 < a1 );
387936b7f4cSbjh21    z0 = a0 + b0;
388936b7f4cSbjh21    z1 += carry1;
389ea680523Stron    z0 += ( z1 < (bits64)carry1 );
390936b7f4cSbjh21    z0 += carry0;
391936b7f4cSbjh21    *z2Ptr = z2;
392936b7f4cSbjh21    *z1Ptr = z1;
393936b7f4cSbjh21    *z0Ptr = z0;
394936b7f4cSbjh21
395936b7f4cSbjh21}
396936b7f4cSbjh21
397936b7f4cSbjh21/*
398936b7f4cSbjh21-------------------------------------------------------------------------------
399936b7f4cSbjh21Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
400936b7f4cSbjh21128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
401936b7f4cSbjh212^128, so any borrow out (carry out) is lost.  The result is broken into two
402936b7f4cSbjh2164-bit pieces which are stored at the locations pointed to by `z0Ptr' and
403936b7f4cSbjh21`z1Ptr'.
404936b7f4cSbjh21-------------------------------------------------------------------------------
405936b7f4cSbjh21*/
406936b7f4cSbjh21INLINE void
407936b7f4cSbjh21 sub128(
408936b7f4cSbjh21     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
409936b7f4cSbjh21{
410936b7f4cSbjh21
411936b7f4cSbjh21    *z1Ptr = a1 - b1;
412936b7f4cSbjh21    *z0Ptr = a0 - b0 - ( a1 < b1 );
413936b7f4cSbjh21
414936b7f4cSbjh21}
415936b7f4cSbjh21
416936b7f4cSbjh21/*
417936b7f4cSbjh21-------------------------------------------------------------------------------
418936b7f4cSbjh21Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
419936b7f4cSbjh21from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
420936b7f4cSbjh21Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
421936b7f4cSbjh21result is broken into three 64-bit pieces which are stored at the locations
422936b7f4cSbjh21pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
423936b7f4cSbjh21-------------------------------------------------------------------------------
424936b7f4cSbjh21*/
425936b7f4cSbjh21INLINE void
426936b7f4cSbjh21 sub192(
427936b7f4cSbjh21     bits64 a0,
428936b7f4cSbjh21     bits64 a1,
429936b7f4cSbjh21     bits64 a2,
430936b7f4cSbjh21     bits64 b0,
431936b7f4cSbjh21     bits64 b1,
432936b7f4cSbjh21     bits64 b2,
433936b7f4cSbjh21     bits64 *z0Ptr,
434936b7f4cSbjh21     bits64 *z1Ptr,
435936b7f4cSbjh21     bits64 *z2Ptr
436936b7f4cSbjh21 )
437936b7f4cSbjh21{
438936b7f4cSbjh21    bits64 z0, z1, z2;
439936b7f4cSbjh21    int8 borrow0, borrow1;
440936b7f4cSbjh21
441936b7f4cSbjh21    z2 = a2 - b2;
442936b7f4cSbjh21    borrow1 = ( a2 < b2 );
443936b7f4cSbjh21    z1 = a1 - b1;
444936b7f4cSbjh21    borrow0 = ( a1 < b1 );
445936b7f4cSbjh21    z0 = a0 - b0;
446ea680523Stron    z0 -= ( z1 < (bits64)borrow1 );
447936b7f4cSbjh21    z1 -= borrow1;
448936b7f4cSbjh21    z0 -= borrow0;
449936b7f4cSbjh21    *z2Ptr = z2;
450936b7f4cSbjh21    *z1Ptr = z1;
451936b7f4cSbjh21    *z0Ptr = z0;
452936b7f4cSbjh21
453936b7f4cSbjh21}
454936b7f4cSbjh21
455936b7f4cSbjh21/*
456936b7f4cSbjh21-------------------------------------------------------------------------------
457936b7f4cSbjh21Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
458936b7f4cSbjh21into two 64-bit pieces which are stored at the locations pointed to by
459936b7f4cSbjh21`z0Ptr' and `z1Ptr'.
460936b7f4cSbjh21-------------------------------------------------------------------------------
461936b7f4cSbjh21*/
462936b7f4cSbjh21INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
463936b7f4cSbjh21{
464936b7f4cSbjh21    bits32 aHigh, aLow, bHigh, bLow;
465936b7f4cSbjh21    bits64 z0, zMiddleA, zMiddleB, z1;
466936b7f4cSbjh21
467*39052f3bSchristos    aLow = (bits32)a;
468*39052f3bSchristos    aHigh = (bits32)(a>>32);
469*39052f3bSchristos    bLow = (bits32)b;
470*39052f3bSchristos    bHigh = (bits32)(b>>32);
471936b7f4cSbjh21    z1 = ( (bits64) aLow ) * bLow;
472936b7f4cSbjh21    zMiddleA = ( (bits64) aLow ) * bHigh;
473936b7f4cSbjh21    zMiddleB = ( (bits64) aHigh ) * bLow;
474936b7f4cSbjh21    z0 = ( (bits64) aHigh ) * bHigh;
475936b7f4cSbjh21    zMiddleA += zMiddleB;
476936b7f4cSbjh21    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
477936b7f4cSbjh21    zMiddleA <<= 32;
478936b7f4cSbjh21    z1 += zMiddleA;
479936b7f4cSbjh21    z0 += ( z1 < zMiddleA );
480936b7f4cSbjh21    *z1Ptr = z1;
481936b7f4cSbjh21    *z0Ptr = z0;
482936b7f4cSbjh21
483936b7f4cSbjh21}
484936b7f4cSbjh21
485936b7f4cSbjh21/*
486936b7f4cSbjh21-------------------------------------------------------------------------------
487936b7f4cSbjh21Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
488936b7f4cSbjh21`b' to obtain a 192-bit product.  The product is broken into three 64-bit
489936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
490936b7f4cSbjh21`z2Ptr'.
491936b7f4cSbjh21-------------------------------------------------------------------------------
492936b7f4cSbjh21*/
493936b7f4cSbjh21INLINE void
494936b7f4cSbjh21 mul128By64To192(
495936b7f4cSbjh21     bits64 a0,
496936b7f4cSbjh21     bits64 a1,
497936b7f4cSbjh21     bits64 b,
498936b7f4cSbjh21     bits64 *z0Ptr,
499936b7f4cSbjh21     bits64 *z1Ptr,
500936b7f4cSbjh21     bits64 *z2Ptr
501936b7f4cSbjh21 )
502936b7f4cSbjh21{
503936b7f4cSbjh21    bits64 z0, z1, z2, more1;
504936b7f4cSbjh21
505936b7f4cSbjh21    mul64To128( a1, b, &z1, &z2 );
506936b7f4cSbjh21    mul64To128( a0, b, &z0, &more1 );
507936b7f4cSbjh21    add128( z0, more1, 0, z1, &z0, &z1 );
508936b7f4cSbjh21    *z2Ptr = z2;
509936b7f4cSbjh21    *z1Ptr = z1;
510936b7f4cSbjh21    *z0Ptr = z0;
511936b7f4cSbjh21
512936b7f4cSbjh21}
513936b7f4cSbjh21
514936b7f4cSbjh21/*
515936b7f4cSbjh21-------------------------------------------------------------------------------
516936b7f4cSbjh21Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
517936b7f4cSbjh21128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
518936b7f4cSbjh21product.  The product is broken into four 64-bit pieces which are stored at
519936b7f4cSbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
520936b7f4cSbjh21-------------------------------------------------------------------------------
521936b7f4cSbjh21*/
522936b7f4cSbjh21INLINE void
523936b7f4cSbjh21 mul128To256(
524936b7f4cSbjh21     bits64 a0,
525936b7f4cSbjh21     bits64 a1,
526936b7f4cSbjh21     bits64 b0,
527936b7f4cSbjh21     bits64 b1,
528936b7f4cSbjh21     bits64 *z0Ptr,
529936b7f4cSbjh21     bits64 *z1Ptr,
530936b7f4cSbjh21     bits64 *z2Ptr,
531936b7f4cSbjh21     bits64 *z3Ptr
532936b7f4cSbjh21 )
533936b7f4cSbjh21{
534936b7f4cSbjh21    bits64 z0, z1, z2, z3;
535936b7f4cSbjh21    bits64 more1, more2;
536936b7f4cSbjh21
537936b7f4cSbjh21    mul64To128( a1, b1, &z2, &z3 );
538936b7f4cSbjh21    mul64To128( a1, b0, &z1, &more2 );
539936b7f4cSbjh21    add128( z1, more2, 0, z2, &z1, &z2 );
540936b7f4cSbjh21    mul64To128( a0, b0, &z0, &more1 );
541936b7f4cSbjh21    add128( z0, more1, 0, z1, &z0, &z1 );
542936b7f4cSbjh21    mul64To128( a0, b1, &more1, &more2 );
543936b7f4cSbjh21    add128( more1, more2, 0, z2, &more1, &z2 );
544936b7f4cSbjh21    add128( z0, z1, 0, more1, &z0, &z1 );
545936b7f4cSbjh21    *z3Ptr = z3;
546936b7f4cSbjh21    *z2Ptr = z2;
547936b7f4cSbjh21    *z1Ptr = z1;
548936b7f4cSbjh21    *z0Ptr = z0;
549936b7f4cSbjh21
550936b7f4cSbjh21}
551936b7f4cSbjh21
552936b7f4cSbjh21/*
553936b7f4cSbjh21-------------------------------------------------------------------------------
554936b7f4cSbjh21Returns an approximation to the 64-bit integer quotient obtained by dividing
555936b7f4cSbjh21`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
556936b7f4cSbjh21divisor `b' must be at least 2^63.  If q is the exact quotient truncated
557936b7f4cSbjh21toward zero, the approximation returned lies between q and q + 2 inclusive.
558936b7f4cSbjh21If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559936b7f4cSbjh21unsigned integer is returned.
560936b7f4cSbjh21-------------------------------------------------------------------------------
561936b7f4cSbjh21*/
562936b7f4cSbjh21static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
563936b7f4cSbjh21{
564936b7f4cSbjh21    bits64 b0, b1;
565936b7f4cSbjh21    bits64 rem0, rem1, term0, term1;
566936b7f4cSbjh21    bits64 z;
567936b7f4cSbjh21
568936b7f4cSbjh21    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
569936b7f4cSbjh21    b0 = b>>32;
570936b7f4cSbjh21    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
571936b7f4cSbjh21    mul64To128( b, z, &term0, &term1 );
572936b7f4cSbjh21    sub128( a0, a1, term0, term1, &rem0, &rem1 );
573936b7f4cSbjh21    while ( ( (sbits64) rem0 ) < 0 ) {
574936b7f4cSbjh21        z -= LIT64( 0x100000000 );
575936b7f4cSbjh21        b1 = b<<32;
576936b7f4cSbjh21        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
577936b7f4cSbjh21    }
578936b7f4cSbjh21    rem0 = ( rem0<<32 ) | ( rem1>>32 );
579936b7f4cSbjh21    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
580936b7f4cSbjh21    return z;
581936b7f4cSbjh21
582936b7f4cSbjh21}
583936b7f4cSbjh21
584936b7f4cSbjh21#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
585936b7f4cSbjh21/*
586936b7f4cSbjh21-------------------------------------------------------------------------------
587936b7f4cSbjh21Returns an approximation to the square root of the 32-bit significand given
588936b7f4cSbjh21by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
589936b7f4cSbjh21`aExp' (the least significant bit) is 1, the integer returned approximates
590936b7f4cSbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
591936b7f4cSbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
592936b7f4cSbjh21case, the approximation returned lies strictly within +/-2 of the exact
593936b7f4cSbjh21value.
594936b7f4cSbjh21-------------------------------------------------------------------------------
595936b7f4cSbjh21*/
596936b7f4cSbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a )
597936b7f4cSbjh21{
598936b7f4cSbjh21    static const bits16 sqrtOddAdjustments[] = {
599936b7f4cSbjh21        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
600936b7f4cSbjh21        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
601936b7f4cSbjh21    };
602936b7f4cSbjh21    static const bits16 sqrtEvenAdjustments[] = {
603936b7f4cSbjh21        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
604936b7f4cSbjh21        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
605936b7f4cSbjh21    };
606936b7f4cSbjh21    int8 idx;
607936b7f4cSbjh21    bits32 z;
608936b7f4cSbjh21
609936b7f4cSbjh21    idx = ( a>>27 ) & 15;
610936b7f4cSbjh21    if ( aExp & 1 ) {
611936b7f4cSbjh21        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
612936b7f4cSbjh21        z = ( ( a / z )<<14 ) + ( z<<15 );
613936b7f4cSbjh21        a >>= 1;
614936b7f4cSbjh21    }
615936b7f4cSbjh21    else {
616936b7f4cSbjh21        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
617936b7f4cSbjh21        z = a / z + z;
618936b7f4cSbjh21        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
619*39052f3bSchristos        if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 );
620936b7f4cSbjh21    }
621936b7f4cSbjh21    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
622936b7f4cSbjh21
623936b7f4cSbjh21}
624936b7f4cSbjh21#endif
625936b7f4cSbjh21
626936b7f4cSbjh21/*
627936b7f4cSbjh21-------------------------------------------------------------------------------
628936b7f4cSbjh21Returns the number of leading 0 bits before the most-significant 1 bit of
629936b7f4cSbjh21`a'.  If `a' is zero, 32 is returned.
630936b7f4cSbjh21-------------------------------------------------------------------------------
631936b7f4cSbjh21*/
632936b7f4cSbjh21static int8 countLeadingZeros32( bits32 a )
633936b7f4cSbjh21{
634936b7f4cSbjh21    static const int8 countLeadingZerosHigh[] = {
635936b7f4cSbjh21        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
636936b7f4cSbjh21        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
637936b7f4cSbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
638936b7f4cSbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
640936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
651936b7f4cSbjh21    };
652936b7f4cSbjh21    int8 shiftCount;
653936b7f4cSbjh21
654936b7f4cSbjh21    shiftCount = 0;
655936b7f4cSbjh21    if ( a < 0x10000 ) {
656936b7f4cSbjh21        shiftCount += 16;
657936b7f4cSbjh21        a <<= 16;
658936b7f4cSbjh21    }
659936b7f4cSbjh21    if ( a < 0x1000000 ) {
660936b7f4cSbjh21        shiftCount += 8;
661936b7f4cSbjh21        a <<= 8;
662936b7f4cSbjh21    }
663936b7f4cSbjh21    shiftCount += countLeadingZerosHigh[ a>>24 ];
664936b7f4cSbjh21    return shiftCount;
665936b7f4cSbjh21
666936b7f4cSbjh21}
667936b7f4cSbjh21
668936b7f4cSbjh21/*
669936b7f4cSbjh21-------------------------------------------------------------------------------
670936b7f4cSbjh21Returns the number of leading 0 bits before the most-significant 1 bit of
671936b7f4cSbjh21`a'.  If `a' is zero, 64 is returned.
672936b7f4cSbjh21-------------------------------------------------------------------------------
673936b7f4cSbjh21*/
674936b7f4cSbjh21static int8 countLeadingZeros64( bits64 a )
675936b7f4cSbjh21{
676936b7f4cSbjh21    int8 shiftCount;
677936b7f4cSbjh21
678936b7f4cSbjh21    shiftCount = 0;
679936b7f4cSbjh21    if ( a < ( (bits64) 1 )<<32 ) {
680936b7f4cSbjh21        shiftCount += 32;
681936b7f4cSbjh21    }
682936b7f4cSbjh21    else {
683936b7f4cSbjh21        a >>= 32;
684936b7f4cSbjh21    }
685*39052f3bSchristos    shiftCount += (int8)countLeadingZeros32( (bits32)a );
686936b7f4cSbjh21    return shiftCount;
687936b7f4cSbjh21
688936b7f4cSbjh21}
689936b7f4cSbjh21
690936b7f4cSbjh21/*
691936b7f4cSbjh21-------------------------------------------------------------------------------
692936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
693936b7f4cSbjh21is equal to the 128-bit value formed by concatenating `b0' and `b1'.
694936b7f4cSbjh21Otherwise, returns 0.
695936b7f4cSbjh21-------------------------------------------------------------------------------
696936b7f4cSbjh21*/
697936b7f4cSbjh21INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
698936b7f4cSbjh21{
699936b7f4cSbjh21
700936b7f4cSbjh21    return ( a0 == b0 ) && ( a1 == b1 );
701936b7f4cSbjh21
702936b7f4cSbjh21}
703936b7f4cSbjh21
704936b7f4cSbjh21/*
705936b7f4cSbjh21-------------------------------------------------------------------------------
706936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
707936b7f4cSbjh21than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
708936b7f4cSbjh21Otherwise, returns 0.
709936b7f4cSbjh21-------------------------------------------------------------------------------
710936b7f4cSbjh21*/
711936b7f4cSbjh21INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
712936b7f4cSbjh21{
713936b7f4cSbjh21
714936b7f4cSbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
715936b7f4cSbjh21
716936b7f4cSbjh21}
717936b7f4cSbjh21
718936b7f4cSbjh21/*
719936b7f4cSbjh21-------------------------------------------------------------------------------
720936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
721936b7f4cSbjh21than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
722936b7f4cSbjh21returns 0.
723936b7f4cSbjh21-------------------------------------------------------------------------------
724936b7f4cSbjh21*/
725936b7f4cSbjh21INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
726936b7f4cSbjh21{
727936b7f4cSbjh21
728936b7f4cSbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
729936b7f4cSbjh21
730936b7f4cSbjh21}
731936b7f4cSbjh21
732936b7f4cSbjh21/*
733936b7f4cSbjh21-------------------------------------------------------------------------------
734936b7f4cSbjh21Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
735936b7f4cSbjh21not equal to the 128-bit value formed by concatenating `b0' and `b1'.
736936b7f4cSbjh21Otherwise, returns 0.
737936b7f4cSbjh21-------------------------------------------------------------------------------
738936b7f4cSbjh21*/
739936b7f4cSbjh21INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
740936b7f4cSbjh21{
741936b7f4cSbjh21
742936b7f4cSbjh21    return ( a0 != b0 ) || ( a1 != b1 );
743936b7f4cSbjh21
744936b7f4cSbjh21}
745936b7f4cSbjh21
746