xref: /netbsd-src/lib/libc/softfloat/bits32/softfloat-macros (revision d52f6f4b6b6d3d7d2c0c33132c3d93116f327b5c)
1936b7f4cSbjh21
2936b7f4cSbjh21/*
3936b7f4cSbjh21===============================================================================
4936b7f4cSbjh21
5936b7f4cSbjh21This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6936b7f4cSbjh21Arithmetic Package, Release 2a.
7936b7f4cSbjh21
8936b7f4cSbjh21Written by John R. Hauser.  This work was made possible in part by the
9936b7f4cSbjh21International Computer Science Institute, located at Suite 600, 1947 Center
10936b7f4cSbjh21Street, Berkeley, California 94704.  Funding was partially provided by the
11936b7f4cSbjh21National Science Foundation under grant MIP-9311980.  The original version
12936b7f4cSbjh21of this code was written as part of a project to build a fixed-point vector
13936b7f4cSbjh21processor in collaboration with the University of California at Berkeley,
14936b7f4cSbjh21overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
15936b7f4cSbjh21is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16936b7f4cSbjh21arithmetic/SoftFloat.html'.
17936b7f4cSbjh21
18936b7f4cSbjh21THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
19936b7f4cSbjh21has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20936b7f4cSbjh21TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
21936b7f4cSbjh21PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22936b7f4cSbjh21AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23936b7f4cSbjh21
24936b7f4cSbjh21Derivative works are acceptable, even for commercial purposes, so long as
25936b7f4cSbjh21(1) they include prominent notice that the work is derivative, and (2) they
26936b7f4cSbjh21include prominent notice akin to these four paragraphs for those parts of
27936b7f4cSbjh21this code that are retained.
28936b7f4cSbjh21
29936b7f4cSbjh21===============================================================================
30936b7f4cSbjh21*/
31936b7f4cSbjh21
32936b7f4cSbjh21/*
33936b7f4cSbjh21-------------------------------------------------------------------------------
34936b7f4cSbjh21Shifts `a' right by the number of bits given in `count'.  If any nonzero
35936b7f4cSbjh21bits are shifted off, they are ``jammed'' into the least significant bit of
36936b7f4cSbjh21the result by setting the least significant bit to 1.  The value of `count'
37936b7f4cSbjh21can be arbitrarily large; in particular, if `count' is greater than 32, the
38936b7f4cSbjh21result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39936b7f4cSbjh21The result is stored in the location pointed to by `zPtr'.
40936b7f4cSbjh21-------------------------------------------------------------------------------
41936b7f4cSbjh21*/
42936b7f4cSbjh21INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43936b7f4cSbjh21{
44936b7f4cSbjh21    bits32 z;
45936b7f4cSbjh21
46936b7f4cSbjh21    if ( count == 0 ) {
47936b7f4cSbjh21        z = a;
48936b7f4cSbjh21    }
49936b7f4cSbjh21    else if ( count < 32 ) {
50936b7f4cSbjh21        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51936b7f4cSbjh21    }
52936b7f4cSbjh21    else {
53936b7f4cSbjh21        z = ( a != 0 );
54936b7f4cSbjh21    }
55936b7f4cSbjh21    *zPtr = z;
56936b7f4cSbjh21
57936b7f4cSbjh21}
58936b7f4cSbjh21
59936b7f4cSbjh21/*
60936b7f4cSbjh21-------------------------------------------------------------------------------
61936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
62936b7f4cSbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
63936b7f4cSbjh21of `count' can be arbitrarily large; in particular, if `count' is greater
64936b7f4cSbjh21than 64, the result will be 0.  The result is broken into two 32-bit pieces
65936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
66936b7f4cSbjh21-------------------------------------------------------------------------------
67936b7f4cSbjh21*/
68936b7f4cSbjh21INLINE void
69936b7f4cSbjh21 shift64Right(
70936b7f4cSbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
71936b7f4cSbjh21{
72936b7f4cSbjh21    bits32 z0, z1;
73936b7f4cSbjh21    int8 negCount = ( - count ) & 31;
74936b7f4cSbjh21
75936b7f4cSbjh21    if ( count == 0 ) {
76936b7f4cSbjh21        z1 = a1;
77936b7f4cSbjh21        z0 = a0;
78936b7f4cSbjh21    }
79936b7f4cSbjh21    else if ( count < 32 ) {
80936b7f4cSbjh21        z1 = ( a0<<negCount ) | ( a1>>count );
81936b7f4cSbjh21        z0 = a0>>count;
82936b7f4cSbjh21    }
83936b7f4cSbjh21    else {
84936b7f4cSbjh21        z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
85936b7f4cSbjh21        z0 = 0;
86936b7f4cSbjh21    }
87936b7f4cSbjh21    *z1Ptr = z1;
88936b7f4cSbjh21    *z0Ptr = z0;
89936b7f4cSbjh21
90936b7f4cSbjh21}
91936b7f4cSbjh21
92936b7f4cSbjh21/*
93936b7f4cSbjh21-------------------------------------------------------------------------------
94936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
95936b7f4cSbjh21number of bits given in `count'.  If any nonzero bits are shifted off, they
96936b7f4cSbjh21are ``jammed'' into the least significant bit of the result by setting the
97936b7f4cSbjh21least significant bit to 1.  The value of `count' can be arbitrarily large;
98936b7f4cSbjh21in particular, if `count' is greater than 64, the result will be either 0
99936b7f4cSbjh21or 1, depending on whether the concatenation of `a0' and `a1' is zero or
100936b7f4cSbjh21nonzero.  The result is broken into two 32-bit pieces which are stored at
101936b7f4cSbjh21the locations pointed to by `z0Ptr' and `z1Ptr'.
102936b7f4cSbjh21-------------------------------------------------------------------------------
103936b7f4cSbjh21*/
104936b7f4cSbjh21INLINE void
105936b7f4cSbjh21 shift64RightJamming(
106936b7f4cSbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
107936b7f4cSbjh21{
108936b7f4cSbjh21    bits32 z0, z1;
109936b7f4cSbjh21    int8 negCount = ( - count ) & 31;
110936b7f4cSbjh21
111936b7f4cSbjh21    if ( count == 0 ) {
112936b7f4cSbjh21        z1 = a1;
113936b7f4cSbjh21        z0 = a0;
114936b7f4cSbjh21    }
115936b7f4cSbjh21    else if ( count < 32 ) {
116936b7f4cSbjh21        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
117936b7f4cSbjh21        z0 = a0>>count;
118936b7f4cSbjh21    }
119936b7f4cSbjh21    else {
120936b7f4cSbjh21        if ( count == 32 ) {
121936b7f4cSbjh21            z1 = a0 | ( a1 != 0 );
122936b7f4cSbjh21        }
123936b7f4cSbjh21        else if ( count < 64 ) {
124936b7f4cSbjh21            z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
125936b7f4cSbjh21        }
126936b7f4cSbjh21        else {
127936b7f4cSbjh21            z1 = ( ( a0 | a1 ) != 0 );
128936b7f4cSbjh21        }
129936b7f4cSbjh21        z0 = 0;
130936b7f4cSbjh21    }
131936b7f4cSbjh21    *z1Ptr = z1;
132936b7f4cSbjh21    *z0Ptr = z0;
133936b7f4cSbjh21
134936b7f4cSbjh21}
135936b7f4cSbjh21
136936b7f4cSbjh21/*
137936b7f4cSbjh21-------------------------------------------------------------------------------
138936b7f4cSbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
139936b7f4cSbjh21by 32 _plus_ the number of bits given in `count'.  The shifted result is
140936b7f4cSbjh21at most 64 nonzero bits; these are broken into two 32-bit pieces which are
141936b7f4cSbjh21stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
142936b7f4cSbjh21off form a third 32-bit result as follows:  The _last_ bit shifted off is
143936b7f4cSbjh21the most-significant bit of the extra result, and the other 31 bits of the
144936b7f4cSbjh21extra result are all zero if and only if _all_but_the_last_ bits shifted off
145936b7f4cSbjh21were all zero.  This extra result is stored in the location pointed to by
146936b7f4cSbjh21`z2Ptr'.  The value of `count' can be arbitrarily large.
147936b7f4cSbjh21    (This routine makes more sense if `a0', `a1', and `a2' are considered
148936b7f4cSbjh21to form a fixed-point value with binary point between `a1' and `a2'.  This
149936b7f4cSbjh21fixed-point value is shifted right by the number of bits given in `count',
150936b7f4cSbjh21and the integer part of the result is returned at the locations pointed to
151936b7f4cSbjh21by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
152936b7f4cSbjh21corrupted as described above, and is returned at the location pointed to by
153936b7f4cSbjh21`z2Ptr'.)
154936b7f4cSbjh21-------------------------------------------------------------------------------
155936b7f4cSbjh21*/
156936b7f4cSbjh21INLINE void
157936b7f4cSbjh21 shift64ExtraRightJamming(
158936b7f4cSbjh21     bits32 a0,
159936b7f4cSbjh21     bits32 a1,
160936b7f4cSbjh21     bits32 a2,
161936b7f4cSbjh21     int16 count,
162936b7f4cSbjh21     bits32 *z0Ptr,
163936b7f4cSbjh21     bits32 *z1Ptr,
164936b7f4cSbjh21     bits32 *z2Ptr
165936b7f4cSbjh21 )
166936b7f4cSbjh21{
167936b7f4cSbjh21    bits32 z0, z1, z2;
168936b7f4cSbjh21    int8 negCount = ( - count ) & 31;
169936b7f4cSbjh21
170936b7f4cSbjh21    if ( count == 0 ) {
171936b7f4cSbjh21        z2 = a2;
172936b7f4cSbjh21        z1 = a1;
173936b7f4cSbjh21        z0 = a0;
174936b7f4cSbjh21    }
175936b7f4cSbjh21    else {
176936b7f4cSbjh21        if ( count < 32 ) {
177936b7f4cSbjh21            z2 = a1<<negCount;
178936b7f4cSbjh21            z1 = ( a0<<negCount ) | ( a1>>count );
179936b7f4cSbjh21            z0 = a0>>count;
180936b7f4cSbjh21        }
181936b7f4cSbjh21        else {
182936b7f4cSbjh21            if ( count == 32 ) {
183936b7f4cSbjh21                z2 = a1;
184936b7f4cSbjh21                z1 = a0;
185936b7f4cSbjh21            }
186936b7f4cSbjh21            else {
187936b7f4cSbjh21                a2 |= a1;
188936b7f4cSbjh21                if ( count < 64 ) {
189936b7f4cSbjh21                    z2 = a0<<negCount;
190936b7f4cSbjh21                    z1 = a0>>( count & 31 );
191936b7f4cSbjh21                }
192936b7f4cSbjh21                else {
193936b7f4cSbjh21                    z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
194936b7f4cSbjh21                    z1 = 0;
195936b7f4cSbjh21                }
196936b7f4cSbjh21            }
197936b7f4cSbjh21            z0 = 0;
198936b7f4cSbjh21        }
199936b7f4cSbjh21        z2 |= ( a2 != 0 );
200936b7f4cSbjh21    }
201936b7f4cSbjh21    *z2Ptr = z2;
202936b7f4cSbjh21    *z1Ptr = z1;
203936b7f4cSbjh21    *z0Ptr = z0;
204936b7f4cSbjh21
205936b7f4cSbjh21}
206936b7f4cSbjh21
207936b7f4cSbjh21/*
208936b7f4cSbjh21-------------------------------------------------------------------------------
209936b7f4cSbjh21Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
210936b7f4cSbjh21number of bits given in `count'.  Any bits shifted off are lost.  The value
211936b7f4cSbjh21of `count' must be less than 32.  The result is broken into two 32-bit
212936b7f4cSbjh21pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
213936b7f4cSbjh21-------------------------------------------------------------------------------
214936b7f4cSbjh21*/
215936b7f4cSbjh21INLINE void
216936b7f4cSbjh21 shortShift64Left(
217936b7f4cSbjh21     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
218936b7f4cSbjh21{
219936b7f4cSbjh21
220936b7f4cSbjh21    *z1Ptr = a1<<count;
221936b7f4cSbjh21    *z0Ptr =
222936b7f4cSbjh21        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
223936b7f4cSbjh21
224936b7f4cSbjh21}
225936b7f4cSbjh21
226936b7f4cSbjh21/*
227936b7f4cSbjh21-------------------------------------------------------------------------------
228936b7f4cSbjh21Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
229936b7f4cSbjh21by the number of bits given in `count'.  Any bits shifted off are lost.
230936b7f4cSbjh21The value of `count' must be less than 32.  The result is broken into three
231936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr',
232936b7f4cSbjh21`z1Ptr', and `z2Ptr'.
233936b7f4cSbjh21-------------------------------------------------------------------------------
234936b7f4cSbjh21*/
235936b7f4cSbjh21INLINE void
236936b7f4cSbjh21 shortShift96Left(
237936b7f4cSbjh21     bits32 a0,
238936b7f4cSbjh21     bits32 a1,
239936b7f4cSbjh21     bits32 a2,
240936b7f4cSbjh21     int16 count,
241936b7f4cSbjh21     bits32 *z0Ptr,
242936b7f4cSbjh21     bits32 *z1Ptr,
243936b7f4cSbjh21     bits32 *z2Ptr
244936b7f4cSbjh21 )
245936b7f4cSbjh21{
246936b7f4cSbjh21    bits32 z0, z1, z2;
247936b7f4cSbjh21    int8 negCount;
248936b7f4cSbjh21
249936b7f4cSbjh21    z2 = a2<<count;
250936b7f4cSbjh21    z1 = a1<<count;
251936b7f4cSbjh21    z0 = a0<<count;
252936b7f4cSbjh21    if ( 0 < count ) {
253936b7f4cSbjh21        negCount = ( ( - count ) & 31 );
254936b7f4cSbjh21        z1 |= a2>>negCount;
255936b7f4cSbjh21        z0 |= a1>>negCount;
256936b7f4cSbjh21    }
257936b7f4cSbjh21    *z2Ptr = z2;
258936b7f4cSbjh21    *z1Ptr = z1;
259936b7f4cSbjh21    *z0Ptr = z0;
260936b7f4cSbjh21
261936b7f4cSbjh21}
262936b7f4cSbjh21
263936b7f4cSbjh21/*
264936b7f4cSbjh21-------------------------------------------------------------------------------
265936b7f4cSbjh21Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
266936b7f4cSbjh21value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
267936b7f4cSbjh21any carry out is lost.  The result is broken into two 32-bit pieces which
268936b7f4cSbjh21are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
269936b7f4cSbjh21-------------------------------------------------------------------------------
270936b7f4cSbjh21*/
271936b7f4cSbjh21INLINE void
272936b7f4cSbjh21 add64(
273936b7f4cSbjh21     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
274936b7f4cSbjh21{
275936b7f4cSbjh21    bits32 z1;
276936b7f4cSbjh21
277936b7f4cSbjh21    z1 = a1 + b1;
278936b7f4cSbjh21    *z1Ptr = z1;
279936b7f4cSbjh21    *z0Ptr = a0 + b0 + ( z1 < a1 );
280936b7f4cSbjh21
281936b7f4cSbjh21}
282936b7f4cSbjh21
283936b7f4cSbjh21/*
284936b7f4cSbjh21-------------------------------------------------------------------------------
285936b7f4cSbjh21Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
286936b7f4cSbjh2196-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
287936b7f4cSbjh21modulo 2^96, so any carry out is lost.  The result is broken into three
288936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr',
289936b7f4cSbjh21`z1Ptr', and `z2Ptr'.
290936b7f4cSbjh21-------------------------------------------------------------------------------
291936b7f4cSbjh21*/
292936b7f4cSbjh21INLINE void
293936b7f4cSbjh21 add96(
294936b7f4cSbjh21     bits32 a0,
295936b7f4cSbjh21     bits32 a1,
296936b7f4cSbjh21     bits32 a2,
297936b7f4cSbjh21     bits32 b0,
298936b7f4cSbjh21     bits32 b1,
299936b7f4cSbjh21     bits32 b2,
300936b7f4cSbjh21     bits32 *z0Ptr,
301936b7f4cSbjh21     bits32 *z1Ptr,
302936b7f4cSbjh21     bits32 *z2Ptr
303936b7f4cSbjh21 )
304936b7f4cSbjh21{
305936b7f4cSbjh21    bits32 z0, z1, z2;
306936b7f4cSbjh21    int8 carry0, carry1;
307936b7f4cSbjh21
308936b7f4cSbjh21    z2 = a2 + b2;
309936b7f4cSbjh21    carry1 = ( z2 < a2 );
310936b7f4cSbjh21    z1 = a1 + b1;
311936b7f4cSbjh21    carry0 = ( z1 < a1 );
312936b7f4cSbjh21    z0 = a0 + b0;
313936b7f4cSbjh21    z1 += carry1;
314*d52f6f4bSlukem    z0 += ( z1 < (bits32)carry1 );
315936b7f4cSbjh21    z0 += carry0;
316936b7f4cSbjh21    *z2Ptr = z2;
317936b7f4cSbjh21    *z1Ptr = z1;
318936b7f4cSbjh21    *z0Ptr = z0;
319936b7f4cSbjh21
320936b7f4cSbjh21}
321936b7f4cSbjh21
322936b7f4cSbjh21/*
323936b7f4cSbjh21-------------------------------------------------------------------------------
324936b7f4cSbjh21Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
325936b7f4cSbjh2164-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
326936b7f4cSbjh212^64, so any borrow out (carry out) is lost.  The result is broken into two
327936b7f4cSbjh2132-bit pieces which are stored at the locations pointed to by `z0Ptr' and
328936b7f4cSbjh21`z1Ptr'.
329936b7f4cSbjh21-------------------------------------------------------------------------------
330936b7f4cSbjh21*/
331936b7f4cSbjh21INLINE void
332936b7f4cSbjh21 sub64(
333936b7f4cSbjh21     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
334936b7f4cSbjh21{
335936b7f4cSbjh21
336936b7f4cSbjh21    *z1Ptr = a1 - b1;
337936b7f4cSbjh21    *z0Ptr = a0 - b0 - ( a1 < b1 );
338936b7f4cSbjh21
339936b7f4cSbjh21}
340936b7f4cSbjh21
341936b7f4cSbjh21/*
342936b7f4cSbjh21-------------------------------------------------------------------------------
343936b7f4cSbjh21Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
344936b7f4cSbjh21the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
345936b7f4cSbjh21is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
346936b7f4cSbjh21into three 32-bit pieces which are stored at the locations pointed to by
347936b7f4cSbjh21`z0Ptr', `z1Ptr', and `z2Ptr'.
348936b7f4cSbjh21-------------------------------------------------------------------------------
349936b7f4cSbjh21*/
350936b7f4cSbjh21INLINE void
351936b7f4cSbjh21 sub96(
352936b7f4cSbjh21     bits32 a0,
353936b7f4cSbjh21     bits32 a1,
354936b7f4cSbjh21     bits32 a2,
355936b7f4cSbjh21     bits32 b0,
356936b7f4cSbjh21     bits32 b1,
357936b7f4cSbjh21     bits32 b2,
358936b7f4cSbjh21     bits32 *z0Ptr,
359936b7f4cSbjh21     bits32 *z1Ptr,
360936b7f4cSbjh21     bits32 *z2Ptr
361936b7f4cSbjh21 )
362936b7f4cSbjh21{
363936b7f4cSbjh21    bits32 z0, z1, z2;
364936b7f4cSbjh21    int8 borrow0, borrow1;
365936b7f4cSbjh21
366936b7f4cSbjh21    z2 = a2 - b2;
367936b7f4cSbjh21    borrow1 = ( a2 < b2 );
368936b7f4cSbjh21    z1 = a1 - b1;
369936b7f4cSbjh21    borrow0 = ( a1 < b1 );
370936b7f4cSbjh21    z0 = a0 - b0;
371*d52f6f4bSlukem    z0 -= ( z1 < (bits32)borrow1 );
372936b7f4cSbjh21    z1 -= borrow1;
373936b7f4cSbjh21    z0 -= borrow0;
374936b7f4cSbjh21    *z2Ptr = z2;
375936b7f4cSbjh21    *z1Ptr = z1;
376936b7f4cSbjh21    *z0Ptr = z0;
377936b7f4cSbjh21
378936b7f4cSbjh21}
379936b7f4cSbjh21
380936b7f4cSbjh21/*
381936b7f4cSbjh21-------------------------------------------------------------------------------
382936b7f4cSbjh21Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
383936b7f4cSbjh21into two 32-bit pieces which are stored at the locations pointed to by
384936b7f4cSbjh21`z0Ptr' and `z1Ptr'.
385936b7f4cSbjh21-------------------------------------------------------------------------------
386936b7f4cSbjh21*/
387936b7f4cSbjh21INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
388936b7f4cSbjh21{
389936b7f4cSbjh21    bits16 aHigh, aLow, bHigh, bLow;
390936b7f4cSbjh21    bits32 z0, zMiddleA, zMiddleB, z1;
391936b7f4cSbjh21
392936b7f4cSbjh21    aLow = a;
393936b7f4cSbjh21    aHigh = a>>16;
394936b7f4cSbjh21    bLow = b;
395936b7f4cSbjh21    bHigh = b>>16;
396936b7f4cSbjh21    z1 = ( (bits32) aLow ) * bLow;
397936b7f4cSbjh21    zMiddleA = ( (bits32) aLow ) * bHigh;
398936b7f4cSbjh21    zMiddleB = ( (bits32) aHigh ) * bLow;
399936b7f4cSbjh21    z0 = ( (bits32) aHigh ) * bHigh;
400936b7f4cSbjh21    zMiddleA += zMiddleB;
401936b7f4cSbjh21    z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
402936b7f4cSbjh21    zMiddleA <<= 16;
403936b7f4cSbjh21    z1 += zMiddleA;
404936b7f4cSbjh21    z0 += ( z1 < zMiddleA );
405936b7f4cSbjh21    *z1Ptr = z1;
406936b7f4cSbjh21    *z0Ptr = z0;
407936b7f4cSbjh21
408936b7f4cSbjh21}
409936b7f4cSbjh21
410936b7f4cSbjh21/*
411936b7f4cSbjh21-------------------------------------------------------------------------------
412936b7f4cSbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
413936b7f4cSbjh21to obtain a 96-bit product.  The product is broken into three 32-bit pieces
414936b7f4cSbjh21which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
415936b7f4cSbjh21`z2Ptr'.
416936b7f4cSbjh21-------------------------------------------------------------------------------
417936b7f4cSbjh21*/
418936b7f4cSbjh21INLINE void
419936b7f4cSbjh21 mul64By32To96(
420936b7f4cSbjh21     bits32 a0,
421936b7f4cSbjh21     bits32 a1,
422936b7f4cSbjh21     bits32 b,
423936b7f4cSbjh21     bits32 *z0Ptr,
424936b7f4cSbjh21     bits32 *z1Ptr,
425936b7f4cSbjh21     bits32 *z2Ptr
426936b7f4cSbjh21 )
427936b7f4cSbjh21{
428936b7f4cSbjh21    bits32 z0, z1, z2, more1;
429936b7f4cSbjh21
430936b7f4cSbjh21    mul32To64( a1, b, &z1, &z2 );
431936b7f4cSbjh21    mul32To64( a0, b, &z0, &more1 );
432936b7f4cSbjh21    add64( z0, more1, 0, z1, &z0, &z1 );
433936b7f4cSbjh21    *z2Ptr = z2;
434936b7f4cSbjh21    *z1Ptr = z1;
435936b7f4cSbjh21    *z0Ptr = z0;
436936b7f4cSbjh21
437936b7f4cSbjh21}
438936b7f4cSbjh21
439936b7f4cSbjh21/*
440936b7f4cSbjh21-------------------------------------------------------------------------------
441936b7f4cSbjh21Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
442936b7f4cSbjh2164-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
443936b7f4cSbjh21product.  The product is broken into four 32-bit pieces which are stored at
444936b7f4cSbjh21the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
445936b7f4cSbjh21-------------------------------------------------------------------------------
446936b7f4cSbjh21*/
447936b7f4cSbjh21INLINE void
448936b7f4cSbjh21 mul64To128(
449936b7f4cSbjh21     bits32 a0,
450936b7f4cSbjh21     bits32 a1,
451936b7f4cSbjh21     bits32 b0,
452936b7f4cSbjh21     bits32 b1,
453936b7f4cSbjh21     bits32 *z0Ptr,
454936b7f4cSbjh21     bits32 *z1Ptr,
455936b7f4cSbjh21     bits32 *z2Ptr,
456936b7f4cSbjh21     bits32 *z3Ptr
457936b7f4cSbjh21 )
458936b7f4cSbjh21{
459936b7f4cSbjh21    bits32 z0, z1, z2, z3;
460936b7f4cSbjh21    bits32 more1, more2;
461936b7f4cSbjh21
462936b7f4cSbjh21    mul32To64( a1, b1, &z2, &z3 );
463936b7f4cSbjh21    mul32To64( a1, b0, &z1, &more2 );
464936b7f4cSbjh21    add64( z1, more2, 0, z2, &z1, &z2 );
465936b7f4cSbjh21    mul32To64( a0, b0, &z0, &more1 );
466936b7f4cSbjh21    add64( z0, more1, 0, z1, &z0, &z1 );
467936b7f4cSbjh21    mul32To64( a0, b1, &more1, &more2 );
468936b7f4cSbjh21    add64( more1, more2, 0, z2, &more1, &z2 );
469936b7f4cSbjh21    add64( z0, z1, 0, more1, &z0, &z1 );
470936b7f4cSbjh21    *z3Ptr = z3;
471936b7f4cSbjh21    *z2Ptr = z2;
472936b7f4cSbjh21    *z1Ptr = z1;
473936b7f4cSbjh21    *z0Ptr = z0;
474936b7f4cSbjh21
475936b7f4cSbjh21}
476936b7f4cSbjh21
477936b7f4cSbjh21/*
478936b7f4cSbjh21-------------------------------------------------------------------------------
479936b7f4cSbjh21Returns an approximation to the 32-bit integer quotient obtained by dividing
480936b7f4cSbjh21`b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
481936b7f4cSbjh21divisor `b' must be at least 2^31.  If q is the exact quotient truncated
482936b7f4cSbjh21toward zero, the approximation returned lies between q and q + 2 inclusive.
483936b7f4cSbjh21If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
484936b7f4cSbjh21unsigned integer is returned.
485936b7f4cSbjh21-------------------------------------------------------------------------------
486936b7f4cSbjh21*/
487936b7f4cSbjh21static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
488936b7f4cSbjh21{
489936b7f4cSbjh21    bits32 b0, b1;
490936b7f4cSbjh21    bits32 rem0, rem1, term0, term1;
491936b7f4cSbjh21    bits32 z;
492936b7f4cSbjh21
493936b7f4cSbjh21    if ( b <= a0 ) return 0xFFFFFFFF;
494936b7f4cSbjh21    b0 = b>>16;
495936b7f4cSbjh21    z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
496936b7f4cSbjh21    mul32To64( b, z, &term0, &term1 );
497936b7f4cSbjh21    sub64( a0, a1, term0, term1, &rem0, &rem1 );
498936b7f4cSbjh21    while ( ( (sbits32) rem0 ) < 0 ) {
499936b7f4cSbjh21        z -= 0x10000;
500936b7f4cSbjh21        b1 = b<<16;
501936b7f4cSbjh21        add64( rem0, rem1, b0, b1, &rem0, &rem1 );
502936b7f4cSbjh21    }
503936b7f4cSbjh21    rem0 = ( rem0<<16 ) | ( rem1>>16 );
504936b7f4cSbjh21    z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
505936b7f4cSbjh21    return z;
506936b7f4cSbjh21
507936b7f4cSbjh21}
508936b7f4cSbjh21
509936b7f4cSbjh21#ifndef SOFTFLOAT_FOR_GCC
510936b7f4cSbjh21/*
511936b7f4cSbjh21-------------------------------------------------------------------------------
512936b7f4cSbjh21Returns an approximation to the square root of the 32-bit significand given
513936b7f4cSbjh21by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
514936b7f4cSbjh21`aExp' (the least significant bit) is 1, the integer returned approximates
515936b7f4cSbjh212^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
516936b7f4cSbjh21is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
517936b7f4cSbjh21case, the approximation returned lies strictly within +/-2 of the exact
518936b7f4cSbjh21value.
519936b7f4cSbjh21-------------------------------------------------------------------------------
520936b7f4cSbjh21*/
521936b7f4cSbjh21static bits32 estimateSqrt32( int16 aExp, bits32 a )
522936b7f4cSbjh21{
523936b7f4cSbjh21    static const bits16 sqrtOddAdjustments[] = {
524936b7f4cSbjh21        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
525936b7f4cSbjh21        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
526936b7f4cSbjh21    };
527936b7f4cSbjh21    static const bits16 sqrtEvenAdjustments[] = {
528936b7f4cSbjh21        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
529936b7f4cSbjh21        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
530936b7f4cSbjh21    };
531936b7f4cSbjh21    int8 index;
532936b7f4cSbjh21    bits32 z;
533936b7f4cSbjh21
534936b7f4cSbjh21    index = ( a>>27 ) & 15;
535936b7f4cSbjh21    if ( aExp & 1 ) {
536936b7f4cSbjh21        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
537936b7f4cSbjh21        z = ( ( a / z )<<14 ) + ( z<<15 );
538936b7f4cSbjh21        a >>= 1;
539936b7f4cSbjh21    }
540936b7f4cSbjh21    else {
541936b7f4cSbjh21        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
542936b7f4cSbjh21        z = a / z + z;
543936b7f4cSbjh21        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
544936b7f4cSbjh21        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
545936b7f4cSbjh21    }
546936b7f4cSbjh21    return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
547936b7f4cSbjh21
548936b7f4cSbjh21}
549936b7f4cSbjh21#endif
550936b7f4cSbjh21
551936b7f4cSbjh21/*
552936b7f4cSbjh21-------------------------------------------------------------------------------
553936b7f4cSbjh21Returns the number of leading 0 bits before the most-significant 1 bit of
554936b7f4cSbjh21`a'.  If `a' is zero, 32 is returned.
555936b7f4cSbjh21-------------------------------------------------------------------------------
556936b7f4cSbjh21*/
557936b7f4cSbjh21static int8 countLeadingZeros32( bits32 a )
558936b7f4cSbjh21{
559936b7f4cSbjh21    static const int8 countLeadingZerosHigh[] = {
560936b7f4cSbjh21        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
561936b7f4cSbjh21        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
562936b7f4cSbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
563936b7f4cSbjh21        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
564936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
565936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
567936b7f4cSbjh21        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
568936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575936b7f4cSbjh21        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
576936b7f4cSbjh21    };
577936b7f4cSbjh21    int8 shiftCount;
578936b7f4cSbjh21
579936b7f4cSbjh21    shiftCount = 0;
580936b7f4cSbjh21    if ( a < 0x10000 ) {
581936b7f4cSbjh21        shiftCount += 16;
582936b7f4cSbjh21        a <<= 16;
583936b7f4cSbjh21    }
584936b7f4cSbjh21    if ( a < 0x1000000 ) {
585936b7f4cSbjh21        shiftCount += 8;
586936b7f4cSbjh21        a <<= 8;
587936b7f4cSbjh21    }
588936b7f4cSbjh21    shiftCount += countLeadingZerosHigh[ a>>24 ];
589936b7f4cSbjh21    return shiftCount;
590936b7f4cSbjh21
591936b7f4cSbjh21}
592936b7f4cSbjh21
593936b7f4cSbjh21/*
594936b7f4cSbjh21-------------------------------------------------------------------------------
595936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
596936b7f4cSbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
597936b7f4cSbjh21returns 0.
598936b7f4cSbjh21-------------------------------------------------------------------------------
599936b7f4cSbjh21*/
600936b7f4cSbjh21INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
601936b7f4cSbjh21{
602936b7f4cSbjh21
603936b7f4cSbjh21    return ( a0 == b0 ) && ( a1 == b1 );
604936b7f4cSbjh21
605936b7f4cSbjh21}
606936b7f4cSbjh21
607936b7f4cSbjh21/*
608936b7f4cSbjh21-------------------------------------------------------------------------------
609936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
610936b7f4cSbjh21than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
611936b7f4cSbjh21Otherwise, returns 0.
612936b7f4cSbjh21-------------------------------------------------------------------------------
613936b7f4cSbjh21*/
614936b7f4cSbjh21INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
615936b7f4cSbjh21{
616936b7f4cSbjh21
617936b7f4cSbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
618936b7f4cSbjh21
619936b7f4cSbjh21}
620936b7f4cSbjh21
621936b7f4cSbjh21/*
622936b7f4cSbjh21-------------------------------------------------------------------------------
623936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
624936b7f4cSbjh21than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
625936b7f4cSbjh21returns 0.
626936b7f4cSbjh21-------------------------------------------------------------------------------
627936b7f4cSbjh21*/
628936b7f4cSbjh21INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
629936b7f4cSbjh21{
630936b7f4cSbjh21
631936b7f4cSbjh21    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
632936b7f4cSbjh21
633936b7f4cSbjh21}
634936b7f4cSbjh21
635936b7f4cSbjh21/*
636936b7f4cSbjh21-------------------------------------------------------------------------------
637936b7f4cSbjh21Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
638936b7f4cSbjh21equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
639936b7f4cSbjh21returns 0.
640936b7f4cSbjh21-------------------------------------------------------------------------------
641936b7f4cSbjh21*/
642936b7f4cSbjh21INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
643936b7f4cSbjh21{
644936b7f4cSbjh21
645936b7f4cSbjh21    return ( a0 != b0 ) || ( a1 != b1 );
646936b7f4cSbjh21
647936b7f4cSbjh21}
648936b7f4cSbjh21
649