xref: /minix3/lib/libc/softfloat/bits32/softfloat-macros (revision 2fe8fb192fe7e8720e3e7a77f928da545e872a6a)

/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

32*2fe8fb19SBen Gras/*
33*2fe8fb19SBen Gras-------------------------------------------------------------------------------
34*2fe8fb19SBen GrasShifts `a' right by the number of bits given in `count'.  If any nonzero
35*2fe8fb19SBen Grasbits are shifted off, they are ``jammed'' into the least significant bit of
36*2fe8fb19SBen Grasthe result by setting the least significant bit to 1.  The value of `count'
37*2fe8fb19SBen Grascan be arbitrarily large; in particular, if `count' is greater than 32, the
38*2fe8fb19SBen Grasresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
39*2fe8fb19SBen GrasThe result is stored in the location pointed to by `zPtr'.
40*2fe8fb19SBen Gras-------------------------------------------------------------------------------
41*2fe8fb19SBen Gras*/
42*2fe8fb19SBen GrasINLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43*2fe8fb19SBen Gras{
44*2fe8fb19SBen Gras    bits32 z;
45*2fe8fb19SBen Gras
46*2fe8fb19SBen Gras    if ( count == 0 ) {
47*2fe8fb19SBen Gras        z = a;
48*2fe8fb19SBen Gras    }
49*2fe8fb19SBen Gras    else if ( count < 32 ) {
50*2fe8fb19SBen Gras        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51*2fe8fb19SBen Gras    }
52*2fe8fb19SBen Gras    else {
53*2fe8fb19SBen Gras        z = ( a != 0 );
54*2fe8fb19SBen Gras    }
55*2fe8fb19SBen Gras    *zPtr = z;
56*2fe8fb19SBen Gras
57*2fe8fb19SBen Gras}
58*2fe8fb19SBen Gras
59*2fe8fb19SBen Gras/*
60*2fe8fb19SBen Gras-------------------------------------------------------------------------------
61*2fe8fb19SBen GrasShifts the 64-bit value formed by concatenating `a0' and `a1' right by the
62*2fe8fb19SBen Grasnumber of bits given in `count'.  Any bits shifted off are lost.  The value
63*2fe8fb19SBen Grasof `count' can be arbitrarily large; in particular, if `count' is greater
64*2fe8fb19SBen Grasthan 64, the result will be 0.  The result is broken into two 32-bit pieces
65*2fe8fb19SBen Graswhich are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
66*2fe8fb19SBen Gras-------------------------------------------------------------------------------
67*2fe8fb19SBen Gras*/
68*2fe8fb19SBen GrasINLINE void
69*2fe8fb19SBen Gras shift64Right(
70*2fe8fb19SBen Gras     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
71*2fe8fb19SBen Gras{
72*2fe8fb19SBen Gras    bits32 z0, z1;
73*2fe8fb19SBen Gras    int8 negCount = ( - count ) & 31;
74*2fe8fb19SBen Gras
75*2fe8fb19SBen Gras    if ( count == 0 ) {
76*2fe8fb19SBen Gras        z1 = a1;
77*2fe8fb19SBen Gras        z0 = a0;
78*2fe8fb19SBen Gras    }
79*2fe8fb19SBen Gras    else if ( count < 32 ) {
80*2fe8fb19SBen Gras        z1 = ( a0<<negCount ) | ( a1>>count );
81*2fe8fb19SBen Gras        z0 = a0>>count;
82*2fe8fb19SBen Gras    }
83*2fe8fb19SBen Gras    else {
84*2fe8fb19SBen Gras        z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
85*2fe8fb19SBen Gras        z0 = 0;
86*2fe8fb19SBen Gras    }
87*2fe8fb19SBen Gras    *z1Ptr = z1;
88*2fe8fb19SBen Gras    *z0Ptr = z0;
89*2fe8fb19SBen Gras
90*2fe8fb19SBen Gras}
91*2fe8fb19SBen Gras
92*2fe8fb19SBen Gras/*
93*2fe8fb19SBen Gras-------------------------------------------------------------------------------
94*2fe8fb19SBen GrasShifts the 64-bit value formed by concatenating `a0' and `a1' right by the
95*2fe8fb19SBen Grasnumber of bits given in `count'.  If any nonzero bits are shifted off, they
96*2fe8fb19SBen Grasare ``jammed'' into the least significant bit of the result by setting the
97*2fe8fb19SBen Grasleast significant bit to 1.  The value of `count' can be arbitrarily large;
98*2fe8fb19SBen Grasin particular, if `count' is greater than 64, the result will be either 0
99*2fe8fb19SBen Grasor 1, depending on whether the concatenation of `a0' and `a1' is zero or
100*2fe8fb19SBen Grasnonzero.  The result is broken into two 32-bit pieces which are stored at
101*2fe8fb19SBen Grasthe locations pointed to by `z0Ptr' and `z1Ptr'.
102*2fe8fb19SBen Gras-------------------------------------------------------------------------------
103*2fe8fb19SBen Gras*/
104*2fe8fb19SBen GrasINLINE void
105*2fe8fb19SBen Gras shift64RightJamming(
106*2fe8fb19SBen Gras     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
107*2fe8fb19SBen Gras{
108*2fe8fb19SBen Gras    bits32 z0, z1;
109*2fe8fb19SBen Gras    int8 negCount = ( - count ) & 31;
110*2fe8fb19SBen Gras
111*2fe8fb19SBen Gras    if ( count == 0 ) {
112*2fe8fb19SBen Gras        z1 = a1;
113*2fe8fb19SBen Gras        z0 = a0;
114*2fe8fb19SBen Gras    }
115*2fe8fb19SBen Gras    else if ( count < 32 ) {
116*2fe8fb19SBen Gras        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
117*2fe8fb19SBen Gras        z0 = a0>>count;
118*2fe8fb19SBen Gras    }
119*2fe8fb19SBen Gras    else {
120*2fe8fb19SBen Gras        if ( count == 32 ) {
121*2fe8fb19SBen Gras            z1 = a0 | ( a1 != 0 );
122*2fe8fb19SBen Gras        }
123*2fe8fb19SBen Gras        else if ( count < 64 ) {
124*2fe8fb19SBen Gras            z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
125*2fe8fb19SBen Gras        }
126*2fe8fb19SBen Gras        else {
127*2fe8fb19SBen Gras            z1 = ( ( a0 | a1 ) != 0 );
128*2fe8fb19SBen Gras        }
129*2fe8fb19SBen Gras        z0 = 0;
130*2fe8fb19SBen Gras    }
131*2fe8fb19SBen Gras    *z1Ptr = z1;
132*2fe8fb19SBen Gras    *z0Ptr = z0;
133*2fe8fb19SBen Gras
134*2fe8fb19SBen Gras}
135*2fe8fb19SBen Gras
136*2fe8fb19SBen Gras/*
137*2fe8fb19SBen Gras-------------------------------------------------------------------------------
138*2fe8fb19SBen GrasShifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
139*2fe8fb19SBen Grasby 32 _plus_ the number of bits given in `count'.  The shifted result is
140*2fe8fb19SBen Grasat most 64 nonzero bits; these are broken into two 32-bit pieces which are
141*2fe8fb19SBen Grasstored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
142*2fe8fb19SBen Grasoff form a third 32-bit result as follows:  The _last_ bit shifted off is
143*2fe8fb19SBen Grasthe most-significant bit of the extra result, and the other 31 bits of the
144*2fe8fb19SBen Grasextra result are all zero if and only if _all_but_the_last_ bits shifted off
145*2fe8fb19SBen Graswere all zero.  This extra result is stored in the location pointed to by
146*2fe8fb19SBen Gras`z2Ptr'.  The value of `count' can be arbitrarily large.
147*2fe8fb19SBen Gras    (This routine makes more sense if `a0', `a1', and `a2' are considered
148*2fe8fb19SBen Grasto form a fixed-point value with binary point between `a1' and `a2'.  This
149*2fe8fb19SBen Grasfixed-point value is shifted right by the number of bits given in `count',
150*2fe8fb19SBen Grasand the integer part of the result is returned at the locations pointed to
151*2fe8fb19SBen Grasby `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
152*2fe8fb19SBen Grascorrupted as described above, and is returned at the location pointed to by
153*2fe8fb19SBen Gras`z2Ptr'.)
154*2fe8fb19SBen Gras-------------------------------------------------------------------------------
155*2fe8fb19SBen Gras*/
156*2fe8fb19SBen GrasINLINE void
157*2fe8fb19SBen Gras shift64ExtraRightJamming(
158*2fe8fb19SBen Gras     bits32 a0,
159*2fe8fb19SBen Gras     bits32 a1,
160*2fe8fb19SBen Gras     bits32 a2,
161*2fe8fb19SBen Gras     int16 count,
162*2fe8fb19SBen Gras     bits32 *z0Ptr,
163*2fe8fb19SBen Gras     bits32 *z1Ptr,
164*2fe8fb19SBen Gras     bits32 *z2Ptr
165*2fe8fb19SBen Gras )
166*2fe8fb19SBen Gras{
167*2fe8fb19SBen Gras    bits32 z0, z1, z2;
168*2fe8fb19SBen Gras    int8 negCount = ( - count ) & 31;
169*2fe8fb19SBen Gras
170*2fe8fb19SBen Gras    if ( count == 0 ) {
171*2fe8fb19SBen Gras        z2 = a2;
172*2fe8fb19SBen Gras        z1 = a1;
173*2fe8fb19SBen Gras        z0 = a0;
174*2fe8fb19SBen Gras    }
175*2fe8fb19SBen Gras    else {
176*2fe8fb19SBen Gras        if ( count < 32 ) {
177*2fe8fb19SBen Gras            z2 = a1<<negCount;
178*2fe8fb19SBen Gras            z1 = ( a0<<negCount ) | ( a1>>count );
179*2fe8fb19SBen Gras            z0 = a0>>count;
180*2fe8fb19SBen Gras        }
181*2fe8fb19SBen Gras        else {
182*2fe8fb19SBen Gras            if ( count == 32 ) {
183*2fe8fb19SBen Gras                z2 = a1;
184*2fe8fb19SBen Gras                z1 = a0;
185*2fe8fb19SBen Gras            }
186*2fe8fb19SBen Gras            else {
187*2fe8fb19SBen Gras                a2 |= a1;
188*2fe8fb19SBen Gras                if ( count < 64 ) {
189*2fe8fb19SBen Gras                    z2 = a0<<negCount;
190*2fe8fb19SBen Gras                    z1 = a0>>( count & 31 );
191*2fe8fb19SBen Gras                }
192*2fe8fb19SBen Gras                else {
193*2fe8fb19SBen Gras                    z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
194*2fe8fb19SBen Gras                    z1 = 0;
195*2fe8fb19SBen Gras                }
196*2fe8fb19SBen Gras            }
197*2fe8fb19SBen Gras            z0 = 0;
198*2fe8fb19SBen Gras        }
199*2fe8fb19SBen Gras        z2 |= ( a2 != 0 );
200*2fe8fb19SBen Gras    }
201*2fe8fb19SBen Gras    *z2Ptr = z2;
202*2fe8fb19SBen Gras    *z1Ptr = z1;
203*2fe8fb19SBen Gras    *z0Ptr = z0;
204*2fe8fb19SBen Gras
205*2fe8fb19SBen Gras}
206*2fe8fb19SBen Gras
207*2fe8fb19SBen Gras/*
208*2fe8fb19SBen Gras-------------------------------------------------------------------------------
209*2fe8fb19SBen GrasShifts the 64-bit value formed by concatenating `a0' and `a1' left by the
210*2fe8fb19SBen Grasnumber of bits given in `count'.  Any bits shifted off are lost.  The value
211*2fe8fb19SBen Grasof `count' must be less than 32.  The result is broken into two 32-bit
212*2fe8fb19SBen Graspieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
213*2fe8fb19SBen Gras-------------------------------------------------------------------------------
214*2fe8fb19SBen Gras*/
215*2fe8fb19SBen GrasINLINE void
216*2fe8fb19SBen Gras shortShift64Left(
217*2fe8fb19SBen Gras     bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
218*2fe8fb19SBen Gras{
219*2fe8fb19SBen Gras
220*2fe8fb19SBen Gras    *z1Ptr = a1<<count;
221*2fe8fb19SBen Gras    *z0Ptr =
222*2fe8fb19SBen Gras        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
223*2fe8fb19SBen Gras
224*2fe8fb19SBen Gras}
225*2fe8fb19SBen Gras
226*2fe8fb19SBen Gras/*
227*2fe8fb19SBen Gras-------------------------------------------------------------------------------
228*2fe8fb19SBen GrasShifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
229*2fe8fb19SBen Grasby the number of bits given in `count'.  Any bits shifted off are lost.
230*2fe8fb19SBen GrasThe value of `count' must be less than 32.  The result is broken into three
231*2fe8fb19SBen Gras32-bit pieces which are stored at the locations pointed to by `z0Ptr',
232*2fe8fb19SBen Gras`z1Ptr', and `z2Ptr'.
233*2fe8fb19SBen Gras-------------------------------------------------------------------------------
234*2fe8fb19SBen Gras*/
235*2fe8fb19SBen GrasINLINE void
236*2fe8fb19SBen Gras shortShift96Left(
237*2fe8fb19SBen Gras     bits32 a0,
238*2fe8fb19SBen Gras     bits32 a1,
239*2fe8fb19SBen Gras     bits32 a2,
240*2fe8fb19SBen Gras     int16 count,
241*2fe8fb19SBen Gras     bits32 *z0Ptr,
242*2fe8fb19SBen Gras     bits32 *z1Ptr,
243*2fe8fb19SBen Gras     bits32 *z2Ptr
244*2fe8fb19SBen Gras )
245*2fe8fb19SBen Gras{
246*2fe8fb19SBen Gras    bits32 z0, z1, z2;
247*2fe8fb19SBen Gras    int8 negCount;
248*2fe8fb19SBen Gras
249*2fe8fb19SBen Gras    z2 = a2<<count;
250*2fe8fb19SBen Gras    z1 = a1<<count;
251*2fe8fb19SBen Gras    z0 = a0<<count;
252*2fe8fb19SBen Gras    if ( 0 < count ) {
253*2fe8fb19SBen Gras        negCount = ( ( - count ) & 31 );
254*2fe8fb19SBen Gras        z1 |= a2>>negCount;
255*2fe8fb19SBen Gras        z0 |= a1>>negCount;
256*2fe8fb19SBen Gras    }
257*2fe8fb19SBen Gras    *z2Ptr = z2;
258*2fe8fb19SBen Gras    *z1Ptr = z1;
259*2fe8fb19SBen Gras    *z0Ptr = z0;
260*2fe8fb19SBen Gras
261*2fe8fb19SBen Gras}
262*2fe8fb19SBen Gras
263*2fe8fb19SBen Gras/*
264*2fe8fb19SBen Gras-------------------------------------------------------------------------------
265*2fe8fb19SBen GrasAdds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
266*2fe8fb19SBen Grasvalue formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
267*2fe8fb19SBen Grasany carry out is lost.  The result is broken into two 32-bit pieces which
268*2fe8fb19SBen Grasare stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
269*2fe8fb19SBen Gras-------------------------------------------------------------------------------
270*2fe8fb19SBen Gras*/
271*2fe8fb19SBen GrasINLINE void
272*2fe8fb19SBen Gras add64(
273*2fe8fb19SBen Gras     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
274*2fe8fb19SBen Gras{
275*2fe8fb19SBen Gras    bits32 z1;
276*2fe8fb19SBen Gras
277*2fe8fb19SBen Gras    z1 = a1 + b1;
278*2fe8fb19SBen Gras    *z1Ptr = z1;
279*2fe8fb19SBen Gras    *z0Ptr = a0 + b0 + ( z1 < a1 );
280*2fe8fb19SBen Gras
281*2fe8fb19SBen Gras}
282*2fe8fb19SBen Gras
283*2fe8fb19SBen Gras/*
284*2fe8fb19SBen Gras-------------------------------------------------------------------------------
285*2fe8fb19SBen GrasAdds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
286*2fe8fb19SBen Gras96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
287*2fe8fb19SBen Grasmodulo 2^96, so any carry out is lost.  The result is broken into three
288*2fe8fb19SBen Gras32-bit pieces which are stored at the locations pointed to by `z0Ptr',
289*2fe8fb19SBen Gras`z1Ptr', and `z2Ptr'.
290*2fe8fb19SBen Gras-------------------------------------------------------------------------------
291*2fe8fb19SBen Gras*/
292*2fe8fb19SBen GrasINLINE void
293*2fe8fb19SBen Gras add96(
294*2fe8fb19SBen Gras     bits32 a0,
295*2fe8fb19SBen Gras     bits32 a1,
296*2fe8fb19SBen Gras     bits32 a2,
297*2fe8fb19SBen Gras     bits32 b0,
298*2fe8fb19SBen Gras     bits32 b1,
299*2fe8fb19SBen Gras     bits32 b2,
300*2fe8fb19SBen Gras     bits32 *z0Ptr,
301*2fe8fb19SBen Gras     bits32 *z1Ptr,
302*2fe8fb19SBen Gras     bits32 *z2Ptr
303*2fe8fb19SBen Gras )
304*2fe8fb19SBen Gras{
305*2fe8fb19SBen Gras    bits32 z0, z1, z2;
306*2fe8fb19SBen Gras    int8 carry0, carry1;
307*2fe8fb19SBen Gras
308*2fe8fb19SBen Gras    z2 = a2 + b2;
309*2fe8fb19SBen Gras    carry1 = ( z2 < a2 );
310*2fe8fb19SBen Gras    z1 = a1 + b1;
311*2fe8fb19SBen Gras    carry0 = ( z1 < a1 );
312*2fe8fb19SBen Gras    z0 = a0 + b0;
313*2fe8fb19SBen Gras    z1 += carry1;
314*2fe8fb19SBen Gras    z0 += ( z1 < (bits32)carry1 );
315*2fe8fb19SBen Gras    z0 += carry0;
316*2fe8fb19SBen Gras    *z2Ptr = z2;
317*2fe8fb19SBen Gras    *z1Ptr = z1;
318*2fe8fb19SBen Gras    *z0Ptr = z0;
319*2fe8fb19SBen Gras
320*2fe8fb19SBen Gras}
321*2fe8fb19SBen Gras
322*2fe8fb19SBen Gras/*
323*2fe8fb19SBen Gras-------------------------------------------------------------------------------
324*2fe8fb19SBen GrasSubtracts the 64-bit value formed by concatenating `b0' and `b1' from the
325*2fe8fb19SBen Gras64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
326*2fe8fb19SBen Gras2^64, so any borrow out (carry out) is lost.  The result is broken into two
327*2fe8fb19SBen Gras32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
328*2fe8fb19SBen Gras`z1Ptr'.
329*2fe8fb19SBen Gras-------------------------------------------------------------------------------
330*2fe8fb19SBen Gras*/
331*2fe8fb19SBen GrasINLINE void
332*2fe8fb19SBen Gras sub64(
333*2fe8fb19SBen Gras     bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
334*2fe8fb19SBen Gras{
335*2fe8fb19SBen Gras
336*2fe8fb19SBen Gras    *z1Ptr = a1 - b1;
337*2fe8fb19SBen Gras    *z0Ptr = a0 - b0 - ( a1 < b1 );
338*2fe8fb19SBen Gras
339*2fe8fb19SBen Gras}
340*2fe8fb19SBen Gras
341*2fe8fb19SBen Gras/*
342*2fe8fb19SBen Gras-------------------------------------------------------------------------------
343*2fe8fb19SBen GrasSubtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
344*2fe8fb19SBen Grasthe 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
345*2fe8fb19SBen Grasis modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
346*2fe8fb19SBen Grasinto three 32-bit pieces which are stored at the locations pointed to by
347*2fe8fb19SBen Gras`z0Ptr', `z1Ptr', and `z2Ptr'.
348*2fe8fb19SBen Gras-------------------------------------------------------------------------------
349*2fe8fb19SBen Gras*/
350*2fe8fb19SBen GrasINLINE void
351*2fe8fb19SBen Gras sub96(
352*2fe8fb19SBen Gras     bits32 a0,
353*2fe8fb19SBen Gras     bits32 a1,
354*2fe8fb19SBen Gras     bits32 a2,
355*2fe8fb19SBen Gras     bits32 b0,
356*2fe8fb19SBen Gras     bits32 b1,
357*2fe8fb19SBen Gras     bits32 b2,
358*2fe8fb19SBen Gras     bits32 *z0Ptr,
359*2fe8fb19SBen Gras     bits32 *z1Ptr,
360*2fe8fb19SBen Gras     bits32 *z2Ptr
361*2fe8fb19SBen Gras )
362*2fe8fb19SBen Gras{
363*2fe8fb19SBen Gras    bits32 z0, z1, z2;
364*2fe8fb19SBen Gras    int8 borrow0, borrow1;
365*2fe8fb19SBen Gras
366*2fe8fb19SBen Gras    z2 = a2 - b2;
367*2fe8fb19SBen Gras    borrow1 = ( a2 < b2 );
368*2fe8fb19SBen Gras    z1 = a1 - b1;
369*2fe8fb19SBen Gras    borrow0 = ( a1 < b1 );
370*2fe8fb19SBen Gras    z0 = a0 - b0;
371*2fe8fb19SBen Gras    z0 -= ( z1 < (bits32)borrow1 );
372*2fe8fb19SBen Gras    z1 -= borrow1;
373*2fe8fb19SBen Gras    z0 -= borrow0;
374*2fe8fb19SBen Gras    *z2Ptr = z2;
375*2fe8fb19SBen Gras    *z1Ptr = z1;
376*2fe8fb19SBen Gras    *z0Ptr = z0;
377*2fe8fb19SBen Gras
378*2fe8fb19SBen Gras}
379*2fe8fb19SBen Gras
380*2fe8fb19SBen Gras/*
381*2fe8fb19SBen Gras-------------------------------------------------------------------------------
382*2fe8fb19SBen GrasMultiplies `a' by `b' to obtain a 64-bit product.  The product is broken
383*2fe8fb19SBen Grasinto two 32-bit pieces which are stored at the locations pointed to by
384*2fe8fb19SBen Gras`z0Ptr' and `z1Ptr'.
385*2fe8fb19SBen Gras-------------------------------------------------------------------------------
386*2fe8fb19SBen Gras*/
387*2fe8fb19SBen GrasINLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
388*2fe8fb19SBen Gras{
389*2fe8fb19SBen Gras    bits16 aHigh, aLow, bHigh, bLow;
390*2fe8fb19SBen Gras    bits32 z0, zMiddleA, zMiddleB, z1;
391*2fe8fb19SBen Gras
392*2fe8fb19SBen Gras    aLow = a;
393*2fe8fb19SBen Gras    aHigh = a>>16;
394*2fe8fb19SBen Gras    bLow = b;
395*2fe8fb19SBen Gras    bHigh = b>>16;
396*2fe8fb19SBen Gras    z1 = ( (bits32) aLow ) * bLow;
397*2fe8fb19SBen Gras    zMiddleA = ( (bits32) aLow ) * bHigh;
398*2fe8fb19SBen Gras    zMiddleB = ( (bits32) aHigh ) * bLow;
399*2fe8fb19SBen Gras    z0 = ( (bits32) aHigh ) * bHigh;
400*2fe8fb19SBen Gras    zMiddleA += zMiddleB;
401*2fe8fb19SBen Gras    z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
402*2fe8fb19SBen Gras    zMiddleA <<= 16;
403*2fe8fb19SBen Gras    z1 += zMiddleA;
404*2fe8fb19SBen Gras    z0 += ( z1 < zMiddleA );
405*2fe8fb19SBen Gras    *z1Ptr = z1;
406*2fe8fb19SBen Gras    *z0Ptr = z0;
407*2fe8fb19SBen Gras
408*2fe8fb19SBen Gras}
409*2fe8fb19SBen Gras
410*2fe8fb19SBen Gras/*
411*2fe8fb19SBen Gras-------------------------------------------------------------------------------
412*2fe8fb19SBen GrasMultiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
413*2fe8fb19SBen Grasto obtain a 96-bit product.  The product is broken into three 32-bit pieces
414*2fe8fb19SBen Graswhich are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
415*2fe8fb19SBen Gras`z2Ptr'.
416*2fe8fb19SBen Gras-------------------------------------------------------------------------------
417*2fe8fb19SBen Gras*/
418*2fe8fb19SBen GrasINLINE void
419*2fe8fb19SBen Gras mul64By32To96(
420*2fe8fb19SBen Gras     bits32 a0,
421*2fe8fb19SBen Gras     bits32 a1,
422*2fe8fb19SBen Gras     bits32 b,
423*2fe8fb19SBen Gras     bits32 *z0Ptr,
424*2fe8fb19SBen Gras     bits32 *z1Ptr,
425*2fe8fb19SBen Gras     bits32 *z2Ptr
426*2fe8fb19SBen Gras )
427*2fe8fb19SBen Gras{
428*2fe8fb19SBen Gras    bits32 z0, z1, z2, more1;
429*2fe8fb19SBen Gras
430*2fe8fb19SBen Gras    mul32To64( a1, b, &z1, &z2 );
431*2fe8fb19SBen Gras    mul32To64( a0, b, &z0, &more1 );
432*2fe8fb19SBen Gras    add64( z0, more1, 0, z1, &z0, &z1 );
433*2fe8fb19SBen Gras    *z2Ptr = z2;
434*2fe8fb19SBen Gras    *z1Ptr = z1;
435*2fe8fb19SBen Gras    *z0Ptr = z0;
436*2fe8fb19SBen Gras
437*2fe8fb19SBen Gras}
438*2fe8fb19SBen Gras
439*2fe8fb19SBen Gras/*
440*2fe8fb19SBen Gras-------------------------------------------------------------------------------
441*2fe8fb19SBen GrasMultiplies the 64-bit value formed by concatenating `a0' and `a1' to the
442*2fe8fb19SBen Gras64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
443*2fe8fb19SBen Grasproduct.  The product is broken into four 32-bit pieces which are stored at
444*2fe8fb19SBen Grasthe locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
445*2fe8fb19SBen Gras-------------------------------------------------------------------------------
446*2fe8fb19SBen Gras*/
447*2fe8fb19SBen GrasINLINE void
448*2fe8fb19SBen Gras mul64To128(
449*2fe8fb19SBen Gras     bits32 a0,
450*2fe8fb19SBen Gras     bits32 a1,
451*2fe8fb19SBen Gras     bits32 b0,
452*2fe8fb19SBen Gras     bits32 b1,
453*2fe8fb19SBen Gras     bits32 *z0Ptr,
454*2fe8fb19SBen Gras     bits32 *z1Ptr,
455*2fe8fb19SBen Gras     bits32 *z2Ptr,
456*2fe8fb19SBen Gras     bits32 *z3Ptr
457*2fe8fb19SBen Gras )
458*2fe8fb19SBen Gras{
459*2fe8fb19SBen Gras    bits32 z0, z1, z2, z3;
460*2fe8fb19SBen Gras    bits32 more1, more2;
461*2fe8fb19SBen Gras
462*2fe8fb19SBen Gras    mul32To64( a1, b1, &z2, &z3 );
463*2fe8fb19SBen Gras    mul32To64( a1, b0, &z1, &more2 );
464*2fe8fb19SBen Gras    add64( z1, more2, 0, z2, &z1, &z2 );
465*2fe8fb19SBen Gras    mul32To64( a0, b0, &z0, &more1 );
466*2fe8fb19SBen Gras    add64( z0, more1, 0, z1, &z0, &z1 );
467*2fe8fb19SBen Gras    mul32To64( a0, b1, &more1, &more2 );
468*2fe8fb19SBen Gras    add64( more1, more2, 0, z2, &more1, &z2 );
469*2fe8fb19SBen Gras    add64( z0, z1, 0, more1, &z0, &z1 );
470*2fe8fb19SBen Gras    *z3Ptr = z3;
471*2fe8fb19SBen Gras    *z2Ptr = z2;
472*2fe8fb19SBen Gras    *z1Ptr = z1;
473*2fe8fb19SBen Gras    *z0Ptr = z0;
474*2fe8fb19SBen Gras
475*2fe8fb19SBen Gras}
476*2fe8fb19SBen Gras
477*2fe8fb19SBen Gras/*
478*2fe8fb19SBen Gras-------------------------------------------------------------------------------
479*2fe8fb19SBen GrasReturns an approximation to the 32-bit integer quotient obtained by dividing
480*2fe8fb19SBen Gras`b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
481*2fe8fb19SBen Grasdivisor `b' must be at least 2^31.  If q is the exact quotient truncated
482*2fe8fb19SBen Grastoward zero, the approximation returned lies between q and q + 2 inclusive.
483*2fe8fb19SBen GrasIf the exact quotient q is larger than 32 bits, the maximum positive 32-bit
484*2fe8fb19SBen Grasunsigned integer is returned.
485*2fe8fb19SBen Gras-------------------------------------------------------------------------------
486*2fe8fb19SBen Gras*/
487*2fe8fb19SBen Grasstatic bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
488*2fe8fb19SBen Gras{
489*2fe8fb19SBen Gras    bits32 b0, b1;
490*2fe8fb19SBen Gras    bits32 rem0, rem1, term0, term1;
491*2fe8fb19SBen Gras    bits32 z;
492*2fe8fb19SBen Gras
493*2fe8fb19SBen Gras    if ( b <= a0 ) return 0xFFFFFFFF;
494*2fe8fb19SBen Gras    b0 = b>>16;
495*2fe8fb19SBen Gras    z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
496*2fe8fb19SBen Gras    mul32To64( b, z, &term0, &term1 );
497*2fe8fb19SBen Gras    sub64( a0, a1, term0, term1, &rem0, &rem1 );
498*2fe8fb19SBen Gras    while ( ( (sbits32) rem0 ) < 0 ) {
499*2fe8fb19SBen Gras        z -= 0x10000;
500*2fe8fb19SBen Gras        b1 = b<<16;
501*2fe8fb19SBen Gras        add64( rem0, rem1, b0, b1, &rem0, &rem1 );
502*2fe8fb19SBen Gras    }
503*2fe8fb19SBen Gras    rem0 = ( rem0<<16 ) | ( rem1>>16 );
504*2fe8fb19SBen Gras    z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
505*2fe8fb19SBen Gras    return z;
506*2fe8fb19SBen Gras
507*2fe8fb19SBen Gras}
508*2fe8fb19SBen Gras
509*2fe8fb19SBen Gras#ifndef SOFTFLOAT_FOR_GCC
510*2fe8fb19SBen Gras/*
511*2fe8fb19SBen Gras-------------------------------------------------------------------------------
512*2fe8fb19SBen GrasReturns an approximation to the square root of the 32-bit significand given
513*2fe8fb19SBen Grasby `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
514*2fe8fb19SBen Gras`aExp' (the least significant bit) is 1, the integer returned approximates
515*2fe8fb19SBen Gras2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
516*2fe8fb19SBen Grasis 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
517*2fe8fb19SBen Grascase, the approximation returned lies strictly within +/-2 of the exact
518*2fe8fb19SBen Grasvalue.
519*2fe8fb19SBen Gras-------------------------------------------------------------------------------
520*2fe8fb19SBen Gras*/
521*2fe8fb19SBen Grasstatic bits32 estimateSqrt32( int16 aExp, bits32 a )
522*2fe8fb19SBen Gras{
523*2fe8fb19SBen Gras    static const bits16 sqrtOddAdjustments[] = {
524*2fe8fb19SBen Gras        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
525*2fe8fb19SBen Gras        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
526*2fe8fb19SBen Gras    };
527*2fe8fb19SBen Gras    static const bits16 sqrtEvenAdjustments[] = {
528*2fe8fb19SBen Gras        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
529*2fe8fb19SBen Gras        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
530*2fe8fb19SBen Gras    };
531*2fe8fb19SBen Gras    int8 index;
532*2fe8fb19SBen Gras    bits32 z;
533*2fe8fb19SBen Gras
534*2fe8fb19SBen Gras    index = ( a>>27 ) & 15;
535*2fe8fb19SBen Gras    if ( aExp & 1 ) {
536*2fe8fb19SBen Gras        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
537*2fe8fb19SBen Gras        z = ( ( a / z )<<14 ) + ( z<<15 );
538*2fe8fb19SBen Gras        a >>= 1;
539*2fe8fb19SBen Gras    }
540*2fe8fb19SBen Gras    else {
541*2fe8fb19SBen Gras        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
542*2fe8fb19SBen Gras        z = a / z + z;
543*2fe8fb19SBen Gras        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
544*2fe8fb19SBen Gras        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
545*2fe8fb19SBen Gras    }
546*2fe8fb19SBen Gras    return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
547*2fe8fb19SBen Gras
548*2fe8fb19SBen Gras}
549*2fe8fb19SBen Gras#endif
550*2fe8fb19SBen Gras
551*2fe8fb19SBen Gras/*
552*2fe8fb19SBen Gras-------------------------------------------------------------------------------
553*2fe8fb19SBen GrasReturns the number of leading 0 bits before the most-significant 1 bit of
554*2fe8fb19SBen Gras`a'.  If `a' is zero, 32 is returned.
555*2fe8fb19SBen Gras-------------------------------------------------------------------------------
556*2fe8fb19SBen Gras*/
557*2fe8fb19SBen Grasstatic int8 countLeadingZeros32( bits32 a )
558*2fe8fb19SBen Gras{
559*2fe8fb19SBen Gras    static const int8 countLeadingZerosHigh[] = {
560*2fe8fb19SBen Gras        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
561*2fe8fb19SBen Gras        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
562*2fe8fb19SBen Gras        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
563*2fe8fb19SBen Gras        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
564*2fe8fb19SBen Gras        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
565*2fe8fb19SBen Gras        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566*2fe8fb19SBen Gras        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
567*2fe8fb19SBen Gras        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
568*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575*2fe8fb19SBen Gras        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
576*2fe8fb19SBen Gras    };
577*2fe8fb19SBen Gras    int8 shiftCount;
578*2fe8fb19SBen Gras
579*2fe8fb19SBen Gras    shiftCount = 0;
580*2fe8fb19SBen Gras    if ( a < 0x10000 ) {
581*2fe8fb19SBen Gras        shiftCount += 16;
582*2fe8fb19SBen Gras        a <<= 16;
583*2fe8fb19SBen Gras    }
584*2fe8fb19SBen Gras    if ( a < 0x1000000 ) {
585*2fe8fb19SBen Gras        shiftCount += 8;
586*2fe8fb19SBen Gras        a <<= 8;
587*2fe8fb19SBen Gras    }
588*2fe8fb19SBen Gras    shiftCount += countLeadingZerosHigh[ a>>24 ];
589*2fe8fb19SBen Gras    return shiftCount;
590*2fe8fb19SBen Gras
591*2fe8fb19SBen Gras}
592*2fe8fb19SBen Gras
593*2fe8fb19SBen Gras/*
594*2fe8fb19SBen Gras-------------------------------------------------------------------------------
595*2fe8fb19SBen GrasReturns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
596*2fe8fb19SBen Grasequal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
597*2fe8fb19SBen Grasreturns 0.
598*2fe8fb19SBen Gras-------------------------------------------------------------------------------
599*2fe8fb19SBen Gras*/
600*2fe8fb19SBen GrasINLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
601*2fe8fb19SBen Gras{
602*2fe8fb19SBen Gras
603*2fe8fb19SBen Gras    return ( a0 == b0 ) && ( a1 == b1 );
604*2fe8fb19SBen Gras
605*2fe8fb19SBen Gras}
606*2fe8fb19SBen Gras
607*2fe8fb19SBen Gras/*
608*2fe8fb19SBen Gras-------------------------------------------------------------------------------
609*2fe8fb19SBen GrasReturns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
610*2fe8fb19SBen Grasthan or equal to the 64-bit value formed by concatenating `b0' and `b1'.
611*2fe8fb19SBen GrasOtherwise, returns 0.
612*2fe8fb19SBen Gras-------------------------------------------------------------------------------
613*2fe8fb19SBen Gras*/
614*2fe8fb19SBen GrasINLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
615*2fe8fb19SBen Gras{
616*2fe8fb19SBen Gras
617*2fe8fb19SBen Gras    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
618*2fe8fb19SBen Gras
619*2fe8fb19SBen Gras}
620*2fe8fb19SBen Gras
621*2fe8fb19SBen Gras/*
622*2fe8fb19SBen Gras-------------------------------------------------------------------------------
623*2fe8fb19SBen GrasReturns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
624*2fe8fb19SBen Grasthan the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
625*2fe8fb19SBen Grasreturns 0.
626*2fe8fb19SBen Gras-------------------------------------------------------------------------------
627*2fe8fb19SBen Gras*/
628*2fe8fb19SBen GrasINLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
629*2fe8fb19SBen Gras{
630*2fe8fb19SBen Gras
631*2fe8fb19SBen Gras    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
632*2fe8fb19SBen Gras
633*2fe8fb19SBen Gras}
634*2fe8fb19SBen Gras
635*2fe8fb19SBen Gras/*
636*2fe8fb19SBen Gras-------------------------------------------------------------------------------
637*2fe8fb19SBen GrasReturns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
638*2fe8fb19SBen Grasequal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
639*2fe8fb19SBen Grasreturns 0.
640*2fe8fb19SBen Gras-------------------------------------------------------------------------------
641*2fe8fb19SBen Gras*/
642*2fe8fb19SBen GrasINLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
643*2fe8fb19SBen Gras{
644*2fe8fb19SBen Gras
645*2fe8fb19SBen Gras    return ( a0 != b0 ) || ( a1 != b1 );
646*2fe8fb19SBen Gras
647*2fe8fb19SBen Gras}
648*2fe8fb19SBen Gras
649