/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/*  Warning!  The branch displacements for some Xtensa branch instructions
    are quite small, and this code has been carefully laid out to keep
    branch targets in range.  If you change anything, be sure to check that
    the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000
	xor	a2, a2, a4
	leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

	/* Addition */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
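	/* As a rough C model of the alignment performed below (illustrative
	   only, not part of this file's build; "fx"/"fy" are the mantissas
	   with the explicit "1.0" in bit 23, "expdiff" the exponent
	   difference, and "guard" the bits shifted out of y, kept for
	   rounding):

	       uint32_t guard = 0;
	       if (expdiff >= 32)
	           fy = 0;                        // too small to affect x
	       else if (expdiff > 0) {
	           guard = fy << (32 - expdiff);  // shifted-out bits
	           fy >>= expdiff;                // align y with x
	       }
	       uint32_t sum = fx + fy;            // may carry into the exponent
	 */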
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return
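
	/* The rounding code above (and its variants below) implements
	   round-to-nearest-even.  In C terms (a sketch with hypothetical
	   names; "guard" holds the shifted-out fraction left-aligned at
	   bit 31):

	       if (guard & 0x80000000) {      // fraction >= 1/2: round up
	           sum += 1;
	           if ((guard << 1) == 0)     // fraction exactly 1/2:
	               sum &= ~1;             //   clear lsb, i.e. round to even
	       }
	 */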

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */
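	/* Expanding the shifts confirms the simplification:
	       (x << 23) - (x << 22) = x << 22
	   so
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	     = shifted mantissa + (x << 22) + (1 << 22)
	     = shifted mantissa + ((x + 1) << 22).  */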

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return


	/* Subtraction */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */
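	/* A rough C model of this renormalization (illustrative only;
	   clz32() stands in for the NSAU instruction, "frac"/"guard" are
	   the mantissa and leftover rounding bits, and "exp" the borrowed
	   exponent field; the implicit "1.0" bookkeeping is folded into
	   the exponent arithmetic below):

	       int lz = clz32(frac << 9);          // zeros above the mantissa
	       int shift = (lz >= exp) ? exp       // not enough exponent left:
	                               : lz + 1;   //   result goes subnormal
	       if (shift > 0) {
	           frac = (frac << shift) | (guard >> (32 - shift));
	           guard <<= shift;
	       }
	       exp -= shift;                       // then round as usual
	 */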

	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal. */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

	/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.literal_position
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	bnez	a8, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done
1:
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	movi	a7, 0x400000	/* make it a quiet NaN */
	or	a2, a3, a7
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */
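	/* The combination performed below computes, in effect (a C sketch
	   with hypothetical names, where x = (x1 << 16) + x0 and
	   y = (y1 << 16) + y0; note pp1 + pp2 can carry into bit 32, hence
	   the explicit carry word):

	       uint32_t pp0 = x0 * y0, pp1 = x0 * y1;
	       uint32_t pp2 = x1 * y0, pp3 = x1 * y1;
	       uint32_t mid = pp1 + pp2;
	       uint32_t c   = (mid < pp1);             // carry out of pp1+pp2
	       uint32_t lo  = pp0 + (mid << 16);
	       uint32_t hi  = pp3 + (c << 16) + (mid >> 16) + (lo < pp0);
	 */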

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..0xfd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round
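
	/* In C terms, the underflow path above is roughly ("frac" is the
	   mantissa in a2, "guard" the sticky rounding word in a6, and
	   "shift" the negated a8; a sketch only, with 1 <= shift <= 31
	   on this path):

	       uint32_t lost = frac << (32 - shift);  // bits shifted out
	       frac >>= shift;
	       guard = lost | (guard != 0);           // keep sticky info
	       exp_field = 0;                         // subnormal result
	 */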

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
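	/* Functionally this is shift-and-add multiplication, retiring four
	   bits of the first operand per iteration.  A C sketch (hypothetical
	   names, illustrative only):

	       uint32_t mulsi3(uint32_t a, uint32_t b)
	       {
	           uint32_t dst = 0;
	           while (a) {
	               if (a & 1) dst += b;
	               if (a & 2) dst += b << 1;
	               if (a & 4) dst += b << 2;
	               if (a & 8) dst += b << 3;
	               a >>= 4;
	               b <<= 4;
	           }
	           return dst;
	       }
	 */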
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

	.literal_position
	/* Division */
__divsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* Check for division by zero.  */
	beqz	a3, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	a4, a2, 1
	srli	a4, a4, 1
	srli	a2, a7, 31
	slli	a2, a2, 31
	or	a2, a2, a6
	bnez	a4, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a4, a5
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, a3, 31
	slli	a7, a7, 31
	xor	a2, a2, a7
	/* If y is NaN or Inf, return NaN.  */
	bnall	a3, a6, 1f
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, a3, 9
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	a2, a3
	leaf_return

	.align	4
	.global	__divsf3
	.type	__divsf3, @function
__divsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Ldiv_xnan_or_inf
	ball	a3, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	a3, a2, 1f
	slli	a2, a2, 1
	addi	a8, a8, -1
1:
	/* Do the first subtraction and shift.  */
	sub	a2, a2, a3
	slli	a2, a2, 1

	/* Put the quotient into a10.  */
	movi	a10, 1

	/* Divide one bit at a time for 23 bits.  */
	movi	a9, 23
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	slli	a10, a10, 1

	/* Is this digit a 0 or 1?  */
	bltu	a2, a3, 1f

	/* Output a 1 and subtract.  */
	addi	a10, a10, 1
	sub	a2, a2, a3

	/* Shift the dividend << 1.  */
1:	slli	a2, a2, 1

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:
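	/* The loop above is plain restoring division, one quotient bit per
	   iteration.  In C (a sketch only; x and y are the mantissas, with
	   x >= y guaranteed by the normalization above):

	       uint32_t q = 1;
	       x = (x - y) << 1;                  // first digit is known to be 1
	       for (int i = 0; i < 23; i++) {
	           q <<= 1;
	           if (x >= y) { q += 1; x -= y; }
	           x <<= 1;
	       }
	       // x now holds the remainder << 1, used below for rounding
	 */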

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	addi	a8, a8, 0x7e

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..0xfd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in a2.  */
	bltu	a2, a3, .Ldiv_rounded
	addi	a10, a10, 1
	beq	a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7
	leaf_return

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a4, 1	/* 0xff */
	slli	a2, a8, 23
	j	.Ldiv_addsign

.Ldiv_exactlyhalf:
	/* Remainder is exactly half the divisor.  Round even.  */
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_flush_to_zero

	/* Shift a10 right.  Any bits that are shifted out of a10 are
	   saved in a6 for rounding the result.  */
	sll	a6, a10
	srl	a10, a10

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero remainder (in a2) into a6.  */
	beqz	a2, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a10, a10, 1
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a10, a10, 1
	slli	a10, a10, 1
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

	/* Equal and Not Equal */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Check if the mantissas are nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Check if x and y are zero with different signs.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0x7f8 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
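
	/* The equality test above amounts to this C sketch (zero means
	   "equal"; hypothetical names):

	       int eq(uint32_t x, uint32_t y)
	       {
	           if (x != y)
	               return ((x | y) << 1) != 0;    // only +0 == -0
	           if ((x & 0x7f800000) == 0x7f800000)
	               return (x << 9) != 0;          // NaN != NaN, Inf == Inf
	           return 0;
	       }
	 */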


	/* Greater Than */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return


	/* Less Than or Equal */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Check if y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	a2, 4b

	/* Check if both x and y are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7
	leaf_return


	/* Greater Than or Equal */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return


	/* Less Than */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	a2, 5b

	/* x is negative and y is positive, so x < y unless both are zero.  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return


	/* Unordered */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return
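
	/* In C terms, this predicate is simply (a sketch; nonzero iff
	   either operand is a NaN):

	       int unord(uint32_t x, uint32_t y)
	       {
	           int xnan = ((x & 0x7f800000) == 0x7f800000) && (x << 9) != 0;
	           int ynan = ((y & 0x7f800000) == 0x7f800000) && (y << 9) != 0;
	           return xnan || ynan;
	       }
	 */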

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return
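
	/* End to end, the conversion behaves like this C sketch (saturating,
	   with NaN mapped to +maxint; illustrative only):

	       int32_t fixsfsi(uint32_t b)
	       {
	           int e = ((b >> 23) & 0xff) - 0x7e;      // exponent + 1
	           if ((b & 0x7f800000) == 0x7f800000 && (b << 9) != 0)
	               return INT32_MAX;                   // NaN
	           if (e >= 32)                            // too big (or Inf)
	               return (int32_t) b < 0 ? INT32_MIN : INT32_MAX;
	           if (e < 1)
	               return 0;                           // |x| < 1
	           uint32_t m = (b | 0x00800000) << 8;     // mantissa at bit 31
	           int32_t  v = m >> (32 - e);
	           return (int32_t) b < 0 ? -v : v;
	       }
	 */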

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
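
	/* Overall, the function above behaves like this C sketch (clz32()
	   stands in for the NSAU instruction; illustrative only): normalize
	   so the top set bit reaches bit 31, derive the exponent from the
	   shift count, then round to nearest even on the 8 bits shifted out:

	       uint32_t floatsisf(int32_t i)
	       {
	           if (i == 0)
	               return 0;
	           uint32_t s = (uint32_t) i & 0x80000000;
	           uint32_t a = s ? -(uint32_t) i : (uint32_t) i;
	           int lz = clz32(a);
	           uint32_t m = a << lz;                  // msb now set
	           uint32_t r = ((uint32_t)(0x9d - lz) << 23) + (m >> 8);
	           uint32_t rest = m << 24;               // shifted-out bits
	           if (rest & 0x80000000) {               // >= 1/2: round up
	               r += 1;                            // may carry into exp
	               if ((rest << 1) == 0)
	                   r &= ~1u;                      // tie: round to even
	           }
	           return r | s;
	       }
	 */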

#endif /* L_floatsisf */

#ifdef L_floatdisf

	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

#endif /* L_floatdisf */