// xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dfmul.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Multiply

// Register aliases for the two double operands.  A (r1:0) is the first
// argument and also carries the result; B (r3:2) is the second argument.
// The *H/*L names address the high/low 32-bit halves of each pair.
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Scratch register pairs used while forming the 128-bit partial product
// and while handling overflow/underflow/abnormal inputs.
#define BTMP r5:4
#define BTMPH r5
#define BTMPL r4

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6

// ONE is loaded with the constant 1; S_ONE/S_ZERO name its low word (1)
// and high word (0), used as sticky-bit source and zero source.
#define ONE r9:8
#define S_ONE r8
#define S_ZERO r9

#define PP_HH r11:10
#define PP_HH_H r11
#define PP_HH_L r10

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define PP_LL r15:14
#define PP_LL_H r15
#define PP_LL_L r14

#define TMP r28

// IEEE-754 double field widths: 52 mantissa bits (20 of them in the high
// word), 11 exponent bits.  BIAS is the exponent bias value this routine
// works with internally (final packing adjusts by -58 below).
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52

// Some constant to adjust normalization amount in error code
// Amount to right shift the partial product to get to a denorm
#define FUDGE 5

// Exported aliases: __qdsp_* and the "fast"/"fast2" entry points all
// resolve to the same __hexagon_* implementation.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

// Bit offset of the 2-bit rounding-mode field within the USR register.
#define SR_ROUND_OFF 22
59*0b57cec5SDimitry Andric	.text
60*0b57cec5SDimitry Andric	.global __hexagon_muldf3
61*0b57cec5SDimitry Andric	.type __hexagon_muldf3,@function
62*0b57cec5SDimitry Andric	Q6_ALIAS(muldf3)
63*0b57cec5SDimitry Andric  FAST_ALIAS(muldf3)
64*0b57cec5SDimitry Andric  FAST2_ALIAS(muldf3)
65*0b57cec5SDimitry Andric	.p2align 5
66*0b57cec5SDimitry Andric__hexagon_muldf3:
67*0b57cec5SDimitry Andric	{
68*0b57cec5SDimitry Andric		p0 = dfclass(A,#2)
69*0b57cec5SDimitry Andric		p0 = dfclass(B,#2)
70*0b57cec5SDimitry Andric		ATMP = combine(##0x40000000,#0)
71*0b57cec5SDimitry Andric	}
72*0b57cec5SDimitry Andric	{
73*0b57cec5SDimitry Andric		ATMP = insert(A,#MANTBITS,#EXPBITS-1)
74*0b57cec5SDimitry Andric		BTMP = asl(B,#EXPBITS-1)
75*0b57cec5SDimitry Andric		TMP = #-BIAS
76*0b57cec5SDimitry Andric		ONE = #1
77*0b57cec5SDimitry Andric	}
78*0b57cec5SDimitry Andric	{
79*0b57cec5SDimitry Andric		PP_ODD = mpyu(BTMPL,ATMPH)
80*0b57cec5SDimitry Andric		BTMP = insert(ONE,#2,#62)
81*0b57cec5SDimitry Andric	}
82*0b57cec5SDimitry Andric	// since we know that the MSB of the H registers is zero, we should never carry
83*0b57cec5SDimitry Andric	// H <= 2^31-1.  L <= 2^32-1.  Therefore, HL <= 2^63-2^32-2^31+1
84*0b57cec5SDimitry Andric	// Adding 2 HLs, we get 2^64-3*2^32+2 maximum.
85*0b57cec5SDimitry Andric	// Therefore, we can add 3 2^32-1 values safely without carry.  We only need one.
86*0b57cec5SDimitry Andric	{
87*0b57cec5SDimitry Andric		PP_LL = mpyu(ATMPL,BTMPL)
88*0b57cec5SDimitry Andric		PP_ODD += mpyu(ATMPL,BTMPH)
89*0b57cec5SDimitry Andric	}
90*0b57cec5SDimitry Andric	{
91*0b57cec5SDimitry Andric		PP_ODD += lsr(PP_LL,#32)
92*0b57cec5SDimitry Andric		PP_HH = mpyu(ATMPH,BTMPH)
93*0b57cec5SDimitry Andric		BTMP = combine(##BIAS+BIAS-4,#0)
94*0b57cec5SDimitry Andric	}
95*0b57cec5SDimitry Andric	{
96*0b57cec5SDimitry Andric		PP_HH += lsr(PP_ODD,#32)
97*0b57cec5SDimitry Andric		if (!p0) jump .Lmul_abnormal
98*0b57cec5SDimitry Andric		p1 = cmp.eq(PP_LL_L,#0)		// 64 lsb's 0?
99*0b57cec5SDimitry Andric		p1 = cmp.eq(PP_ODD_L,#0)	// 64 lsb's 0?
100*0b57cec5SDimitry Andric	}
101*0b57cec5SDimitry Andric
102*0b57cec5SDimitry Andric	// PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
103*0b57cec5SDimitry Andric	// PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
104*0b57cec5SDimitry Andric
105*0b57cec5SDimitry Andric#undef PP_ODD
106*0b57cec5SDimitry Andric#undef PP_ODD_H
107*0b57cec5SDimitry Andric#undef PP_ODD_L
108*0b57cec5SDimitry Andric#define EXP10 r7:6
109*0b57cec5SDimitry Andric#define EXP1 r7
110*0b57cec5SDimitry Andric#define EXP0 r6
111*0b57cec5SDimitry Andric	{
112*0b57cec5SDimitry Andric		if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
113*0b57cec5SDimitry Andric		EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
114*0b57cec5SDimitry Andric		EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
115*0b57cec5SDimitry Andric	}
116*0b57cec5SDimitry Andric	{
117*0b57cec5SDimitry Andric		PP_LL = neg(PP_HH)
118*0b57cec5SDimitry Andric		EXP0 += add(TMP,EXP1)
119*0b57cec5SDimitry Andric		TMP = xor(AH,BH)
120*0b57cec5SDimitry Andric	}
121*0b57cec5SDimitry Andric	{
122*0b57cec5SDimitry Andric		if (!p2.new) PP_HH = PP_LL
123*0b57cec5SDimitry Andric		p2 = cmp.gt(TMP,#-1)
124*0b57cec5SDimitry Andric		p0 = !cmp.gt(EXP0,BTMPH)
125*0b57cec5SDimitry Andric		p0 = cmp.gt(EXP0,BTMPL)
126*0b57cec5SDimitry Andric		if (!p0.new) jump:nt .Lmul_ovf_unf
127*0b57cec5SDimitry Andric	}
128*0b57cec5SDimitry Andric	{
129*0b57cec5SDimitry Andric		A = convert_d2df(PP_HH)
130*0b57cec5SDimitry Andric		EXP0 = add(EXP0,#-BIAS-58)
131*0b57cec5SDimitry Andric	}
132*0b57cec5SDimitry Andric	{
133*0b57cec5SDimitry Andric		AH += asl(EXP0,#HI_MANTBITS)
134*0b57cec5SDimitry Andric		jumpr r31
135*0b57cec5SDimitry Andric	}
136*0b57cec5SDimitry Andric
137*0b57cec5SDimitry Andric	.falign
138*0b57cec5SDimitry Andric.Lpossible_unf:
139*0b57cec5SDimitry Andric	// We end up with a positive exponent
140*0b57cec5SDimitry Andric	// But we may have rounded up to an exponent of 1.
141*0b57cec5SDimitry Andric	// If the exponent is 1, if we rounded up to it
142*0b57cec5SDimitry Andric	// we need to also raise underflow
143*0b57cec5SDimitry Andric	// Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
144*0b57cec5SDimitry Andric	// And the PP should also have more than one bit set
145*0b57cec5SDimitry Andric	//
146*0b57cec5SDimitry Andric	// Note: ATMP should have abs(PP_HH)
147*0b57cec5SDimitry Andric	// Note: BTMPL should have 0x7FEFFFFF
148*0b57cec5SDimitry Andric	{
149*0b57cec5SDimitry Andric		p0 = cmp.eq(AL,#0)
150*0b57cec5SDimitry Andric		p0 = bitsclr(AH,BTMPL)
151*0b57cec5SDimitry Andric		if (!p0.new) jumpr:t r31
152*0b57cec5SDimitry Andric		BTMPH = #0x7fff
153*0b57cec5SDimitry Andric	}
154*0b57cec5SDimitry Andric	{
155*0b57cec5SDimitry Andric		p0 = bitsset(ATMPH,BTMPH)
156*0b57cec5SDimitry Andric		BTMPL = USR
157*0b57cec5SDimitry Andric		BTMPH = #0x030
158*0b57cec5SDimitry Andric	}
159*0b57cec5SDimitry Andric	{
160*0b57cec5SDimitry Andric		if (p0) BTMPL = or(BTMPL,BTMPH)
161*0b57cec5SDimitry Andric	}
162*0b57cec5SDimitry Andric	{
163*0b57cec5SDimitry Andric		USR = BTMPL
164*0b57cec5SDimitry Andric	}
165*0b57cec5SDimitry Andric	{
166*0b57cec5SDimitry Andric		p0 = dfcmp.eq(A,A)
167*0b57cec5SDimitry Andric		jumpr r31
168*0b57cec5SDimitry Andric	}
169*0b57cec5SDimitry Andric	.falign
170*0b57cec5SDimitry Andric.Lmul_ovf_unf:
171*0b57cec5SDimitry Andric	{
172*0b57cec5SDimitry Andric		A = convert_d2df(PP_HH)
173*0b57cec5SDimitry Andric		ATMP = abs(PP_HH)			// take absolute value
174*0b57cec5SDimitry Andric		EXP1 = add(EXP0,#-BIAS-58)
175*0b57cec5SDimitry Andric	}
176*0b57cec5SDimitry Andric	{
177*0b57cec5SDimitry Andric		AH += asl(EXP1,#HI_MANTBITS)
178*0b57cec5SDimitry Andric		EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
179*0b57cec5SDimitry Andric		BTMPL = ##0x7FEFFFFF
180*0b57cec5SDimitry Andric	}
181*0b57cec5SDimitry Andric	{
182*0b57cec5SDimitry Andric		EXP1 += add(EXP0,##-BIAS-58)
183*0b57cec5SDimitry Andric		//BTMPH = add(clb(ATMP),#-2)
184*0b57cec5SDimitry Andric		BTMPH = #0
185*0b57cec5SDimitry Andric	}
186*0b57cec5SDimitry Andric	{
187*0b57cec5SDimitry Andric		p0 = cmp.gt(EXP1,##BIAS+BIAS-2)	// overflow
188*0b57cec5SDimitry Andric		if (p0.new) jump:nt .Lmul_ovf
189*0b57cec5SDimitry Andric	}
190*0b57cec5SDimitry Andric	{
191*0b57cec5SDimitry Andric		p0 = cmp.gt(EXP1,#0)
192*0b57cec5SDimitry Andric		if (p0.new) jump:nt .Lpossible_unf
193*0b57cec5SDimitry Andric		BTMPH = sub(EXP0,BTMPH)
194*0b57cec5SDimitry Andric		TMP = #63				// max amount to shift
195*0b57cec5SDimitry Andric	}
196*0b57cec5SDimitry Andric	// Underflow
197*0b57cec5SDimitry Andric	//
198*0b57cec5SDimitry Andric	// PP_HH has the partial product with sticky LSB.
199*0b57cec5SDimitry Andric	// PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
200*0b57cec5SDimitry Andric	// PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
201*0b57cec5SDimitry Andric	// The exponent of PP_HH is in  EXP1, which is non-positive (0 or negative)
202*0b57cec5SDimitry Andric	// That's the exponent that happens after the normalization
203*0b57cec5SDimitry Andric	//
204*0b57cec5SDimitry Andric	// EXP0 has the exponent that, when added to the normalized value, is out of range.
205*0b57cec5SDimitry Andric	//
206*0b57cec5SDimitry Andric	// Strategy:
207*0b57cec5SDimitry Andric	//
208*0b57cec5SDimitry Andric	// * Shift down bits, with sticky bit, such that the bits are aligned according
209*0b57cec5SDimitry Andric	//   to the LZ count and appropriate exponent, but not all the way to mantissa
210*0b57cec5SDimitry Andric	//   field, keep around the last few bits.
211*0b57cec5SDimitry Andric	// * Put a 1 near the MSB
212*0b57cec5SDimitry Andric	// * Check the LSBs for inexact; if inexact also set underflow
213*0b57cec5SDimitry Andric	// * Convert [u]d2df -- will correctly round according to rounding mode
214*0b57cec5SDimitry Andric	// * Replace exponent field with zero
215*0b57cec5SDimitry Andric
216*0b57cec5SDimitry Andric	{
217*0b57cec5SDimitry Andric		BTMPL = #0	 			// offset for extract
218*0b57cec5SDimitry Andric		BTMPH = sub(#FUDGE,BTMPH)		// amount to right shift
219*0b57cec5SDimitry Andric	}
220*0b57cec5SDimitry Andric	{
221*0b57cec5SDimitry Andric		p3 = cmp.gt(PP_HH_H,#-1)		// is it positive?
222*0b57cec5SDimitry Andric		BTMPH = min(BTMPH,TMP)			// Don't shift more than 63
223*0b57cec5SDimitry Andric		PP_HH = ATMP
224*0b57cec5SDimitry Andric	}
225*0b57cec5SDimitry Andric	{
226*0b57cec5SDimitry Andric		TMP = USR
227*0b57cec5SDimitry Andric		PP_LL = extractu(PP_HH,BTMP)
228*0b57cec5SDimitry Andric	}
229*0b57cec5SDimitry Andric	{
230*0b57cec5SDimitry Andric		PP_HH = asr(PP_HH,BTMPH)
231*0b57cec5SDimitry Andric		BTMPL = #0x0030					// underflow flag
232*0b57cec5SDimitry Andric		AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
233*0b57cec5SDimitry Andric	}
234*0b57cec5SDimitry Andric	{
235*0b57cec5SDimitry Andric		p0 = cmp.gtu(ONE,PP_LL)				// Did we extract all zeros?
236*0b57cec5SDimitry Andric		if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE)	// add sticky bit
237*0b57cec5SDimitry Andric		PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3)	// Add back in a bit so we can use convert instruction
238*0b57cec5SDimitry Andric	}
239*0b57cec5SDimitry Andric	{
240*0b57cec5SDimitry Andric		PP_LL = neg(PP_HH)
241*0b57cec5SDimitry Andric		p1 = bitsclr(PP_HH_L,#0x7)		// Are the LSB's clear?
242*0b57cec5SDimitry Andric		if (!p1.new) TMP = or(BTMPL,TMP)	// If not, Inexact+Underflow
243*0b57cec5SDimitry Andric	}
244*0b57cec5SDimitry Andric	{
245*0b57cec5SDimitry Andric		if (!p3) PP_HH = PP_LL
246*0b57cec5SDimitry Andric		USR = TMP
247*0b57cec5SDimitry Andric	}
248*0b57cec5SDimitry Andric	{
249*0b57cec5SDimitry Andric		A = convert_d2df(PP_HH)			// Do rounding
250*0b57cec5SDimitry Andric		p0 = dfcmp.eq(A,A)			// realize exception
251*0b57cec5SDimitry Andric	}
252*0b57cec5SDimitry Andric	{
253*0b57cec5SDimitry Andric		AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1)		// Insert correct exponent
254*0b57cec5SDimitry Andric		jumpr r31
255*0b57cec5SDimitry Andric	}
256*0b57cec5SDimitry Andric	.falign
257*0b57cec5SDimitry Andric.Lmul_ovf:
258*0b57cec5SDimitry Andric	// We get either max finite value or infinity.  Either way, overflow+inexact
259*0b57cec5SDimitry Andric	{
260*0b57cec5SDimitry Andric		TMP = USR
261*0b57cec5SDimitry Andric		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
262*0b57cec5SDimitry Andric		A = PP_HH
263*0b57cec5SDimitry Andric	}
264*0b57cec5SDimitry Andric	{
265*0b57cec5SDimitry Andric		PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
266*0b57cec5SDimitry Andric		TMP = or(TMP,#0x28)			// inexact + overflow
267*0b57cec5SDimitry Andric		BTMP = combine(##0x7ff00000,#0)		// positive infinity
268*0b57cec5SDimitry Andric	}
269*0b57cec5SDimitry Andric	{
270*0b57cec5SDimitry Andric		USR = TMP
271*0b57cec5SDimitry Andric		PP_LL_L ^= lsr(AH,#31)			// Does sign match rounding?
272*0b57cec5SDimitry Andric		TMP = PP_LL_L				// unmodified rounding mode
273*0b57cec5SDimitry Andric	}
274*0b57cec5SDimitry Andric	{
275*0b57cec5SDimitry Andric		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
276*0b57cec5SDimitry Andric		p0 = !cmp.eq(PP_LL_L,#2)		// Not rounding the other way,
277*0b57cec5SDimitry Andric		if (p0.new) ATMP = BTMP			// we should get infinity
278*0b57cec5SDimitry Andric		p0 = dfcmp.eq(A,A)			// Realize FP exception if enabled
279*0b57cec5SDimitry Andric	}
280*0b57cec5SDimitry Andric	{
281*0b57cec5SDimitry Andric		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
282*0b57cec5SDimitry Andric		jumpr r31
283*0b57cec5SDimitry Andric	}
284*0b57cec5SDimitry Andric
285*0b57cec5SDimitry Andric.Lmul_abnormal:
286*0b57cec5SDimitry Andric	{
287*0b57cec5SDimitry Andric		ATMP = extractu(A,#63,#0)		// strip off sign
288*0b57cec5SDimitry Andric		BTMP = extractu(B,#63,#0)		// strip off sign
289*0b57cec5SDimitry Andric	}
290*0b57cec5SDimitry Andric	{
291*0b57cec5SDimitry Andric		p3 = cmp.gtu(ATMP,BTMP)
292*0b57cec5SDimitry Andric		if (!p3.new) A = B			// sort values
293*0b57cec5SDimitry Andric		if (!p3.new) B = A			// sort values
294*0b57cec5SDimitry Andric	}
295*0b57cec5SDimitry Andric	{
296*0b57cec5SDimitry Andric		// Any NaN --> NaN, possibly raise invalid if sNaN
297*0b57cec5SDimitry Andric		p0 = dfclass(A,#0x0f)		// A not NaN?
298*0b57cec5SDimitry Andric		if (!p0.new) jump:nt .Linvalid_nan
299*0b57cec5SDimitry Andric		if (!p3) ATMP = BTMP
300*0b57cec5SDimitry Andric		if (!p3) BTMP = ATMP
301*0b57cec5SDimitry Andric	}
302*0b57cec5SDimitry Andric	{
303*0b57cec5SDimitry Andric		// Infinity * nonzero number is infinity
304*0b57cec5SDimitry Andric		p1 = dfclass(A,#0x08)		// A is infinity
305*0b57cec5SDimitry Andric		p1 = dfclass(B,#0x0e)		// B is nonzero
306*0b57cec5SDimitry Andric	}
307*0b57cec5SDimitry Andric	{
308*0b57cec5SDimitry Andric		// Infinity * zero --> NaN, raise invalid
309*0b57cec5SDimitry Andric		// Other zeros return zero
310*0b57cec5SDimitry Andric		p0 = dfclass(A,#0x08)		// A is infinity
311*0b57cec5SDimitry Andric		p0 = dfclass(B,#0x01)		// B is zero
312*0b57cec5SDimitry Andric	}
313*0b57cec5SDimitry Andric	{
314*0b57cec5SDimitry Andric		if (p1) jump .Ltrue_inf
315*0b57cec5SDimitry Andric		p2 = dfclass(B,#0x01)
316*0b57cec5SDimitry Andric	}
317*0b57cec5SDimitry Andric	{
318*0b57cec5SDimitry Andric		if (p0) jump .Linvalid_zeroinf
319*0b57cec5SDimitry Andric		if (p2) jump .Ltrue_zero		// so return zero
320*0b57cec5SDimitry Andric		TMP = ##0x7c000000
321*0b57cec5SDimitry Andric	}
322*0b57cec5SDimitry Andric	// We are left with a normal or subnormal times a subnormal. A > B
323*0b57cec5SDimitry Andric	// If A and B are both very small (exp(a) < BIAS-MANTBITS),
324*0b57cec5SDimitry Andric	// we go to a single sticky bit, which we can round easily.
325*0b57cec5SDimitry Andric	// If A and B might multiply to something bigger, decrease A exponent and increase
326*0b57cec5SDimitry Andric	// B exponent and try again
327*0b57cec5SDimitry Andric	{
328*0b57cec5SDimitry Andric		p0 = bitsclr(AH,TMP)
329*0b57cec5SDimitry Andric		if (p0.new) jump:nt .Lmul_tiny
330*0b57cec5SDimitry Andric	}
331*0b57cec5SDimitry Andric	{
332*0b57cec5SDimitry Andric		TMP = cl0(BTMP)
333*0b57cec5SDimitry Andric	}
334*0b57cec5SDimitry Andric	{
335*0b57cec5SDimitry Andric		TMP = add(TMP,#-EXPBITS)
336*0b57cec5SDimitry Andric	}
337*0b57cec5SDimitry Andric	{
338*0b57cec5SDimitry Andric		BTMP = asl(BTMP,TMP)
339*0b57cec5SDimitry Andric	}
340*0b57cec5SDimitry Andric	{
341*0b57cec5SDimitry Andric		B = insert(BTMP,#63,#0)
342*0b57cec5SDimitry Andric		AH -= asl(TMP,#HI_MANTBITS)
343*0b57cec5SDimitry Andric	}
344*0b57cec5SDimitry Andric	jump __hexagon_muldf3
345*0b57cec5SDimitry Andric.Lmul_tiny:
346*0b57cec5SDimitry Andric	{
347*0b57cec5SDimitry Andric		TMP = USR
348*0b57cec5SDimitry Andric		A = xor(A,B)				// get sign bit
349*0b57cec5SDimitry Andric	}
350*0b57cec5SDimitry Andric	{
351*0b57cec5SDimitry Andric		TMP = or(TMP,#0x30)			// Inexact + Underflow
352*0b57cec5SDimitry Andric		A = insert(ONE,#63,#0)			// put in rounded up value
353*0b57cec5SDimitry Andric		BTMPH = extractu(TMP,#2,#SR_ROUND_OFF)	// get rounding mode
354*0b57cec5SDimitry Andric	}
355*0b57cec5SDimitry Andric	{
356*0b57cec5SDimitry Andric		USR = TMP
357*0b57cec5SDimitry Andric		p0 = cmp.gt(BTMPH,#1)			// Round towards pos/neg inf?
358*0b57cec5SDimitry Andric		if (!p0.new) AL = #0			// If not, zero
359*0b57cec5SDimitry Andric		BTMPH ^= lsr(AH,#31)			// rounding my way --> set LSB
360*0b57cec5SDimitry Andric	}
361*0b57cec5SDimitry Andric	{
362*0b57cec5SDimitry Andric		p0 = cmp.eq(BTMPH,#3)			// if rounding towards right inf
363*0b57cec5SDimitry Andric		if (!p0.new) AL = #0			// don't go to zero
364*0b57cec5SDimitry Andric		jumpr r31
365*0b57cec5SDimitry Andric	}
366*0b57cec5SDimitry Andric.Linvalid_zeroinf:
367*0b57cec5SDimitry Andric	{
368*0b57cec5SDimitry Andric		TMP = USR
369*0b57cec5SDimitry Andric	}
370*0b57cec5SDimitry Andric	{
371*0b57cec5SDimitry Andric		A = #-1
372*0b57cec5SDimitry Andric		TMP = or(TMP,#2)
373*0b57cec5SDimitry Andric	}
374*0b57cec5SDimitry Andric	{
375*0b57cec5SDimitry Andric		USR = TMP
376*0b57cec5SDimitry Andric	}
377*0b57cec5SDimitry Andric	{
378*0b57cec5SDimitry Andric		p0 = dfcmp.uo(A,A)			// force exception if enabled
379*0b57cec5SDimitry Andric		jumpr r31
380*0b57cec5SDimitry Andric	}
381*0b57cec5SDimitry Andric.Linvalid_nan:
382*0b57cec5SDimitry Andric	{
383*0b57cec5SDimitry Andric		p0 = dfclass(B,#0x0f)			// if B is not NaN
384*0b57cec5SDimitry Andric		TMP = convert_df2sf(A)			// will generate invalid if sNaN
385*0b57cec5SDimitry Andric		if (p0.new) B = A 			// make it whatever A is
386*0b57cec5SDimitry Andric	}
387*0b57cec5SDimitry Andric	{
388*0b57cec5SDimitry Andric		BL = convert_df2sf(B)			// will generate invalid if sNaN
389*0b57cec5SDimitry Andric		A = #-1
390*0b57cec5SDimitry Andric		jumpr r31
391*0b57cec5SDimitry Andric	}
392*0b57cec5SDimitry Andric	.falign
393*0b57cec5SDimitry Andric.Ltrue_zero:
394*0b57cec5SDimitry Andric	{
395*0b57cec5SDimitry Andric		A = B
396*0b57cec5SDimitry Andric		B = A
397*0b57cec5SDimitry Andric	}
398*0b57cec5SDimitry Andric.Ltrue_inf:
399*0b57cec5SDimitry Andric	{
400*0b57cec5SDimitry Andric		BH = extract(BH,#1,#31)
401*0b57cec5SDimitry Andric	}
402*0b57cec5SDimitry Andric	{
403*0b57cec5SDimitry Andric		AH ^= asl(BH,#31)
404*0b57cec5SDimitry Andric		jumpr r31
405*0b57cec5SDimitry Andric	}
406*0b57cec5SDimitry AndricEND(__hexagon_muldf3)

// Clean up register-alias macros so a following routine can redefine them.
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH