// Source: freebsd-src/contrib/llvm-project/compiler-rt/lib/builtins/hexagon/dffma.S (revision e25152834cdf3b353892835a4f3b157e066a8ed4)
//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Q6_ALIAS(TAG): also export the routine __hexagon_TAG under the global
// name __qdsp_TAG.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
// END(TAG): set the ELF symbol size of TAG to the distance from TAG to here.
#define END(TAG) .size TAG,.-TAG

// Double Precision Fused Multiply-Add: result = A*B + C


// Operand register pairs and their 32-bit high/low halves.
// The result is produced in A.
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4



// Working copies of the unpacked B, A and C mantissas.  Many of the names
// below are #undef'd and re-#defined onto other registers further down the
// file as values die, so always check the nearest #define.
#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10

// Partial products of the 64x64 mantissa multiply:
// PP_LL = low*low, PP_ODD = the two cross terms, PP_HH = high*high.
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6


// r17:16 and r19:18 are saved to the stack frame before use and reloaded
// before every return from the main path.
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16

// Biased exponent fields of A and B.
#define EXPA r18
#define EXPB r19
#define EXPBA r19:18

// 32-bit scratch register.
#define TMP r28

// Predicate register roles.
#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1

// IEEE-754 binary64 layout: 52 mantissa bits (20 of them in the high word),
// 11 exponent bits, exponent bias 1023.
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
// Bytes requested from allocframe; offsets 0 and 8 hold the saved pairs.
#define STACKSPACE 32

// Constant added to the product exponent (EXPA+EXPB-BIAS) to account for the
// position of the product inside PP_HH.
#define ADJUST 4

// FUDGE2 is the number of extra low-order bits kept by the underflow path.
// FUDGE is not referenced by the code in this file.
#define FUDGE 7
#define FUDGE2 3

// Bit offset of the 2-bit rounding-mode field inside USR (overridable).
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

	// First, classify for normal values, and abort if abnormal
	//
	// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
	//
	// Since we know that the 2 MSBs of the H registers are zero, the partial
	// products that involve the H registers should never produce a carry
	//
	// Try to buy X slots, at the expense of latency if needed
	//
	// We will have PP_HH with the upper bits of the product, PP_LL with the lower
	// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
	// PP_HH can have a minimum of 0x0100_0000_0000_0000
	//
	// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
	//
	// We need to align CTMP.
	// If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
	// If CTMP << PP, align CTMP and add 128 bits.  Then compute sticky
	// If CTMP ~= PP, align CTMP and add 128 bits.  May have massive cancellation.
	//
	// Convert partial product and CTMP to 2's complement prior to addition
	//
	// After we add, we need to normalize into upper 64 bits, then compute sticky.

// Entry point.  __hexagon_fmadf4 and __hexagon_fmadf5 label the same code;
// __qdsp_fmadf5 is exported as an alias of __hexagon_fmadf5.
//   In:  A (r1:0), B (r3:2), C (r5:4)
//   Out: A (r1:0) = A*B + C
// .Lfma_begin is also the restart point used by the abnormal-A/B handler once
// it has rescaled its operands.
	.text
	.global __hexagon_fmadf4
	.type __hexagon_fmadf4,@function
	.global __hexagon_fmadf5
	.type __hexagon_fmadf5,@function
	Q6_ALIAS(fmadf5)
	.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
	{
		// Both compares write P_TMP; two writes to one predicate in a packet
		// are ANDed, so P_TMP = (A is normal) && (B is normal).
		P_TMP = dfclass(A,#2)
		P_TMP = dfclass(B,#2)
		ATMP = #0
		BTMP = #0
	}
	{
		// Place the 52 mantissa bits at bit offset 8 (EXPBITS-3).
		ATMP = insert(A,#MANTBITS,#EXPBITS-3)
		BTMP = insert(B,#MANTBITS,#EXPBITS-3)
		PP_ODD_H = ##0x10000000		// implicit leading one, ORed in below
		allocframe(#STACKSPACE)
	}
	{
		PP_LL = mpyu(ATMPL,BTMPL)	// low x low partial product
		if (!P_TMP) jump .Lfma_abnormal_ab	// A or B is not a normal number
		ATMPH = or(ATMPH,PP_ODD_H)	// add the implicit one to A
		BTMPH = or(BTMPH,PP_ODD_H)	// add the implicit one to B
	}
	{
		P_TMP = dfclass(C,#2)		// is C normal?
		if (!P_TMP.new) jump:nt .Lfma_abnormal_c
		CTMP = combine(PP_ODD_H,#0)	// preload C's implicit one; mantissa is inserted later
		PP_ODD = combine(#0,PP_LL_H)	// start the cross-term sum with the high word of PP_LL
	}
.Lfma_abnormal_c_restart:
	// Reached by fall-through for a normal C, or by a jump from
	// .Lfma_abnormal_c after a subnormal C has been patched up.
	// Finishes the 128-bit mantissa product PP_HH:PP_LL, unpacks C (negating
	// it when C is negative) and extracts the three exponent fields.
	{
		PP_ODD += mpyu(BTMPL,ATMPH)		// first cross term
		CTMP = insert(C,#MANTBITS,#EXPBITS-3)	// C mantissa at bit 8, keeping the preloaded high bits
		memd(r29+#0) = PP_HH			// save r17:16 in the frame
		memd(r29+#8) = EXPBA			// save r19:18 in the frame
	}
	{
		PP_ODD += mpyu(ATMPL,BTMPH)		// second cross term
		EXPBA = neg(CTMP)			// r19:18 used as a temporary: -C mantissa
		P_TMP = cmp.gt(CH,#-1)			// P_TMP = C's sign bit is clear
		TMP = xor(AH,BH)			// bit 31 = sign of the product
	}
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
		PP_HH = combine(#0,PP_ODD_H)		// carry the cross-term high word into PP_HH
		// Reads r19:18 as it was before this packet, i.e. the negated mantissa.
		if (!P_TMP) CTMP = EXPBA
	}
	{
		PP_HH += mpyu(ATMPH,BTMPH)		// high x high partial product
		PP_LL = combine(PP_ODD_L,PP_LL_L)	// low 64 bits of the product
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
// r13:12 is free now that ATMP is dead: reuse it for the shift counts.
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12

		EXPA = add(EXPA,EXPB)			// sum of the two biased exponents
#undef EXPB
#undef EXPBA
// r19 now carries C's exponent instead of B's.
#define EXPC r19
#define EXPCA r19:18
		EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
	}
// Align and add: turn the product into 2's complement, order the two addends
// by exponent, shift the smaller one right (collecting sticky bits) and
// perform the 128-bit addition.  Falls through into normalization.
	// PP_HH:PP_LL now has product
	// CTMP is negated
	// EXPA,B,C are extracted
	// We need to negate PP
	// Since we will be adding with carry later, if we need to negate,
	// just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
	{
		EXPA = add(EXPA,#-BIAS+(ADJUST))	// exponent of the product
		PROD_NEG = !cmp.gt(TMP,#-1)		// product sign bit set?
		PP_LL_TMP = #0
		PP_HH_TMP = #0
	}
	{
		PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry	// 0 - PP_LL, carry chained through p3
		P_TMP = !cmp.gt(TMP,#-1)		// second copy of the sign test; p3 is reused for the carry
		SWAP = cmp.gt(EXPC,EXPA)	// If C >> PP
		if (SWAP.new) EXPCA = combine(EXPA,EXPC)	// exchange the exponents so EXPA is the larger
	}
	{
		PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry	// upper half of the negation
		if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
// CTMP2 is the low 64 bits of the addend that will be shifted right.
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
		CTMP2 = #0
		EXPC = sub(EXPA,EXPC)			// exponent difference = alignment shift
	}
	{
		if (P_TMP) PP_HH = PP_HH_TMP
		P_TMP = cmp.gt(EXPC,#63)		// shift of 64 or more?
		if (SWAP) PP_LL = CTMP2			// exchange the low halves
		if (SWAP) CTMP2 = PP_LL
	}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
	{
		if (SWAP) PP_HH = CTMP	// Swap C and PP
		if (SWAP) CTMP = PP_HH
		if (P_TMP) EXPC = add(EXPC,#-64)
		TMP = #63
	}
	{
		// If diff > 63, pre-shift-right by 64...
		if (P_TMP) CTMP2 = CTMP
		TMP = asr(CTMPH,#31)			// all-ones or zero: sign of the shifted addend
		RIGHTSHIFT = min(EXPC,TMP)		// uses the pre-packet TMP (63): clamp the shift
		LEFTSHIFT = #0
	}
#undef C
#undef CH
#undef CL
// The C operand registers are dead; r5:4 now collects shifted-out bits.
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
	{
		if (P_TMP) CTMP = combine(TMP,TMP)	// sign extension of pre-shift-right-64
		STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)	// width RIGHTSHIFT at offset 0: the bits about to be shifted out
		CTMP2 = lsr(CTMP2,RIGHTSHIFT)
		LEFTSHIFT = sub(#64,RIGHTSHIFT)
	}
	{
		ZERO = #0
		TMP = #-2
		CTMP2 |= lsl(CTMP,LEFTSHIFT)		// bits moving from the high half into the low half
		CTMP = asr(CTMP,RIGHTSHIFT)
	}
	{
		P_CARRY = cmp.gtu(STICKIES,ZERO)	// If we have sticky bits from C shift
		if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
		ONE = #1
		STICKIES = #0
	}
	{
		PP_LL = add(CTMP2,PP_LL,P_CARRY):carry	// use the carry to add the sticky
	}
	{
		PP_HH = add(CTMP,PP_HH,P_CARRY):carry	// upper half, consuming the carry from the low half
		TMP = #62
	}
// Normalize the 128-bit sum into the 64-bit CTMP (folding dropped bits into a
// sticky bit), then convert to double and apply the exponent.  Out-of-range
// exponents go to .Lfma_ovf_unf; an all-zero sum falls through to
// .Ladd_yields_zero.
	// PP_HH:PP_LL now holds the sum
	// We may need to normalize left, up to ??? bits.
	//
	// I think that if we have massive cancellation, the range we normalize by
	// is still limited
	{
		LEFTSHIFT = add(clb(PP_HH),#-2)
		if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f	// all sign bits?
	}
	// We had all sign bits, shift left by 62.
	{
		CTMP = extractu(PP_LL,#62,#2)		// top 62 bits of PP_LL
		PP_LL = asl(PP_LL,#62)
		EXPA = add(EXPA,#-62)			// And adjust exponent of result
	}
	{
		PP_HH = insert(CTMP,#62,#0)		// bits carried up from PP_LL become the low 62 bits of PP_HH
	}
	{
		LEFTSHIFT = add(clb(PP_HH),#-2)		// recompute the normalization shift
	}
	.falign
1:
	{
		CTMP = asl(PP_HH,LEFTSHIFT)
		STICKIES |= asl(PP_LL,LEFTSHIFT)	// low bits that will not fit into CTMP
		RIGHTSHIFT = sub(#64,LEFTSHIFT)
		EXPA = sub(EXPA,LEFTSHIFT)
	}
	{
		CTMP |= lsr(PP_LL,RIGHTSHIFT)		// bring the top of PP_LL into CTMP
		EXACT = cmp.gtu(ONE,STICKIES)		// EXACT = no sticky bits (STICKIES == 0)
		TMP = #BIAS+BIAS-2
	}
	{
		if (!EXACT) CTMPL = or(CTMPL,S_ONE)	// fold sticky into the LSB
		// If EXPA is overflow/underflow, jump to ovf_unf
		// (two writes to P_TMP are ANDed: 1 < EXPA <= 2*BIAS-2)
		P_TMP = !cmp.gt(EXPA,TMP)
		P_TMP = cmp.gt(EXPA,#1)
		if (!P_TMP.new) jump:nt .Lfma_ovf_unf
	}
	{
		// XXX: FIXME: should PP_HH for check of zero be CTMP?
		P_TMP = cmp.gtu(ONE,CTMP)		// is result true zero?
		A = convert_d2df(CTMP)
		EXPA = add(EXPA,#-BIAS-60)
		PP_HH = memd(r29+#0)			// reload saved r17:16
	}
	{
		AH += asl(EXPA,#HI_MANTBITS)		// add the exponent adjustment into the exponent field
		EXPCA = memd(r29+#8)			// reload saved r19:18
		if (!P_TMP) dealloc_return		// not zero, return
	}
.Ladd_yields_zero:
	// We had full cancellation.  Return +/- zero (-0 when round-down)
	// Reached by fall-through from the main path or by a jump from
	// .Lfma_ovf_unf; the saved register pairs are reloaded here in both cases.
	{
		TMP = USR
		A = #0					// default result: +0.0
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
		PP_HH = memd(r29+#0)			// reload saved r17:16
		EXPCA = memd(r29+#8)			// reload saved r19:18
	}
	{
		p0 = cmp.eq(TMP,#2)			// rounding mode 2 (round-down)?
		if (p0.new) AH = ##0x80000000		// then the result is -0.0
		dealloc_return
	}

#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
	// The result exponent was outside the range accepted by the main path.
	// Recompute the final exponent, then dispatch to .Lfma_ovf or
	// .Lpossible_unf, or fall through to the underflow handling below.
	{
		p0 = cmp.gtu(ONE,CTMP)			// sum is exactly zero?
		if (p0.new) jump:nt .Ladd_yields_zero
	}
	{
		A = convert_d2df(CTMP)
		EXPA = add(EXPA,#-BIAS-60)
		TMP = EXPA				// keep the unadjusted exponent (pre-packet value)
	}
#define NEW_EXPB r7
#define NEW_EXPA r6
	{
		AH += asl(EXPA,#HI_MANTBITS)
		NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent field of the converted value, before the add above
	}
	{
		NEW_EXPA = add(EXPA,NEW_EXPB)		// exponent the result would have with unlimited range
		PP_HH = memd(r29+#0)			// reload saved r17:16
		EXPCA = memd(r29+#8)			// reload saved r19:18
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
// From here on EXPA/EXPB live in r6/r7 and ATMP in r9:8.
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
		ATMP = abs(CTMP)
	}
	{
		p0 = cmp.gt(EXPA,##BIAS+BIAS)		// exponent too large for a finite double
		if (p0.new) jump:nt .Lfma_ovf
	}
	{
		p0 = cmp.gt(EXPA,#0)			// exponent still positive: borderline case
		if (p0.new) jump:nt .Lpossible_unf
	}
	{
		// TMP has original EXPA.
		// ATMP is corresponding value
		// Normalize ATMP and shift right to correct location
		EXPB = add(clb(ATMP),#-2)		// Amount to left shift to normalize
		EXPA = sub(#1+5,TMP)			// Amount to right shift to denormalize
		p3 = cmp.gt(CTMPH,#-1)			// p3 = sum is non-negative
	}
	// Underflow
	// We know that the infinite range exponent should be EXPA
	// CTMP is 2's complement, ATMP is abs(CTMP)
	{
		EXPA = add(EXPA,EXPB)		// how much to shift back right
		ATMP = asl(ATMP,EXPB)		// shift left
		AH = USR			// AH/AL are scratch here: current USR ...
		TMP = #63
	}
	{
		EXPB = min(EXPA,TMP)		// clamp the right shift to 63
		EXPA = #0
		AL = #0x0030			// ... and the flag bits to OR into it (presumably underflow+inexact)
	}
	{
		B = extractu(ATMP,EXPBA)	// width EXPB at offset 0: the bits the shift will drop
		ATMP = asr(ATMP,EXPB)
	}
	{
		p0 = cmp.gtu(ONE,B)		// p0 = nothing was dropped
		if (!p0.new) ATMPL = or(ATMPL,S_ONE)	// otherwise set the sticky LSB
		ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
	}
	{
		CTMP = neg(ATMP)
		p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)	// low FUDGE2 bits all clear?
		if (!p1.new) AH = or(AH,AL)		// if not, add the flag bits to the USR image
		B = #0
	}
	{
		if (p3) CTMP = ATMP		// non-negative result: use the magnitude as is
		USR = AH
		TMP = #-BIAS-(MANTBITS+FUDGE2)
	}
	{
		A = convert_d2df(CTMP)
	}
	{
		AH += asl(TMP,#HI_MANTBITS)	// apply the exponent correction to the converted value
		dealloc_return
	}
.Lpossible_unf:
	// The exponent is in range but small.  A already holds the packed result.
	// Return immediately unless A has AL == 0 and none of the 0x7fefffff bits
	// set in AH; in that case USR may get extra flag bits before returning.
	{
		TMP = ##0x7fefffff
		ATMP = abs(CTMP)
	}
	{
		// Two writes to p0 are ANDed.
		p0 = cmp.eq(AL,#0)
		p0 = bitsclr(AH,TMP)
		if (!p0.new) dealloc_return:t
		TMP = #0x7fff
	}
	{
		p0 = bitsset(ATMPH,TMP)		// all of the low 15 bits of ATMPH set?
		BH = USR			// BH/BL are scratch here
		BL = #0x0030			// flag bits to raise (presumably underflow+inexact)
	}
	{
		if (p0) BH = or(BH,BL)
	}
	{
		USR = BH
	}
	{
		p0 = dfcmp.eq(A,A)		// result unused; presumably realizes the flags just written
		dealloc_return
	}
.Lfma_ovf:
	// Overflow.  Result is +/- infinity or +/- the largest finite double,
	// depending on the rounding mode and the sign of the sum:
	//   round-to-zero (mode 1)                         -> largest finite
	//   directed rounding away from the result's sign  -> largest finite
	//   otherwise                                      -> infinity
	// (rounding mode XOR sign == 2 identifies the second case.)
	{
		TMP = USR
		CTMP = combine(##0x7fefffff,#-1)	// largest finite magnitude
		A = CTMP				// pre-packet CTMP: only its sign bit is kept below
	}
	{
		ATMP = combine(##0x7ff00000,#0)		// infinity
		BH = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
		TMP = or(TMP,#0x28)			// flag bits to raise (presumably overflow+inexact)
	}
	{
		USR = TMP
		BH ^= lsr(AH,#31)			// rounding mode XOR sign
		BL = BH					// pre-packet BH: unmodified rounding mode
	}
	{
		// Two writes to p0 are ANDed.  The conditional move must use THIS
		// predicate; it previously sat in the next packet, where p0.new is
		// the always-true inf==inf compare, so infinity was returned in
		// every rounding mode.
		p0 = !cmp.eq(BL,#1)			// not round-to-zero, and
		p0 = !cmp.eq(BH,#2)			// not rounding away from the result's sign
		if (p0.new) CTMP = ATMP			// then the result is infinity
	}
	{
		p0 = dfcmp.eq(ATMP,ATMP)		// result unused; presumably realizes the flags just written
	}
	{
		A = insert(CTMP,#63,#0)			// magnitude into the low 63 bits, sign bit kept
		dealloc_return
	}
#undef CTMP
#undef CTMPH
#undef CTMPL
// In the abnormal-operand handlers r11:10 holds |B|, r9:8 (ATMP) holds |A|,
// and r5:4 is the C operand again.
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10

#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

.Lfma_abnormal_ab:
	// A or B is not a normal number (zero, subnormal, infinity or NaN).
	// The frame allocated at entry is released first; every exit from here is
	// either a plain jumpr r31 or a jump back to .Lfma_begin, which allocates
	// a new frame.
	{
		ATMP = extractu(A,#63,#0)	// |A| as an integer
		BTMP = extractu(B,#63,#0)	// |B| as an integer
		deallocframe
	}
	{
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B		// sort values
		if (!p3.new) B = A
	}
	{
		p0 = dfclass(A,#0x0f)		// A NaN?
		if (!p0.new) jump:nt .Lnan
		if (!p3) ATMP = BTMP		// keep the magnitudes in the same order as A and B
		if (!p3) BTMP = ATMP
	}
	{
		// Two writes to p1 are ANDed: infinity times nonzero.
		p1 = dfclass(A,#0x08)		// A is infinity
		p1 = dfclass(B,#0x0e)		// B is nonzero
	}
	{
		// Two writes to p0 are ANDed: infinity times zero.
		p0 = dfclass(A,#0x08)		// a is inf
		p0 = dfclass(B,#0x01)		// b is zero
	}
	{
		if (p1) jump .Lab_inf
		p2 = dfclass(B,#0x01)		// B is zero (A is finite if the jumps below are reached)
	}
	{
		if (p0) jump .Linvalid
		if (p2) jump .Lab_true_zero
		TMP = ##0x7c000000
	}
	// We are left with a normal or subnormal times a subnormal, A > B
	// If A and B are both very small, we will go to a single sticky bit; replace
	// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
	// if A and B might multiply to something bigger, decrease A exp and increase B exp
	// and start over
	{
		p0 = bitsclr(AH,TMP)		// top five exponent bits of A all clear?
		if (p0.new) jump:nt .Lfma_ab_tiny
	}
	{
		TMP = add(clb(BTMP),#-EXPBITS)	// shift that normalizes the subnormal B
	}
	{
		BTMP = asl(BTMP,TMP)
	}
	{
		B = insert(BTMP,#63,#0)		// rescaled B, sign bit preserved
		AH -= asl(TMP,#HI_MANTBITS)	// compensate by lowering A's exponent
	}
	jump .Lfma_begin

.Lfma_ab_tiny:
	// A and B are both very small: replace the low 63 bits of each with
	// 0x0010_0000_0000_0000 (signs are kept) and restart from the top.
	ATMP = combine(##0x00100000,#0)
	{
		A = insert(ATMP,#63,#0)
		B = insert(ATMP,#63,#0)
	}
	jump .Lfma_begin

.Lab_inf:
	// A is infinity and B is nonzero, so A*B is an infinity whose sign is the
	// XOR of the two operand signs.  The frame has already been released.
	{
		B = lsr(B,#63)			// B = sign bit of B
		p0 = dfclass(C,#0x10)		// is C NaN?
	}
	{
		A ^= asl(B,#63)			// fold B's sign into A
		if (p0) jump .Lnan
	}
	{
		p1 = dfclass(C,#0x08)		// is C infinity?
		if (p1.new) jump:nt .Lfma_inf_plus_inf
	}
	// A*B is +/- inf, C is finite.  Return A
	{
		jumpr r31
	}
	.falign
.Lfma_inf_plus_inf:
	{	// adding infinities of different signs is invalid
		p0 = dfcmp.eq(A,C)
		if (!p0.new) jump:nt .Linvalid
	}
	{
		jumpr r31			// same sign: return A
	}

.Lnan:
	// At least one operand is a NaN, and A holds a NaN on entry.
	// Returns the all-ones bit pattern in A.
	{
		p0 = dfclass(B,#0x10)		// is B NaN?
		p1 = dfclass(C,#0x10)		// is C NaN?
		if (!p0.new) B = A		// replace non-NaN operands with A so that
		if (!p1.new) C = A		// every conversion below sees a NaN
	}
	{	// find sNaNs
		// The converted values are discarded; presumably the conversions are
		// here only for the exception a signaling NaN raises.
		BH = convert_df2sf(B)
		BL = convert_df2sf(C)
	}
	{
		BH = convert_df2sf(A)
		A = #-1
		jumpr r31
	}

.Linvalid:
	// Invalid operation (inf*0 or inf-inf): convert a single-precision
	// signaling NaN to double and return the result in A.
	{
		TMP = ##0x7f800001		// sp snan
	}
	{
		A = convert_sf2df(TMP)		// presumably raises the invalid flag and yields a NaN
		jumpr r31
	}

.Lab_true_zero:
	// B is zero, A is finite number
	// The product is a signed zero, so the result is C unless C is also zero.
	{
		p0 = dfclass(C,#0x10)		// is C NaN?
		if (p0.new) jump:nt .Lnan
		if (p0.new) A = C		// .Lnan expects the NaN in A
	}
	{
		p0 = dfcmp.eq(B,C)		// is C also zero?
		AH = lsr(AH,#31)		// get sign
	}
	{
		BH ^= asl(AH,#31)		// form correctly signed zero in B
		if (!p0) A = C			// If C is not zero, return C
		if (!p0) jumpr r31
	}
	// B has correctly signed zero, C is also zero
.Lzero_plus_zero:
	{
		p0 = cmp.eq(B,C)		// yes, scalar equals.  +0++0 or -0+-0
		if (p0.new) jumpr:t r31
		A = B				// same-signed zeros: return that zero
	}
	// Zeros of opposite sign: +0, or -0 when rounding mode is 2 (round-down).
	{
		TMP = USR
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding mode
		A = #0
	}
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = ##0x80000000
		jumpr r31
	}
#undef BTMP
#undef BTMPH
#undef BTMPL
// r11:10 is the C mantissa again for the restart path.
#define CTMP r11:10
	.falign
.Lfma_abnormal_c:
	// We know that AB is normal * normal
	// C is not normal: zero, subnormal, inf, or NaN.
	{
		p0 = dfclass(C,#0x10)		// is C NaN?
		if (p0.new) jump:nt .Lnan
		if (p0.new) A = C		// move NaN to A
		deallocframe			// unconditional: all exits below run without a frame
	}
	{
		p0 = dfclass(C,#0x08)		// is C inf?
		if (p0.new) A = C		// return C
		if (p0.new) jumpr:nt r31
	}
	// zero or subnormal
	// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
	{
		p0 = dfclass(C,#0x01)		// is C zero?
		if (p0.new) jump:nt __hexagon_muldf3	// tail call: A and B are still intact
		TMP = #1
	}
	// Left with: subnormal
	// Adjust C and jump back to restart
	{
		allocframe(#STACKSPACE)		// oops, deallocated above, re-allocate frame
		CTMP = #0			// no implicit leading one for a subnormal
		CH = insert(TMP,#EXPBITS,#HI_MANTBITS)	// set C's exponent field to 1
		jump .Lfma_abnormal_c_restart
	}
// Size the exported entry points.  The old END(fma) named a symbol that is
// not defined anywhere in this file (the internal label is .Lfma_begin), so
// the real symbols were left without a size.
END(__hexagon_fmadf4)
END(__hexagon_fmadf5)