//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

// Double Precision Fused Multiply-Add

#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4

#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10

#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6

#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16

#define EXPA r18
#define EXPB r19
#define EXPBA r19:18

#define TMP r28

#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32

#define ADJUST 4

#define FUDGE 7
#define FUDGE2 3

#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
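// Register conventions (Hexagon ABI): A (r1:0) and B (r3:2) are the two
// multiplicands, C (r5:4) is the addend, and the result is returned in A.
// PP_HH (r17:16) and EXPBA (r19:18) live in callee-saved registers, which is
// why they are spilled to the frame created by allocframe(#STACKSPACE) and
// reloaded before every dealloc_return.
//
// dfclass() masks, as the comments below use them: 0x01 zero, 0x02 normal,
// 0x04 subnormal, 0x08 infinity, 0x10 NaN; an OR of masks tests for any of
// the listed classes.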
    // First, classify for normal values, and abort if abnormal
    //
    // Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
    //
    // Since we know that the 2 MSBs of the H registers are zero, we should
    // never carry out of the partial products that involve the H registers
    //
    // Try to buy X slots, at the expense of latency if needed
    //
    // We will have PP_HH with the upper bits of the product, PP_LL with the lower
    // PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
    // PP_HH can have a minimum of 0x0100_0000_0000_0000
    //
    // 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
    //
    // We need to align CTMP.
    // If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
    // If CTMP << PP, align CTMP and add 128 bits.  Then compute sticky
    // If CTMP ~= PP, align CTMP and add 128 bits.  May have massive cancellation.
    //
    // Convert partial product and CTMP to 2's complement prior to addition
    //
    // After we add, we need to normalize into upper 64 bits, then compute sticky.
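    // Roughly, the plan above corresponds to this sketch (illustrative
    // pseudo-C only; the names here are invented and everything is really
    // kept in registers):
    //
    //   ma = (1 << 60) | (mant_a << 8);     // implicit bit at bit 60
    //   mb = (1 << 60) | (mant_b << 8);
    //   mc = (1 << 60) | (mant_c << 8);     // negated if C is negative
    //   prod = (uint128)ma * mb;            // exact product, PP_HH:PP_LL
    //   if (sign(A) != sign(B)) prod = -prod;
    //   // right-shift whichever of prod/mc has the smaller exponent,
    //   // collecting the shifted-out bits as a sticky bit, then add
    //   sum = prod + mc;
    //   // normalize sum into the upper 64 bits, fold the sticky bit into
    //   // the LSB, round with convert_d2df, and rebias the exponent field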
    .text
    .global __hexagon_fmadf4
    .type __hexagon_fmadf4,@function
    .global __hexagon_fmadf5
    .type __hexagon_fmadf5,@function
    Q6_ALIAS(fmadf5)
    .p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
    {
        P_TMP = dfclass(A,#2)
        P_TMP = dfclass(B,#2)
        ATMP = #0
        BTMP = #0
    }
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-3)
        BTMP = insert(B,#MANTBITS,#EXPBITS-3)
        PP_ODD_H = ##0x10000000
        allocframe(#STACKSPACE)
    }
    {
        PP_LL = mpyu(ATMPL,BTMPL)
        if (!P_TMP) jump .Lfma_abnormal_ab
        ATMPH = or(ATMPH,PP_ODD_H)
        BTMPH = or(BTMPH,PP_ODD_H)
    }
    {
        P_TMP = dfclass(C,#2)
        if (!P_TMP.new) jump:nt .Lfma_abnormal_c
        CTMP = combine(PP_ODD_H,#0)
        PP_ODD = combine(#0,PP_LL_H)
    }
.Lfma_abnormal_c_restart:
    {
        PP_ODD += mpyu(BTMPL,ATMPH)
        CTMP = insert(C,#MANTBITS,#EXPBITS-3)
        memd(r29+#0) = PP_HH
        memd(r29+#8) = EXPBA
    }
    {
        PP_ODD += mpyu(ATMPL,BTMPH)
        EXPBA = neg(CTMP)
        P_TMP = cmp.gt(CH,#-1)
        TMP = xor(AH,BH)
    }
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
        PP_HH = combine(#0,PP_ODD_H)
        if (!P_TMP) CTMP = EXPBA
    }
    {
        PP_HH += mpyu(ATMPH,BTMPH)
        PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12

        EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
        EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
    }
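    // The mpyu sequence above is a 64x64->128 schoolbook multiply of the two
    // fixed-point mantissas: PP_LL = ATMPL*BTMPL, PP_ODD accumulates the high
    // word of that plus the two cross products ATMPH*BTMPL and ATMPL*BTMPH,
    // and PP_HH = ATMPH*BTMPH plus the high word of PP_ODD; the final PP_LL
    // recombines PP_ODD's low word with the original low word.  Because the
    // mantissas are below 2^61, the high words are below 2^29 and none of the
    // 64-bit accumulations can overflow.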
    // PP_HH:PP_LL now has product
    // CTMP is negated
    // EXPA,B,C are extracted
    // We need to negate PP
    // Since we will be adding with carry later, if we need to negate,
    // just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
    {
        EXPA = add(EXPA,#-BIAS+(ADJUST))
        PROD_NEG = !cmp.gt(TMP,#-1)
        PP_LL_TMP = #0
        PP_HH_TMP = #0
    }
    {
        PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
        P_TMP = !cmp.gt(TMP,#-1)
        SWAP = cmp.gt(EXPC,EXPA)                // If C >> PP
        if (SWAP.new) EXPCA = combine(EXPA,EXPC)
    }
    {
        PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
        if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
        CTMP2 = #0
        EXPC = sub(EXPA,EXPC)
    }
    {
        if (P_TMP) PP_HH = PP_HH_TMP
        P_TMP = cmp.gt(EXPC,#63)
        if (SWAP) PP_LL = CTMP2
        if (SWAP) CTMP2 = PP_LL
    }
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
    {
        if (SWAP) PP_HH = CTMP                  // Swap C and PP
        if (SWAP) CTMP = PP_HH
        if (P_TMP) EXPC = add(EXPC,#-64)
        TMP = #63
    }
    {
        // If diff > 63, pre-shift-right by 64...
        if (P_TMP) CTMP2 = CTMP
        TMP = asr(CTMPH,#31)
        RIGHTSHIFT = min(EXPC,TMP)
        LEFTSHIFT = #0
    }
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
    {
        if (P_TMP) CTMP = combine(TMP,TMP)      // sign extension of pre-shift-right-64
        STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
        CTMP2 = lsr(CTMP2,RIGHTSHIFT)
        LEFTSHIFT = sub(#64,RIGHTSHIFT)
    }
    {
        ZERO = #0
        TMP = #-2
        CTMP2 |= lsl(CTMP,LEFTSHIFT)
        CTMP = asr(CTMP,RIGHTSHIFT)
    }
    {
        P_CARRY = cmp.gtu(STICKIES,ZERO)        // If we have sticky bits from C shift
        if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
        ONE = #1
        STICKIES = #0
    }
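    // Any addend bits shifted out above were collected in STICKIES.  If there
    // were any, P_CARRY was set and bit 0 of CTMP2L was cleared, so the
    // carry-in on the 128-bit add below effectively turns the low bit of the
    // shifted addend into a sticky 1 (adding 1 equals OR when bit 0 is clear).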
    {
        PP_LL = add(CTMP2,PP_LL,P_CARRY):carry  // use the carry to add the sticky
    }
    {
        PP_HH = add(CTMP,PP_HH,P_CARRY):carry
        TMP = #62
    }
    // PP_HH:PP_LL now holds the sum
    // We may need to normalize left, up to ??? bits.
    //
    // I think that if we have massive cancellation, the range we normalize by
    // is still limited
    {
        LEFTSHIFT = add(clb(PP_HH),#-2)
        if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f   // all sign bits?
    }
    // We had all sign bits, shift left by 62.
    {
        CTMP = extractu(PP_LL,#62,#2)
        PP_LL = asl(PP_LL,#62)
        EXPA = add(EXPA,#-62)                   // And adjust exponent of result
    }
    {
        PP_HH = insert(CTMP,#62,#0)             // Then shift 63
    }
    {
        LEFTSHIFT = add(clb(PP_HH),#-2)
    }
    .falign
1:
    {
        CTMP = asl(PP_HH,LEFTSHIFT)
        STICKIES |= asl(PP_LL,LEFTSHIFT)
        RIGHTSHIFT = sub(#64,LEFTSHIFT)
        EXPA = sub(EXPA,LEFTSHIFT)
    }
    {
        CTMP |= lsr(PP_LL,RIGHTSHIFT)
        EXACT = cmp.gtu(ONE,STICKIES)
        TMP = #BIAS+BIAS-2
    }
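    // Final packing for the common case: if any bits were lost (not EXACT),
    // OR a 1 into the low bit so it acts as a sticky bit; leave for
    // .Lfma_ovf_unf if the exponent fell outside the fast-path range.
    // Otherwise convert_d2df rounds the 64-bit fixed-point value according to
    // the USR rounding mode, the precomputed exponent adjustment is added
    // directly into the exponent field, and an exactly-zero sum falls through
    // to .Ladd_yields_zero to produce the correctly signed zero.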
    {
        if (!EXACT) CTMPL = or(CTMPL,S_ONE)
        // If EXPA is overflow/underflow, jump to ovf_unf
        P_TMP = !cmp.gt(EXPA,TMP)
        P_TMP = cmp.gt(EXPA,#1)
        if (!P_TMP.new) jump:nt .Lfma_ovf_unf
    }
    {
        // XXX: FIXME: should PP_HH for check of zero be CTMP?
        P_TMP = cmp.gtu(ONE,CTMP)               // is result true zero?
        A = convert_d2df(CTMP)
        EXPA = add(EXPA,#-BIAS-60)
        PP_HH = memd(r29+#0)
    }
    {
        AH += asl(EXPA,#HI_MANTBITS)
        EXPCA = memd(r29+#8)
        if (!P_TMP) dealloc_return              // not zero, return
    }
.Ladd_yields_zero:
    // We had full cancellation.  Return +/- zero (-0 when rounding down)
    {
        TMP = USR
        A = #0
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)
        PP_HH = memd(r29+#0)
        EXPCA = memd(r29+#8)
    }
    {
        p0 = cmp.eq(TMP,#2)
        if (p0.new) AH = ##0x80000000
        dealloc_return
    }

#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
    {
        p0 = cmp.gtu(ONE,CTMP)
        if (p0.new) jump:nt .Ladd_yields_zero
    }
    {
        A = convert_d2df(CTMP)
        EXPA = add(EXPA,#-BIAS-60)
        TMP = EXPA
    }
#define NEW_EXPB r7
#define NEW_EXPA r6
    {
        AH += asl(EXPA,#HI_MANTBITS)
        NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
    }
    {
        NEW_EXPA = add(EXPA,NEW_EXPB)
        PP_HH = memd(r29+#0)
        EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
        ATMP = abs(CTMP)
    }
    {
        p0 = cmp.gt(EXPA,##BIAS+BIAS)
        if (p0.new) jump:nt .Lfma_ovf
    }
    {
        p0 = cmp.gt(EXPA,#0)
        if (p0.new) jump:nt .Lpossible_unf
    }
    {
        // TMP has the original EXPA.
        // ATMP is the corresponding value
        // Normalize ATMP and shift right to the correct location
        EXPB = add(clb(ATMP),#-2)               // Amount to left shift to normalize
        EXPA = sub(#1+5,TMP)                    // Amount to right shift to denormalize
        p3 = cmp.gt(CTMPH,#-1)
    }
    // Underflow
    // We know that the infinite-range exponent should be EXPA
    // CTMP is 2's complement, ATMP is abs(CTMP)
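    // Plan for the denormal result: normalize the magnitude, shift it right
    // into denormal position (shift clamped to 63) while folding shifted-out
    // bits into a sticky LSB, and set a guide bit at HI_MANTBITS+FUDGE2 of
    // the high word so convert_d2df produces a value with a known exponent.
    // Subtracting BIAS+MANTBITS+FUDGE2 from the exponent field afterwards
    // scales the packed value down into the denormal range, and the
    // underflow/inexact sticky bits (the 0x0030 OR-ed into USR) are raised
    // when fraction bits are discarded.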
    {
        EXPA = add(EXPA,EXPB)                   // how much to shift back right
        ATMP = asl(ATMP,EXPB)                   // shift left
        AH = USR
        TMP = #63
    }
    {
        EXPB = min(EXPA,TMP)
        EXPA = #0
        AL = #0x0030
    }
    {
        B = extractu(ATMP,EXPBA)
        ATMP = asr(ATMP,EXPB)
    }
    {
        p0 = cmp.gtu(ONE,B)
        if (!p0.new) ATMPL = or(ATMPL,S_ONE)
        ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
    }
    {
        CTMP = neg(ATMP)
        p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
        if (!p1.new) AH = or(AH,AL)
        B = #0
    }
    {
        if (p3) CTMP = ATMP
        USR = AH
        TMP = #-BIAS-(MANTBITS+FUDGE2)
    }
    {
        A = convert_d2df(CTMP)
    }
    {
        AH += asl(TMP,#HI_MANTBITS)
        dealloc_return
    }
.Lpossible_unf:
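    // The rounded result is still a finite normal number.  The only fixup
    // needed is when it landed right at the bottom of the normal range (zero
    // mantissa, exponent field 0 or 1): if the unrounded magnitude shows the
    // value was rounded up from below that boundary, OR the underflow/inexact
    // sticky bits (0x0030) into USR before returning.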
    {
        TMP = ##0x7fefffff
        ATMP = abs(CTMP)
    }
    {
        p0 = cmp.eq(AL,#0)
        p0 = bitsclr(AH,TMP)
        if (!p0.new) dealloc_return:t
        TMP = #0x7fff
    }
    {
        p0 = bitsset(ATMPH,TMP)
        BH = USR
        BL = #0x0030
    }
    {
        if (p0) BH = or(BH,BL)
    }
    {
        USR = BH
    }
    {
        p0 = dfcmp.eq(A,A)
        dealloc_return
    }
.Lfma_ovf:
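    // Overflow: the result becomes either +/-infinity or the largest finite
    // double (0x7fefffff_ffffffff), keeping the sign of the computed sum; the
    // rounding mode and the sign are examined to choose between the two, and
    // the overflow/inexact sticky bits (the 0x28 OR-ed into USR) are raised.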
    {
        TMP = USR
        CTMP = combine(##0x7fefffff,#-1)
        A = CTMP
    }
    {
        ATMP = combine(##0x7ff00000,#0)
        BH = extractu(TMP,#2,#SR_ROUND_OFF)
        TMP = or(TMP,#0x28)
    }
    {
        USR = TMP
        BH ^= lsr(AH,#31)
        BL = BH
    }
    {
        p0 = !cmp.eq(BL,#1)
        p0 = !cmp.eq(BH,#2)
    }
    {
        p0 = dfcmp.eq(ATMP,ATMP)
        if (p0.new) CTMP = ATMP
    }
    {
        A = insert(CTMP,#63,#0)
        dealloc_return
    }
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10

#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

.Lfma_abnormal_ab:
    {
        ATMP = extractu(A,#63,#0)
        BTMP = extractu(B,#63,#0)
        deallocframe
    }
    {
        p3 = cmp.gtu(ATMP,BTMP)
        if (!p3.new) A = B                      // sort values
        if (!p3.new) B = A
    }
    {
        p0 = dfclass(A,#0x0f)                   // A NaN?
        if (!p0.new) jump:nt .Lnan
        if (!p3) ATMP = BTMP
        if (!p3) BTMP = ATMP
    }
    {
        p1 = dfclass(A,#0x08)                   // A is infinity
        p1 = dfclass(B,#0x0e)                   // B is nonzero
    }
    {
        p0 = dfclass(A,#0x08)                   // A is inf
        p0 = dfclass(B,#0x01)                   // B is zero
    }
    {
        if (p1) jump .Lab_inf
        p2 = dfclass(B,#0x01)
    }
    {
        if (p0) jump .Linvalid
        if (p2) jump .Lab_true_zero
        TMP = ##0x7c000000
    }
    // We are left with a normal or subnormal times a subnormal, A > B
    // If A and B are both very small, we will go to a single sticky bit; replace
    // A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
    // If A and B might multiply to something bigger, decrease A exp and increase B exp
    // and start over
    {
        p0 = bitsclr(AH,TMP)
        if (p0.new) jump:nt .Lfma_ab_tiny
    }
    {
        TMP = add(clb(BTMP),#-EXPBITS)
    }
    {
        BTMP = asl(BTMP,TMP)
    }
    {
        B = insert(BTMP,#63,#0)
        AH -= asl(TMP,#HI_MANTBITS)
    }
    jump .Lfma_begin

.Lfma_ab_tiny:
    ATMP = combine(##0x00100000,#0)
    {
        A = insert(ATMP,#63,#0)
        B = insert(ATMP,#63,#0)
    }
    jump .Lfma_begin

.Lab_inf:
    {
        B = lsr(B,#63)
        p0 = dfclass(C,#0x10)
    }
    {
        A ^= asl(B,#63)
        if (p0) jump .Lnan
    }
    {
        p1 = dfclass(C,#0x08)
        if (p1.new) jump:nt .Lfma_inf_plus_inf
    }
    // A*B is +/- inf, C is finite.  Return A
    {
        jumpr r31
    }
    .falign
.Lfma_inf_plus_inf:
    {   // adding infinities of different signs is invalid
        p0 = dfcmp.eq(A,C)
        if (!p0.new) jump:nt .Linvalid
    }
    {
        jumpr r31
    }

.Lnan:
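    // At least one input is a NaN.  Operands that are not NaN are overwritten
    // with A, every slot is then run through convert_df2sf so that a
    // signaling NaN raises the invalid-operation flag, and the return value
    // is a canonical all-ones quiet NaN.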
    {
        p0 = dfclass(B,#0x10)
        p1 = dfclass(C,#0x10)
        if (!p0.new) B = A
        if (!p1.new) C = A
    }
    {   // find sNaNs
        BH = convert_df2sf(B)
        BL = convert_df2sf(C)
    }
    {
        BH = convert_df2sf(A)
        A = #-1
        jumpr r31
    }

.Linvalid:
    {
        TMP = ##0x7f800001                      // single-precision sNaN
    }
    {
        A = convert_sf2df(TMP)
        jumpr r31
    }

.Lab_true_zero:
    // B is zero, A is a finite number
    {
        p0 = dfclass(C,#0x10)
        if (p0.new) jump:nt .Lnan
        if (p0.new) A = C
    }
    {
        p0 = dfcmp.eq(B,C)                      // is C also zero?
        AH = lsr(AH,#31)                        // get sign
    }
    {
        BH ^= asl(AH,#31)                       // form correctly signed zero in B
        if (!p0) A = C                          // If C is not zero, return C
        if (!p0) jumpr r31
    }
    // B has the correctly signed zero, C is also zero
.Lzero_plus_zero:
    {
        p0 = cmp.eq(B,C)                        // yes, scalar equals:  +0 + +0 or -0 + -0
        if (p0.new) jumpr:t r31
        A = B
    }
    {
        TMP = USR
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)
        A = #0
    }
    {
        p0 = cmp.eq(TMP,#2)
        if (p0.new) AH = ##0x80000000
        jumpr r31
    }
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
    .falign
.Lfma_abnormal_c:
    // We know that A*B is normal * normal
    // C is not normal: zero, subnormal, inf, or NaN.
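    // A NaN C goes to the common NaN handler and an infinite C is returned
    // as-is.  A zero C reduces the fma to an ordinary product, so we can
    // tail-call __hexagon_muldf3.  Only a subnormal C continues: its exponent
    // field is set to 1 (the scale a subnormal actually has) while CTMP is
    // left without the implicit bit, and the main path is re-entered at
    // .Lfma_abnormal_c_restart.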
    {
        p0 = dfclass(C,#0x10)                   // is C NaN?
        if (p0.new) jump:nt .Lnan
        if (p0.new) A = C                       // move NaN to A
        deallocframe
    }
    {
        p0 = dfclass(C,#0x08)                   // is C inf?
        if (p0.new) A = C                       // return C
        if (p0.new) jumpr:nt r31
    }
    // zero or subnormal
    // If we have a zero, and we know AB is normal*normal, we can just call normal multiply
    {
        p0 = dfclass(C,#0x01)                   // is C zero?
        if (p0.new) jump:nt __hexagon_muldf3
        TMP = #1
    }
    // Left with: subnormal
    // Adjust C and jump back to restart
    {
        allocframe(#STACKSPACE)                 // oops, deallocated above, re-allocate frame
        CTMP = #0
        CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
        jump .Lfma_abnormal_c_restart
    }
END(fma)