//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Multiply
//
// __hexagon_muldf3: IEEE-754 double-precision multiply for Hexagon.
//   Input:  A in r1:0, B in r3:2 (standard Hexagon argument registers)
//   Output: A*B in r1:0
// Also exported under the __qdsp_ / __hexagon_fast_ / __hexagon_fast2_
// alias names below.
//
// Hexagon packet semantics relied on throughout this file:
//  * Two writes to the same predicate register in one packet AND together,
//    so e.g. the pair of dfclass() writes to p0 below means "both tests true".
//  * A ".new" predicate use reads the value produced earlier in the SAME
//    packet, regardless of textual order inside the braces.

// Register aliases: A/B are the operands, *TMP are scratch pairs,
// PP_* hold the 128-bit partial product (LL = low, ODD = middle, HH = high).
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

#define BTMP r5:4
#define BTMPH r5
#define BTMPL r4

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6

// ONE holds constant 1; S_ONE/S_ZERO are its low (=1) and high (=0) words,
// used as handy 1/0 single-register constants.
#define ONE r9:8
#define S_ONE r8
#define S_ZERO r9

#define PP_HH r11:10
#define PP_HH_H r11
#define PP_HH_L r10

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define PP_LL r15:14
#define PP_LL_H r15
#define PP_LL_L r14

#define TMP r28

// IEEE-754 double layout constants.
// NOTE(review): BIAS is 1024 here rather than the IEEE 1023; the fixed
// adjustments applied later (e.g. #-BIAS-58) appear to compensate for this
// together with the fixed-point position of the product — confirm against
// the companion Hexagon soft-float sources before changing.
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52

// Some constant to adjust normalization amount in error code
// Amount to right shift the partial product to get to a denorm
#define FUDGE 5

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

// Bit offset of the rounding-mode field in the USR (user status) register.
#define SR_ROUND_OFF 22
	.text
	.global __hexagon_muldf3
	.type __hexagon_muldf3,@function
	Q6_ALIAS(muldf3)
	FAST_ALIAS(muldf3)
	FAST2_ALIAS(muldf3)
	.p2align 5
__hexagon_muldf3:
	{
		// p0 = "A is normal AND B is normal" (dual writes AND together).
		// Any other class (zero/subnormal/inf/NaN) takes .Lmul_abnormal.
		p0 = dfclass(A,#2)
		p0 = dfclass(B,#2)
		ATMP = combine(##0x40000000,#0)
	}
	{
		// Build the full 53-bit significands (implicit leading 1 supplied
		// via the 0x40000000 seeded into ATMP above / the insert into BTMP
		// below), positioned for the 32x32 partial-product multiplies.
		ATMP = insert(A,#MANTBITS,#EXPBITS-1)
		BTMP = asl(B,#EXPBITS-1)
		TMP = #-BIAS
		ONE = #1
	}
	{
		PP_ODD = mpyu(BTMPL,ATMPH)
		BTMP = insert(ONE,#2,#62)
	}
	// since we know that the MSB of the H registers is zero, we should never carry
	// H <= 2^31-1.  L <= 2^32-1.  Therefore, HL <= 2^63-2^32-2^31+1
	// Adding 2 HLs, we get 2^64-3*2^32+2 maximum.
	// Therefore, we can add 3 2^32-1 values safely without carry.  We only need one.
	{
		PP_LL = mpyu(ATMPL,BTMPL)
		PP_ODD += mpyu(ATMPL,BTMPH)
	}
	{
		// Fold the carries of the low product into the middle term and
		// start the high product; BTMP is repurposed as the exponent
		// range limit (2*BIAS-4) used for the in-range test below.
		PP_ODD += lsr(PP_LL,#32)
		PP_HH = mpyu(ATMPH,BTMPH)
		BTMP = combine(##BIAS+BIAS-4,#0)
	}
	{
		PP_HH += lsr(PP_ODD,#32)
		if (!p0) jump .Lmul_abnormal
		p1 = cmp.eq(PP_LL_L,#0)		// 64 lsb's 0?
		p1 = cmp.eq(PP_ODD_L,#0)	// 64 lsb's 0?
	}

	// PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
	// PP_HH can have a minimum of 0x1000_0000_0000_0000 or so

// PP_ODD is consumed; r7:6 is reused for the (combined) exponent.
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#define EXP10 r7:6
#define EXP1 r7
#define EXP0 r6
	{
		// If any bit below the kept 64 was nonzero, set the sticky LSB of
		// PP_HH so later rounding sees the product as inexact.
		if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
		EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
	}
	{
		// EXP0 = exp(A) + exp(B) - BIAS; TMP's sign bit = sign of result.
		PP_LL = neg(PP_HH)
		EXP0 += add(TMP,EXP1)
		TMP = xor(AH,BH)
	}
	{
		// p2 = result non-negative (xor of sign bits clear); otherwise
		// negate the product.  p0 = exponent within [BTMPL..BTMPH] range
		// (dual writes AND); out-of-range goes to overflow/underflow path.
		if (!p2.new) PP_HH = PP_LL
		p2 = cmp.gt(TMP,#-1)
		p0 = !cmp.gt(EXP0,BTMPH)
		p0 = cmp.gt(EXP0,BTMPL)
		if (!p0.new) jump:nt .Lmul_ovf_unf
	}
	{
		// Normal path: let convert_d2df round the 64-bit signed product,
		// then patch the exponent field with the true biased exponent.
		A = convert_d2df(PP_HH)
		EXP0 = add(EXP0,#-BIAS-58)
	}
	{
		AH += asl(EXP0,#HI_MANTBITS)
		jumpr r31
	}

	.falign
.Lpossible_unf:
	// We end up with a positive exponent
	// But we may have rounded up to an exponent of 1.
	// If the exponent is 1, if we rounded up to it
	// we need to also raise underflow
	// Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
	// And the PP should also have more than one bit set
	//
	// Note: ATMP should have abs(PP_HH)
	// Note: BTMPL should have 0x7FEFFFFF
	{
		// p0 = result is exactly +/- 0x0010_0000_0000_0000 (smallest
		// normal); otherwise the result stands as-is.
		p0 = cmp.eq(AL,#0)
		p0 = bitsclr(AH,BTMPL)
		if (!p0.new) jumpr:t r31
		BTMPH = #0x7fff
	}
	{
		// If the partial product had bits besides the leading one, we
		// rounded up into the smallest normal: raise underflow+inexact.
		p0 = bitsset(ATMPH,BTMPH)
		BTMPL = USR
		BTMPH = #0x030		// underflow | inexact flag bits
	}
	{
		if (p0) BTMPL = or(BTMPL,BTMPH)
	}
	{
		USR = BTMPL
	}
	{
		p0 = dfcmp.eq(A,A)	// realize exception (trap if enabled)
		jumpr r31
	}
	.falign
.Lmul_ovf_unf:
	{
		A = convert_d2df(PP_HH)
		ATMP = abs(PP_HH)	// take absolute value
		EXP1 = add(EXP0,#-BIAS-58)
	}
	{
		AH += asl(EXP1,#HI_MANTBITS)
		EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
		BTMPL = ##0x7FEFFFFF
	}
	{
		EXP1 += add(EXP0,##-BIAS-58)
		//BTMPH = add(clb(ATMP),#-2)
		BTMPH = #0
	}
	{
		p0 = cmp.gt(EXP1,##BIAS+BIAS-2)	// overflow
		if (p0.new) jump:nt .Lmul_ovf
	}
	{
		p0 = cmp.gt(EXP1,#0)
		if (p0.new) jump:nt .Lpossible_unf
		BTMPH = sub(EXP0,BTMPH)
		TMP = #63	// max amount to shift
	}
	// Underflow
	//
	// PP_HH has the partial product with sticky LSB.
	// PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
	// PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
	// The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
	// That's the exponent that happens after the normalization
	//
	// EXP0 has the exponent that, when added to the normalized value, is out of range.
	//
	// Strategy:
	//
	// * Shift down bits, with sticky bit, such that the bits are aligned according
	// to the LZ count and appropriate exponent, but not all the way to mantissa
	// field, keep around the last few bits.
	// * Put a 1 near the MSB
	// * Check the LSBs for inexact; if inexact also set underflow
	// * Convert [u]d2df -- will correctly round according to rounding mode
	// * Replace exponent field with zero

	{
		BTMPL = #0	// offset for extract
		BTMPH = sub(#FUDGE,BTMPH)	// amount to right shift
	}
	{
		p3 = cmp.gt(PP_HH_H,#-1)	// is it positive?
		BTMPH = min(BTMPH,TMP)	// Don't shift more than 63
		PP_HH = ATMP
	}
	{
		TMP = USR
		// PP_LL = the bits that will be shifted out (width BTMPH, offset 0);
		// used below to decide whether to set the sticky bit.
		PP_LL = extractu(PP_HH,BTMP)
	}
	{
		PP_HH = asr(PP_HH,BTMPH)
		BTMPL = #0x0030		// underflow flag
		AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
	}
	{
		p0 = cmp.gtu(ONE,PP_LL)	// Did we extract all zeros?
		if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE)	// add sticky bit
		PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3)	// Add back in a bit so we can use convert instruction
	}
	{
		PP_LL = neg(PP_HH)
		p1 = bitsclr(PP_HH_L,#0x7)	// Are the LSB's clear?
		if (!p1.new) TMP = or(BTMPL,TMP)	// If not, Inexact+Underflow
	}
	{
		if (!p3) PP_HH = PP_LL	// restore the sign saved in p3 above
		USR = TMP
	}
	{
		A = convert_d2df(PP_HH)	// Do rounding
		p0 = dfcmp.eq(A,A)	// realize exception
	}
	{
		AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1)	// Insert correct exponent
		jumpr r31
	}
	.falign
.Lmul_ovf:
	// We get either max finite value or infinity.  Either way, overflow+inexact
	{
		TMP = USR
		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
		A = PP_HH
	}
	{
		PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
		TMP = or(TMP,#0x28)	// inexact + overflow
		BTMP = combine(##0x7ff00000,#0)	// positive infinity
	}
	{
		USR = TMP
		PP_LL_L ^= lsr(AH,#31)	// Does sign match rounding?
		TMP = PP_LL_L	// unmodified rounding mode
	}
	{
		// Round-to-zero (mode 1), or rounding away from this sign
		// (mode 2/3 mismatch), clamps to max finite; otherwise infinity.
		p0 = !cmp.eq(TMP,#1)	// If not round-to-zero and
		p0 = !cmp.eq(PP_LL_L,#2)	// Not rounding the other way,
		if (p0.new) ATMP = BTMP	// we should get infinity
		p0 = dfcmp.eq(A,A)	// Realize FP exception if enabled
	}
	{
		A = insert(ATMP,#63,#0)	// insert inf/maxfinite, leave sign
		jumpr r31
	}

.Lmul_abnormal:
	// At least one operand is zero, subnormal, infinity, or NaN.
	{
		ATMP = extractu(A,#63,#0)	// strip off sign
		BTMP = extractu(B,#63,#0)	// strip off sign
	}
	{
		// Sort so |A| >= |B| (the packet swap reads the pre-packet values
		// of A and B, so this exchanges them rather than duplicating).
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B	// sort values
		if (!p3.new) B = A	// sort values
	}
	{
		// Any NaN --> NaN, possibly raise invalid if sNaN
		p0 = dfclass(A,#0x0f)	// A not NaN?
		if (!p0.new) jump:nt .Linvalid_nan
		if (!p3) ATMP = BTMP	// keep ATMP/BTMP sorted to match A/B
		if (!p3) BTMP = ATMP
	}
	{
		// Infinity * nonzero number is infinity
		p1 = dfclass(A,#0x08)	// A is infinity
		p1 = dfclass(B,#0x0e)	// B is nonzero
	}
	{
		// Infinity * zero --> NaN, raise invalid
		// Other zeros return zero
		p0 = dfclass(A,#0x08)	// A is infinity
		p0 = dfclass(B,#0x01)	// B is zero
	}
	{
		if (p1) jump .Ltrue_inf
		p2 = dfclass(B,#0x01)
	}
	{
		if (p0) jump .Linvalid_zeroinf
		if (p2) jump .Ltrue_zero	// so return zero
		TMP = ##0x7c000000	// mask: top exponent bits of A
	}
	// We are left with a normal or subnormal times a subnormal. A > B
	// If A and B are both very small (exp(a) < BIAS-MANTBITS),
	// we go to a single sticky bit, which we can round easily.
	// If A and B might multiply to something bigger, decrease A exponent and increase
	// B exponent and try again
	{
		p0 = bitsclr(AH,TMP)
		if (p0.new) jump:nt .Lmul_tiny
	}
	{
		// Normalize B: shift its subnormal significand up and move the
		// compensating exponent decrement onto A, then retry the multiply.
		TMP = cl0(BTMP)
	}
	{
		TMP = add(TMP,#-EXPBITS)
	}
	{
		BTMP = asl(BTMP,TMP)
	}
	{
		B = insert(BTMP,#63,#0)
		AH -= asl(TMP,#HI_MANTBITS)
	}
	jump __hexagon_muldf3
.Lmul_tiny:
	// Product underflows past the smallest subnormal; result is
	// +/- 0 or the minimum subnormal depending on rounding mode.
	{
		TMP = USR
		A = xor(A,B)	// get sign bit
	}
	{
		TMP = or(TMP,#0x30)	// Inexact + Underflow
		A = insert(ONE,#63,#0)	// put in rounded up value
		BTMPH = extractu(TMP,#2,#SR_ROUND_OFF)	// get rounding mode
	}
	{
		USR = TMP
		p0 = cmp.gt(BTMPH,#1)	// Round towards pos/neg inf?
		if (!p0.new) AL = #0	// If not, zero
		BTMPH ^= lsr(AH,#31)	// rounding my way --> set LSB
	}
	{
		p0 = cmp.eq(BTMPH,#3)	// if rounding towards right inf
		if (!p0.new) AL = #0	// don't go to zero
		jumpr r31
	}
.Linvalid_zeroinf:
	// 0 * inf: return NaN (all-ones pattern) and raise invalid.
	{
		TMP = USR
	}
	{
		A = #-1	// NaN bit pattern
		TMP = or(TMP,#2)	// invalid flag
	}
	{
		USR = TMP
	}
	{
		p0 = dfcmp.uo(A,A)	// force exception if enabled
		jumpr r31
	}
.Linvalid_nan:
	// A is NaN; converting signaling NaNs raises invalid as required.
	{
		p0 = dfclass(B,#0x0f)	// if B is not NaN
		TMP = convert_df2sf(A)	// will generate invalid if sNaN
		if (p0.new) B = A	// make it whatever A is
	}
	{
		BL = convert_df2sf(B)	// will generate invalid if sNaN
		A = #-1	// return canonical NaN pattern
		jumpr r31
	}
	.falign
.Ltrue_zero:
	// Swap so the zero operand's sign logic in .Ltrue_inf applies
	// (packet swap: both reads see the pre-packet values).
	{
		A = B
		B = A
	}
.Ltrue_inf:
	// Return A (inf or zero) with sign = sign(A) XOR sign(B).
	{
		BH = extract(BH,#1,#31)
	}
	{
		AH ^= asl(BH,#31)
		jumpr r31
	}
END(__hexagon_muldf3)

#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH