; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s

; Codegen tests for calls to the llvm.pow intrinsic whose exponent is 0.25 or
; 1/3, under different combinations of fast-math flags. Each test records
; whether the call is lowered to sqrt/rsqrt instructions, to a cbrt libcall,
; or left as a pow libcall.

declare float @llvm.pow.f32(float, float)
declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)

declare double @llvm.pow.f64(double, double)
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)

declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)

; pow(x, 0.25) on float with nsz, ninf and afn: the expected code is two
; back-to-back sqrtss instructions and no libcall.

define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind {
; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
  ret float %r
}

; Same call as above, but the function carries attribute group #0, which sets
; "denormal-fp-math"="ieee,preserve-sign" (see the end of the file). The
; expected code is the same two sqrtss instructions.

define float @pow_f32_one_fourth_fmf_daz(float %x) #0 {
; CHECK-LABEL: pow_f32_one_fourth_fmf_daz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
  ret float %r
}

; Double-precision scalar variant: two sqrtsd instructions are expected.

define double @pow_f64_one_fourth_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_one_fourth_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sqrtsd %xmm0, %xmm0
; CHECK-NEXT:    sqrtsd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 2.5e-01)
  ret double %r
}

; <4 x float> variant with the full 'fast' flag set. Unlike the scalar tests,
; the expected code contains no sqrtps: it uses rsqrtps twice, each followed by
; mulps/addps arithmetic against the -0.5 and -3.0 constant vectors and a
; cmpleps/andps mask against the 1.17549435E-38 constant vector.

define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind {
; CHECK-LABEL: pow_v4f32_one_fourth_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rsqrtps %xmm0, %xmm1
; CHECK-NEXT:    movaps %xmm0, %xmm2
; CHECK-NEXT:    mulps %xmm1, %xmm2
; CHECK-NEXT:    movaps {{.*#+}} xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; CHECK-NEXT:    movaps %xmm2, %xmm4
; CHECK-NEXT:    mulps %xmm3, %xmm4
; CHECK-NEXT:    mulps %xmm1, %xmm2
; CHECK-NEXT:    movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; CHECK-NEXT:    addps %xmm5, %xmm2
; CHECK-NEXT:    mulps %xmm4, %xmm2
; CHECK-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
; CHECK-NEXT:    andps %xmm4, %xmm0
; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; CHECK-NEXT:    movaps %xmm1, %xmm6
; CHECK-NEXT:    cmpleps %xmm0, %xmm6
; CHECK-NEXT:    andps %xmm2, %xmm6
; CHECK-NEXT:    rsqrtps %xmm6, %xmm0
; CHECK-NEXT:    movaps %xmm6, %xmm2
; CHECK-NEXT:    mulps %xmm0, %xmm2
; CHECK-NEXT:    mulps %xmm2, %xmm3
; CHECK-NEXT:    mulps %xmm0, %xmm2
; CHECK-NEXT:    addps %xmm5, %xmm2
; CHECK-NEXT:    mulps %xmm3, %xmm2
; CHECK-NEXT:    andps %xmm4, %xmm6
; CHECK-NEXT:    cmpleps %xmm6, %xmm1
; CHECK-NEXT:    andps %xmm2, %xmm1
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-1, float 2.5e-1, float 2.5e-01, float 2.5e-01>)
  ret <4 x float> %r
}

; <2 x double> variant with 'fast': two sqrtpd instructions are expected.

define <2 x double> @pow_v2f64_one_fourth_fmf(<2 x double> %x) nounwind {
; CHECK-LABEL: pow_v2f64_one_fourth_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sqrtpd %xmm0, %xmm0
; CHECK-NEXT:    sqrtpd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.5e-1, double 2.5e-1>)
  ret <2 x double> %r
}

; Negative tests for the 0.25 exponent. Each of the next four calls lacks at
; least one of the nsz/ninf/afn flags used by the positive tests above, and
; the expected code keeps the pow/powf libcall.

; Flags are afn and ninf only (no nsz): tail call to powf is expected.

define float @pow_f32_one_fourth_not_enough_fmf(float %x) nounwind {
; CHECK-LABEL: pow_f32_one_fourth_not_enough_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    jmp powf@PLT # TAILCALL
  %r = call afn ninf float @llvm.pow.f32(float %x, float 2.5e-01)
  ret float %r
}

; Flags are nsz and ninf only (no afn): tail call to pow is expected.

define double @pow_f64_one_fourth_not_enough_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_one_fourth_not_enough_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.5E-1,0.0E+0]
; CHECK-NEXT:    jmp pow@PLT # TAILCALL
  %r = call nsz ninf double @llvm.pow.f64(double %x, double 2.5e-01)
  ret double %r
}

; Flags are afn and nsz only (no ninf): the vector call is expected to be
; scalarized into four powf calls, one per element, with the intermediate
; results spilled to and reloaded from the stack.

define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind {
; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $56, %rsp
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $56, %rsp
; CHECK-NEXT:    retq
  %r = call afn nsz <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-1, float 2.5e-1, float 2.5e-01, float 2.5e-01>)
  ret <4 x float> %r
}

; Flags are nsz, nnan and reassoc (neither ninf nor afn): the vector call is
; expected to be scalarized into two pow calls.

define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwind {
; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.5E-1,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.5E-1,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call nsz nnan reassoc <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.5e-1, double 2.5e-1>)
  ret <2 x double> %r
}

; pow(x, 1/3) on float with nsz, nnan, ninf and afn. The exponent is written
; as uitofp(1) / uitofp(3) rather than as a literal. The expected code is a
; tail call to cbrtf.

define float @pow_f32_one_third_fmf(float %x) nounwind {
; CHECK-LABEL: pow_f32_one_third_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    jmp cbrtf@PLT # TAILCALL
  %one = uitofp i32 1 to float
  %three = uitofp i32 3 to float
  %exp = fdiv float %one, %three
  %r = call nsz nnan ninf afn float @llvm.pow.f32(float %x, float %exp)
  ret float %r
}

; Double-precision variant of the test above: a tail call to cbrt is expected.

define double @pow_f64_one_third_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_one_third_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    jmp cbrt@PLT # TAILCALL
  %one = uitofp i32 1 to double
  %three = uitofp i32 3 to double
  %exp = fdiv double %one, %three
  %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double %exp)
  ret double %r
}

; TODO: We could turn this into cbrtl, but currently we only handle float/double types.
; For now the x86_fp80 call is expected to stay a call to powl, with both
; operands passed on the stack.

define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind {
; CHECK-LABEL: pow_f80_one_third_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq powl@PLT
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %one = uitofp i32 1 to x86_fp80
  %three = uitofp i32 3 to x86_fp80
  %exp = fdiv x86_fp80 %one, %three
  %r = call nsz nnan ninf afn x86_fp80 @llvm.pow.f80(x86_fp80 %x, x86_fp80 %exp)
  ret x86_fp80 %r
}

; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555.
; The exponent used below is 0x3fd5555555555556, which differs from that value
; only in the lowest bit, and the call is expected to stay a tail call to pow.

define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.3333333333333337E-1,0.0E+0]
; CHECK-NEXT:    jmp pow@PLT # TAILCALL
  %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556)
  ret double %r
}

; We require all 4 of nsz, ninf, nnan, afn.
; The call below has the exact 1/3 exponent but omits nnan, so the expected
; code is still a tail call to pow rather than cbrt.

define double @pow_f64_not_enough_fmf(double %x) nounwind {
; CHECK-LABEL: pow_f64_not_enough_fmf:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.3333333333333331E-1,0.0E+0]
; CHECK-NEXT:    jmp pow@PLT # TAILCALL
  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
  ret double %r
}

; Attribute group used by pow_f32_one_fourth_fmf_daz.
attributes #0 = { nounwind "denormal-fp-math"="ieee,preserve-sign" }