; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK

; Code generation tests for calls to the llvm.fma.* intrinsics on x86-64 with
; fast instruction selection requested and the FMA feature enabled (FMA4
; disabled), as configured by the llc invocation above.
;
; Every function takes three operands %a, %b, %c and returns the fused result.
; Negated operands are written as a subtraction from -0.0. The scalar (_ss/_sd)
; variants operate on element 0 only and re-insert the result into %a.
; The expected assembly for each function is recorded in the autogenerated
; assertion comments at the top of its body.

;
; 128-bit vectors and scalars (xmm registers)
;

; Plain fused multiply-add: (a * b) + c.

define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %0
}

define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %0
}

define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmadd_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %1 = extractelement <4 x float> %b, i64 0
  %2 = extractelement <4 x float> %c, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmadd_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Fused multiply-subtract: the addend %c is negated before the fma call.
; The packed variants are expected to emit a separate xor with a constant-pool
; operand followed by vfmadd; the scalar variants are expected to emit vfmsub.

define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
  ret <4 x float> %0
}

define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
  ret <2 x double> %0
}

define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsub_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %1 = extractelement <4 x float> %b, i64 0
  %.rhs.i = extractelement <4 x float> %c, i64 0
  %2 = fsub float -0.000000e+00, %.rhs.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsub_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %.rhs.i = extractelement <2 x double> %c, i64 0
  %2 = fsub double -0.000000e+00, %.rhs.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Negated multiply-add: one multiplicand is negated before the fma call
; (%a in the packed variants, element 0 of %b in the scalar variants).

define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %0
}

define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %0
}

define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmadd_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %.rhs.i = extractelement <4 x float> %b, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %2 = extractelement <4 x float> %c, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmadd_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %.rhs.i = extractelement <2 x double> %b, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %c, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Negated multiply-subtract: both one multiplicand and the addend are negated.
; The packed variants are expected to materialize the sign mask once and xor it
; into both operands; the scalar variants are expected to emit vfnmsub.

define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vxorps %xmm3, %xmm0, %xmm4
; CHECK-NEXT:    vxorps %xmm3, %xmm2, %xmm0
; CHECK-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %sub1.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub1.i) #2
  ret <4 x float> %0
}

define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; CHECK-NEXT:    # xmm3 = mem[0,0]
; CHECK-NEXT:    vxorpd %xmm3, %xmm0, %xmm4
; CHECK-NEXT:    vxorpd %xmm3, %xmm2, %xmm0
; CHECK-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %sub1.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub1.i) #2
  ret <2 x double> %0
}

define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmsub_ss:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %.rhs.i = extractelement <4 x float> %b, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %.rhs2.i = extractelement <4 x float> %c, i64 0
  %2 = fsub float -0.000000e+00, %.rhs2.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmsub_sd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %.rhs.i = extractelement <2 x double> %b, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs2.i = extractelement <2 x double> %c, i64 0
  %2 = fsub double -0.000000e+00, %.rhs2.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Alternating add/subtract: two fma calls (one with %c, one with negated %c)
; are blended with a shufflevector. In fmaddsub the even result lanes come from
; the negated-%c call and the odd lanes from the plain call; fmsubadd is the
; opposite. Each is expected to fold into a single instruction.

define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmaddsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %1) #2
  %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %3
}

define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmaddsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %1) #2
  %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %3
}

define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fmsubadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %2
}

define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fmsubadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

;
; 256-bit vectors (ymm registers); same operation groups as above, packed
; variants only.
;

define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %0
}

define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  ret <4 x double> %0
}

define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  ret <8 x float> %0
}

define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
  ret <4 x double> %0
}

define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fnmadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %0
}

define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fnmadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %c) #2
  ret <4 x double> %0
}

define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fnmsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vxorps %ymm3, %ymm0, %ymm4
; CHECK-NEXT:    vxorps %ymm3, %ymm2, %ymm0
; CHECK-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %sub1.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %sub1.i) #2
  ret <8 x float> %0
}

define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fnmsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vxorpd %ymm3, %ymm0, %ymm4
; CHECK-NEXT:    vxorpd %ymm3, %ymm2, %ymm0
; CHECK-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
  %sub1.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %sub1.i) #2
  ret <4 x double> %0
}

define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmaddsub_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %1) #2
  %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %3
}

define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmaddsub_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %1) #2
  %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %3
}

define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fmsubadd_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %2
}

define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fmsubadd_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
  %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %2
}

; Declarations of the fma intrinsic overloads called above.
; REVIEW: attribute groups #1 (on these declarations) and #2 (on the call
; sites) are referenced, but no `attributes #N = { ... }` definition is visible
; in this file - confirm that leaving them undefined is intentional.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1
declare float @llvm.fma.f32(float, float, float) #1
declare double @llvm.fma.f64(double, double, double) #1
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1