; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

; This test checks combinations of FNEG and FMA intrinsics on AVX-512 targets.
; Each function negates one or more FMA operands and/or the FMA result; the
; assertions record whether the negation is folded into the FMA opcode
; (vfmsub / vfnmadd / vfnmsub / vfmsubadd) or is left as a separate sign-bit xor.
; Rounding immediates seen below and the suffix the assertions expect for them:
;   i32 8 -> {rn-sae}, i32 9 -> {rd-sae}, i32 10 -> {ru-sae};
;   i32 4 produces no embedded-rounding suffix in the expected output.
; PR28892

declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32)
declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)
declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)

; fneg of the addend passed to the 512-bit vfmadd intrinsic: expected to fold into vfmsub.
define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %sub.i = fneg <16 x float> %c
  %t0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4)
  ret <16 x float> %t0
}

; fneg of an llvm.fma result without nsz: the expected output keeps a separate xor.
define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; SKX-LABEL: test2:
; SKX:       # %bb.0:
; SKX-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test2:
; KNL:       # %bb.0:
; KNL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
  %fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
  %neg = fneg <16 x float> %fma
  ret <16 x float> %neg
}

; Same as test2 with nsz on the call: the negation is expected to fold into vfnmsub.
define <16 x float> @test2_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %fma = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
  %neg = fneg <16 x float> %fma
  ret <16 x float> %neg
}

; Negated multiplicand plus negated result, no nsz: vfnmadd followed by a separate xor.
define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; SKX-LABEL: test3:
; SKX:       # %bb.0:
; SKX-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test3:
; KNL:       # %bb.0:
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c)
  %sub.i = fneg <16 x float> %t1
  ret <16 x float> %sub.i
}

; Same as test3 with nsz on the call: everything is expected to fold into vfmsub.
define <16 x float> @test3_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c)
  %sub.i = fneg <16 x float> %t1
  ret <16 x float> %sub.i
}

; Negated multiplicand and addend, result negated via fsub from -0.0, no nsz:
; vfnmsub followed by a separate xor.
define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; SKX-LABEL: test4:
; SKX:       # %bb.0:
; SKX-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test4:
; KNL:       # %bb.0:
; KNL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}

; Same as test4 with nsz on the call: all three negations are expected to cancel into vfmadd.
define <16 x float> @test4_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}

; Addend negated via fsub from -0.0, rounding immediate 10: vfmsub with {ru-sae}.
define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 10) #2
  ret <16 x float> %0
}

; test4 pattern using the rounding intrinsic (immediate 10), no nsz:
; vfnmsub {ru-sae} followed by a separate xor.
define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; SKX-LABEL: test6:
; SKX:       # %bb.0:
; SKX-NEXT:    vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test6:
; KNL:       # %bb.0:
; KNL-NEXT:    vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}

; Same as test6 with nsz on the call: expected to collapse to vfmadd {ru-sae}.
define <16 x float> @test6_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call nsz <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}

; 256-bit llvm.fma with negated addend and negated result, no nsz:
; vfmsub followed by a separate xor (the two run configurations materialize the sign mask differently).
define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; SKX-LABEL: test7:
; SKX:       # %bb.0:
; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test7:
; KNL:       # %bb.0:
; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; KNL-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; KNL-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
  %t0 = fneg <8 x float> %c
  %t1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
  ret <8 x float> %sub.i
}

; Same as test7 with nsz on the call: expected to fold into vfnmadd.
define <8 x float> @test7_nsz(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
  %t0 = fneg <8 x float> %c
  %t1 = call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
  ret <8 x float> %sub.i
}

; 256-bit vfmsub intrinsic with a negated subtrahend: expected to become vfmadd.
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
  ret <8 x float> %0
}

; fneg of the 512-bit double vfmadd intrinsic result, no nsz: separate xor expected.
define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; SKX-LABEL: test9:
; SKX:       # %bb.0:
; SKX-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; SKX-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test9:
; KNL:       # %bb.0:
; KNL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; KNL-NEXT:    vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT:    retq
  %t0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
  %sub.i = fneg <8 x double> %t0
  ret <8 x double> %sub.i
}

; Same as test9 with nsz on the call: expected to fold into vfnmsub.
define <8 x double> @test9_nsz(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; CHECK-LABEL: test9_nsz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %t0 = tail call nsz <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
  %sub.i = fneg <8 x double> %t0
  ret <8 x double> %sub.i
}

; Scalar masked vfmadd.sd (mask -1) whose result is negated via fsub from -0.0:
; the expected output keeps the xor after the FMA.
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; SKX-LABEL: test10:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; SKX-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test10:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; KNL-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    retq
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
  %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %0
  ret <2 x double> %sub.i
}

; mask3 scalar vfmadd.ss with a negated addend: the expected output computes vfmsub
; and merges it under the mask into the separately negated %c.
define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm3
; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
; SKX-NEXT:    vmovaps %xmm3, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test11:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; KNL-NEXT:    vxorps %xmm3, %xmm2, %xmm3
; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
; KNL-NEXT:    vmovaps %xmm3, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; mask (not mask3) scalar vfmadd.ss with a negated addend: a single masked vfmsub is expected.
define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11b:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test11b:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Negation applied after a mask-select between the FMA result and %a:
; a masked vfmadd followed by a separate xor is expected.
define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test12:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; SKX-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test12:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; KNL-NEXT:    vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT:    retq
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a
  %sub.i = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sel
  ret <8 x double> %sub.i
}

; Masked scalar vfmadd.sd whose first operand is negated: the expected output computes
; vfnmadd and merges it under the mask into the separately negated %a.
define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; SKX-LABEL: test13:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm3
; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; SKX-NEXT:    vmovapd %xmm3, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test13:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; KNL-NEXT:    vmovapd %xmm3, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %a
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
  ret <2 x double> %0
}

; Result of the masked vfnmsub intrinsic (rounding immediate 10) negated via fsub:
; the masked vfnmsub {ru-sae} is kept and followed by a separate xor.
define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test14:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test14:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 10) #2
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %0
  ret <16 x float> %sub.i
}

; Two chained rounding FMAs (immediates 10 and 9) that both consume the negated %a,
; each followed by a mask-select; the negated %a is also the select fallback, so the
; expected output materializes it with an xor and uses vfnmadd for both FMAs.
define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test15:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
; SKX-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; SKX-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
; SKX-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; SKX-NEXT:    vmovaps %zmm3, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test15:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
; KNL-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; KNL-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
; KNL-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; KNL-NEXT:    vmovaps %zmm3, %zmm0
; KNL-NEXT:    retq
entry:
  %bc = bitcast i16 %mask to <16 x i1>
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %a
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 10)
  %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 9)
  %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
  ret <16 x float> %sel2
}

; vfmaddsub (rounding immediate 9) with a negated addend, then mask-select against %a:
; a masked vfmsubadd {rd-sae} is expected.
define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test16:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test16:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 9)
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
  ret <16 x float> %sel
}

; Double-precision counterpart of test16 with immediate 4: masked vfmsubadd expected.
define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test17:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test17:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; KNL-NEXT:    retq
  %sub.i = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %c
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4)
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a
  ret <8 x double> %sel
}

; Masked scalar vfmadd.ss with a negated second multiplicand: masked vfnmadd expected.
define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test18:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test18:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Masked scalar vfmadd.ss with both the second multiplicand and the addend negated:
; masked vfnmsub expected.
define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test19:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test19:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %sub.i.2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; mask3 scalar vfmadd.ss with a negated second multiplicand: masked vfnmadd231ss expected.
define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test20:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test20:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; KNL-NEXT:    vmovaps %xmm2, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; test18 with rounding immediate 8: masked vfnmadd213ss {rn-sae} expected.
define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test21:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test21:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; test19 with rounding immediate 8: masked vfnmsub213ss {rn-sae} expected.
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test22:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test22:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %sub.i.2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; test20 with rounding immediate 8: masked vfnmadd231ss {rn-sae} expected.
define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test23:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test23:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vmovaps %xmm2, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; test11b with rounding immediate 8: masked vfmsub213ss {rn-sae} expected.
define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test24:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test24:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Packed 512-bit vfmadd (rounding immediate 8) with negated multiplicand and addend:
; vfnmsub213ps {rn-sae} expected.
define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test25:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %b
  %sub.i.2 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2
  ret <16 x float> %0
}