; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Exercises instcombine on the AVX-512 masked scalar intrinsics
; (add/sub/mul/div/max/min in their .ss and .sd forms) when the upper vector
; elements of an operand are produced by inserting constants.
;
; What the assertions below expect:
;  - Last operand i32 4, add/sub/mul/div: the call is replaced by scalar IR
;    (extractelement + fadd/fsub/fmul/fdiv, plus a select on the truncated
;    mask in the masked variants, then an insertelement into %a).
;  - Last operand i32 4, max/min: the call is kept, but the constant
;    insertelements feeding it are removed.
;  - Last operand i32 8 (the *_round tests): the call is kept, but the constant
;    insertelements feeding it are removed.
;  - *_1 tests: extracting element 1 of the result folds to the constant that
;    was inserted into element 1 of the first operand.

; Masked scalar add, <4 x float> form.
declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_add_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP4]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_add_ss_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
  ret <4 x float> %4
}

define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP3]], float [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP7]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_ss_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
  ret <4 x float> %4
}

define float @test_add_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_add_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
  %10 = extractelement <4 x float> %9, i32 1
  ret float %10
}

; Masked scalar add, <2 x double> form.
declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_add_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP4]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_add_sd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
  ret <2 x double> %2
}

define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], double [[TMP3]], double [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP7]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_sd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
  ret <2 x double> %2
}

define double @test_add_sd_1(double %a, double %b) {
;
; CHECK-LABEL: @test_add_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
  %6 = extractelement <2 x double> %5, i32 1
  ret double %6
}

; Masked scalar sub, <4 x float> form.
declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_sub_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP4]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_sub_ss_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
  ret <4 x float> %4
}

define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP3]], float [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP7]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_ss_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
  ret <4 x float> %4
}

define float @test_sub_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_sub_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
  %10 = extractelement <4 x float> %9, i32 1
  ret float %10
}

; Masked scalar sub, <2 x double> form.
declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_sub_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP4]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_sub_sd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
  ret <2 x double> %2
}

define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], double [[TMP3]], double [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP7]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_sd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
  ret <2 x double> %2
}

define double @test_sub_sd_1(double %a, double %b) {
;
; CHECK-LABEL: @test_sub_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
  %6 = extractelement <2 x double> %5, i32 1
  ret double %6
}

; Masked scalar mul, <4 x float> form.
declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_mul_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP4]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_mul_ss_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
  ret <4 x float> %4
}

define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP3]], float [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP7]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_ss_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
  ret <4 x float> %4
}

define float @test_mul_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_mul_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
  %10 = extractelement <4 x float> %9, i32 1
  ret float %10
}

; Masked scalar mul, <2 x double> form.
declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_mul_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP4]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_mul_sd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
  ret <2 x double> %2
}

define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], double [[TMP3]], double [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP7]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_sd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
  ret <2 x double> %2
}

define double @test_mul_sd_1(double %a, double %b) {
;
; CHECK-LABEL: @test_mul_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
  %6 = extractelement <2 x double> %5, i32 1
  ret double %6
}

; Masked scalar div, <4 x float> form.
declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_div_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP4]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_div_ss_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
  ret <4 x float> %4
}

define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP3]], float [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
; CHECK-NEXT:    ret <4 x float> [[TMP7]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_ss_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
  ret <4 x float> %4
}

define float @test_div_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_div_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
  %10 = extractelement <4 x float> %9, i32 1
  ret float %10
}

; Masked scalar div, <2 x double> form.
declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_div_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP4]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_div_sd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
  ret <2 x double> %2
}

define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i8 [[MASK:%.*]] to i1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], double [[TMP3]], double [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
; CHECK-NEXT:    ret <2 x double> [[TMP7]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_sd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
  ret <2 x double> %2
}

define double @test_div_sd_1(double %a, double %b) {
;
; CHECK-LABEL: @test_div_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
  %6 = extractelement <2 x double> %5, i32 1
  ret double %6
}

; Masked scalar max, <4 x float> form. The call is expected to be kept here
; even with i32 4 as the last operand.
declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 4)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_max_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define float @test_max_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_max_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
  %10 = extractelement <4 x float> %9, i32 1
  ret float %10
}

; Masked scalar max, <2 x double> form.
declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
;
; CHECK-LABEL: @test_max_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 4)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
  ret <2 x double> %2
}

define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_max_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
  ret <2 x double> %2
}

define double @test_max_sd_1(double %a, double %b) {
;
; CHECK-LABEL: @test_max_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
  %6 = extractelement <2 x double> %5, i32 1
  ret double %6
}

; Masked scalar min, <4 x float> form. The call is expected to be kept here
; even with i32 4 as the last operand.
declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
;
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> <float undef, float poison, float poison, float poison>, i8 -1, i32 4)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
  ret <4 x float> %4
}

define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
;
; CHECK-LABEL: @test_min_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
  ret <4 x float> %4
}

define float @test_min_ss_1(float %a, float %b) {
;
; CHECK-LABEL: @test_min_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2
= insertelement <4 x float> %1, float 1.000000e+00, i32 1 724 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 725 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 726 %5 = insertelement <4 x float> poison, float %b, i32 0 727 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 728 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 729 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 730 %9 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8) 731 %10 = extractelement <4 x float> %9, i32 1 732 ret float %10 733} 734 735declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) 736 737define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) { 738; 739; CHECK-LABEL: @test_min_sd( 740; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> <double undef, double poison>, i8 -1, i32 4) 741; CHECK-NEXT: ret <2 x double> [[TMP1]] 742; 743 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 744 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4) 745 ret <2 x double> %2 746} 747 748define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 749; 750; CHECK-LABEL: @test_min_sd_mask( 751; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4) 752; CHECK-NEXT: ret <2 x double> [[TMP1]] 753; 754 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 755 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) 756 ret <2 x double> %2 757} 758 759define double 
@test_min_sd_1(double %a, double %b) { 760; 761; CHECK-LABEL: @test_min_sd_1( 762; CHECK-NEXT: ret double 1.000000e+00 763; 764 %1 = insertelement <2 x double> poison, double %a, i32 0 765 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 766 %3 = insertelement <2 x double> poison, double %b, i32 0 767 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 768 %5 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8) 769 %6 = extractelement <2 x double> %5, i32 1 770 ret double %6 771} 772 773declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) 774 775define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) { 776; 777; CHECK-LABEL: @test_cmp_ss( 778; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4) 779; CHECK-NEXT: ret i8 [[TMP1]] 780; 781 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 782 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 783 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 784 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1 785 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 786 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 787 %7 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %3, <4 x float> %6, i32 3, i8 %mask, i32 4) 788 ret i8 %7 789} 790 791declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32) 792 793define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) { 794; 795; CHECK-LABEL: @test_cmp_sd( 796; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4) 797; CHECK-NEXT: ret i8 [[TMP1]] 798; 799 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 800 %2 = insertelement <2 x double> %b, double 
2.000000e+00, i32 1 801 %3 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %1, <2 x double> %2, i32 3, i8 %mask, i32 4) 802 ret i8 %3 803} 804 805define i64 @test(float %f, double %d) { 806; 807; CHECK-LABEL: @test( 808; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> poison, float [[F:%.*]], i64 0 809; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4) 810; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 811; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4) 812; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 813; CHECK-NEXT: [[T2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4) 814; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 815; CHECK-NEXT: [[T3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4) 816; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> poison, double [[D:%.*]], i64 0 817; CHECK-NEXT: [[T4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4) 818; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 819; CHECK-NEXT: [[T5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4) 820; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 821; CHECK-NEXT: [[T6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4) 822; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 823; CHECK-NEXT: [[T7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4) 824; CHECK-NEXT: [[T8:%.*]] = add i32 [[T0]], [[T2]] 825; CHECK-NEXT: [[T9:%.*]] = add i32 [[T4]], [[T6]] 826; CHECK-NEXT: [[T10:%.*]] = add i32 [[T8]], [[T9]] 827; CHECK-NEXT: [[T11:%.*]] = sext i32 [[T10]] to i64 828; CHECK-NEXT: [[T12:%.*]] = add i64 [[T1]], [[T3]] 829; 
CHECK-NEXT: [[T13:%.*]] = add i64 [[T5]], [[T7]] 830; CHECK-NEXT: [[T14:%.*]] = add i64 [[T12]], [[T13]] 831; CHECK-NEXT: [[T15:%.*]] = add i64 [[T14]], [[T11]] 832; CHECK-NEXT: ret i64 [[T15]] 833; 834 %v00 = insertelement <4 x float> poison, float %f, i32 0 835 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1 836 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2 837 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3 838 %t0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %v03, i32 4) 839 %v10 = insertelement <4 x float> poison, float %f, i32 0 840 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1 841 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2 842 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3 843 %t1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %v13, i32 4) 844 %v20 = insertelement <4 x float> poison, float %f, i32 0 845 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1 846 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2 847 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3 848 %t2 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v23, i32 4) 849 %v30 = insertelement <4 x float> poison, float %f, i32 0 850 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1 851 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2 852 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3 853 %t3 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %v33, i32 4) 854 %v40 = insertelement <2 x double> poison, double %d, i32 0 855 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1 856 %t4 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %v41, i32 4) 857 %v50 = insertelement <2 x double> poison, double %d, i32 0 858 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1 859 %t5 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %v51, 
i32 4) 860 %v60 = insertelement <2 x double> poison, double %d, i32 0 861 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1 862 %t6 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %v61, i32 4) 863 %v70 = insertelement <2 x double> poison, double %d, i32 0 864 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1 865 %t7 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %v71, i32 4) 866 %t8 = add i32 %t0, %t2 867 %t9 = add i32 %t4, %t6 868 %t10 = add i32 %t8, %t9 869 %t11 = sext i32 %t10 to i64 870 %t12 = add i64 %t1, %t3 871 %t13 = add i64 %t5, %t7 872 %t14 = add i64 %t12, %t13 873 %t15 = add i64 %t11, %t14 874 ret i64 %t15 875} 876 877declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) 878declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) 879declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) 880declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) 881declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) 882declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) 883declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) 884declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) 885 886define i64 @test2(float %f, double %d) { 887; 888; CHECK-LABEL: @test2( 889; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> poison, float [[F:%.*]], i64 0 890; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4) 891; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 892; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4) 893; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 894; CHECK-NEXT: [[T2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4) 895; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0 896; CHECK-NEXT: [[T3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> 
[[V33]], i32 4) 897; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> poison, double [[D:%.*]], i64 0 898; CHECK-NEXT: [[T4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4) 899; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 900; CHECK-NEXT: [[T5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4) 901; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 902; CHECK-NEXT: [[T6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4) 903; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0 904; CHECK-NEXT: [[T7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4) 905; CHECK-NEXT: [[T8:%.*]] = add i32 [[T0]], [[T2]] 906; CHECK-NEXT: [[T9:%.*]] = add i32 [[T4]], [[T6]] 907; CHECK-NEXT: [[T10:%.*]] = add i32 [[T8]], [[T9]] 908; CHECK-NEXT: [[T11:%.*]] = sext i32 [[T10]] to i64 909; CHECK-NEXT: [[T12:%.*]] = add i64 [[T1]], [[T3]] 910; CHECK-NEXT: [[T13:%.*]] = add i64 [[T5]], [[T7]] 911; CHECK-NEXT: [[T14:%.*]] = add i64 [[T12]], [[T13]] 912; CHECK-NEXT: [[T15:%.*]] = add i64 [[T14]], [[T11]] 913; CHECK-NEXT: ret i64 [[T15]] 914; 915 %v00 = insertelement <4 x float> poison, float %f, i32 0 916 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1 917 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2 918 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3 919 %t0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %v03, i32 4) 920 %v10 = insertelement <4 x float> poison, float %f, i32 0 921 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1 922 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2 923 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3 924 %t1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %v13, i32 4) 925 %v20 = insertelement <4 x float> poison, float %f, i32 
0 926 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1 927 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2 928 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3 929 %t2 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %v23, i32 4) 930 %v30 = insertelement <4 x float> poison, float %f, i32 0 931 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1 932 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2 933 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3 934 %t3 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %v33, i32 4) 935 %v40 = insertelement <2 x double> poison, double %d, i32 0 936 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1 937 %t4 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %v41, i32 4) 938 %v50 = insertelement <2 x double> poison, double %d, i32 0 939 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1 940 %t5 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %v51, i32 4) 941 %v60 = insertelement <2 x double> poison, double %d, i32 0 942 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1 943 %t6 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %v61, i32 4) 944 %v70 = insertelement <2 x double> poison, double %d, i32 0 945 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1 946 %t7 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %v71, i32 4) 947 %t8 = add i32 %t0, %t2 948 %t9 = add i32 %t4, %t6 949 %t10 = add i32 %t8, %t9 950 %t11 = sext i32 %t10 to i64 951 %t12 = add i64 %t1, %t3 952 %t13 = add i64 %t5, %t7 953 %t14 = add i64 %t12, %t13 954 %t15 = add i64 %t11, %t14 955 ret i64 %t15 956} 957 958declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) 959declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) 960declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) 961declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) 
962declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) 963declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) 964declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) 965declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) 966 967declare float @llvm.fma.f32(float, float, float) #1 968 969define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 970; 971; CHECK-LABEL: @test_mask_vfmadd_ss( 972; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 973; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 974; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 975; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 976; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 977; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float [[TMP1]] 978; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 979; CHECK-NEXT: ret <4 x float> [[TMP7]] 980; 981 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 982 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 983 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 984 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1 985 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 986 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 987 %7 = extractelement <4 x float> %a, i64 0 988 %8 = extractelement <4 x float> %3, i64 0 989 %9 = extractelement <4 x float> %6, i64 0 990 %10 = call float @llvm.fma.f32(float %7, float %8, float %9) 991 %11 = bitcast i8 %mask to <8 x i1> 992 %12 = extractelement <8 x i1> %11, i64 0 993 %13 = select i1 %12, float %10, float %7 994 %14 = insertelement <4 x float> %a, float %13, i64 0 995 ret <4 x float> %14 996} 997 998define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 999; 1000; 
CHECK-LABEL: @test_mask_vfmadd_ss_0( 1001; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1002; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1003; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1004; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 1005; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1006; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float [[TMP1]] 1007; CHECK-NEXT: ret float [[TMP6]] 1008; 1009 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1010 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1011 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1012 %4 = extractelement <4 x float> %3, i64 0 1013 %5 = extractelement <4 x float> %b, i64 0 1014 %6 = extractelement <4 x float> %c, i64 0 1015 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1016 %8 = bitcast i8 %mask to <8 x i1> 1017 %9 = extractelement <8 x i1> %8, i64 0 1018 %10 = select i1 %9, float %7, float %4 1019 %11 = insertelement <4 x float> %3, float %10, i64 0 1020 %12 = extractelement <4 x float> %11, i32 0 1021 ret float %12 1022} 1023 1024define float @test_mask_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1025; 1026; CHECK-LABEL: @test_mask_vfmadd_ss_1( 1027; CHECK-NEXT: ret float 1.000000e+00 1028; 1029 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1030 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1031 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1032 %4 = extractelement <4 x float> %3, i64 0 1033 %5 = extractelement <4 x float> %b, i64 0 1034 %6 = extractelement <4 x float> %c, i64 0 1035 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1036 %8 = bitcast i8 %mask to <8 x i1> 1037 %9 = extractelement <8 x i1> %8, i64 0 1038 %10 = select i1 %9, float %7, float %4 1039 %11 = insertelement <4 x float> %3, 
float %10, i64 0 1040 %12 = extractelement <4 x float> %11, i32 1 1041 ret float %12 1042} 1043 1044declare double @llvm.fma.f64(double, double, double) #1 1045 1046define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1047; 1048; CHECK-LABEL: @test_mask_vfmadd_sd( 1049; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1050; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1051; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1052; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) 1053; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1054; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double [[TMP1]] 1055; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 1056; CHECK-NEXT: ret <2 x double> [[TMP7]] 1057; 1058 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 1059 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 1060 %3 = extractelement <2 x double> %a, i64 0 1061 %4 = extractelement <2 x double> %1, i64 0 1062 %5 = extractelement <2 x double> %2, i64 0 1063 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 1064 %7 = bitcast i8 %mask to <8 x i1> 1065 %8 = extractelement <8 x i1> %7, i64 0 1066 %9 = select i1 %8, double %6, double %3 1067 %10 = insertelement <2 x double> %a, double %9, i64 0 1068 ret <2 x double> %10 1069} 1070 1071define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1072; 1073; CHECK-LABEL: @test_mask_vfmadd_sd_0( 1074; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1075; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1076; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1077; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], 
double [[TMP3]]) 1078; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1079; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double [[TMP1]] 1080; CHECK-NEXT: ret double [[TMP6]] 1081; 1082 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1083 %2 = extractelement <2 x double> %1, i64 0 1084 %3 = extractelement <2 x double> %b, i64 0 1085 %4 = extractelement <2 x double> %c, i64 0 1086 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1087 %6 = bitcast i8 %mask to <8 x i1> 1088 %7 = extractelement <8 x i1> %6, i64 0 1089 %8 = select i1 %7, double %5, double %2 1090 %9 = insertelement <2 x double> %1, double %8, i64 0 1091 %10 = extractelement <2 x double> %9, i32 0 1092 ret double %10 1093} 1094 1095define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1096; 1097; CHECK-LABEL: @test_mask_vfmadd_sd_1( 1098; CHECK-NEXT: ret double 1.000000e+00 1099; 1100 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1101 %2 = extractelement <2 x double> %1, i64 0 1102 %3 = extractelement <2 x double> %b, i64 0 1103 %4 = extractelement <2 x double> %c, i64 0 1104 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1105 %6 = bitcast i8 %mask to <8 x i1> 1106 %7 = extractelement <8 x i1> %6, i64 0 1107 %8 = select i1 %7, double %5, double %2 1108 %9 = insertelement <2 x double> %1, double %8, i64 0 1109 %10 = extractelement <2 x double> %9, i32 1 1110 ret double %10 1111} 1112 1113define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1114; 1115; CHECK-LABEL: @test_maskz_vfmadd_ss( 1116; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1117; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1118; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1119; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 1120; 
CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1121; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float 0.000000e+00 1122; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 1123; CHECK-NEXT: ret <4 x float> [[TMP7]] 1124; 1125 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 1126 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1127 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1128 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1 1129 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 1130 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 1131 %7 = extractelement <4 x float> %a, i64 0 1132 %8 = extractelement <4 x float> %3, i64 0 1133 %9 = extractelement <4 x float> %6, i64 0 1134 %10 = call float @llvm.fma.f32(float %7, float %8, float %9) 1135 %11 = bitcast i8 %mask to <8 x i1> 1136 %12 = extractelement <8 x i1> %11, i64 0 1137 %13 = select i1 %12, float %10, float 0.000000e+00 1138 %14 = insertelement <4 x float> %a, float %13, i64 0 1139 ret <4 x float> %14 1140} 1141 1142define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1143; 1144; CHECK-LABEL: @test_maskz_vfmadd_ss_0( 1145; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1146; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1147; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1148; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 1149; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1150; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float 0.000000e+00 1151; CHECK-NEXT: ret float [[TMP6]] 1152; 1153 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1154 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1155 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 
1156 %4 = extractelement <4 x float> %3, i64 0 1157 %5 = extractelement <4 x float> %b, i64 0 1158 %6 = extractelement <4 x float> %c, i64 0 1159 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1160 %8 = bitcast i8 %mask to <8 x i1> 1161 %9 = extractelement <8 x i1> %8, i64 0 1162 %10 = select i1 %9, float %7, float 0.000000e+00 1163 %11 = insertelement <4 x float> %3, float %10, i64 0 1164 %12 = extractelement <4 x float> %11, i32 0 1165 ret float %12 1166} 1167 1168define float @test_maskz_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1169; 1170; CHECK-LABEL: @test_maskz_vfmadd_ss_1( 1171; CHECK-NEXT: ret float 1.000000e+00 1172; 1173 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1174 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1175 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1176 %4 = extractelement <4 x float> %3, i64 0 1177 %5 = extractelement <4 x float> %b, i64 0 1178 %6 = extractelement <4 x float> %c, i64 0 1179 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1180 %8 = bitcast i8 %mask to <8 x i1> 1181 %9 = extractelement <8 x i1> %8, i64 0 1182 %10 = select i1 %9, float %7, float 0.000000e+00 1183 %11 = insertelement <4 x float> %3, float %10, i64 0 1184 %12 = extractelement <4 x float> %11, i32 1 1185 ret float %12 1186} 1187 1188define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1189; 1190; CHECK-LABEL: @test_maskz_vfmadd_sd( 1191; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1192; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1193; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1194; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) 1195; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1196; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double 
0.000000e+00 1197; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 1198; CHECK-NEXT: ret <2 x double> [[TMP7]] 1199; 1200 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 1201 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 1202 %3 = extractelement <2 x double> %a, i64 0 1203 %4 = extractelement <2 x double> %1, i64 0 1204 %5 = extractelement <2 x double> %2, i64 0 1205 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 1206 %7 = bitcast i8 %mask to <8 x i1> 1207 %8 = extractelement <8 x i1> %7, i64 0 1208 %9 = select i1 %8, double %6, double 0.000000e+00 1209 %10 = insertelement <2 x double> %a, double %9, i64 0 1210 ret <2 x double> %10 1211} 1212 1213define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1214; 1215; CHECK-LABEL: @test_maskz_vfmadd_sd_0( 1216; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1217; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1218; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1219; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) 1220; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1221; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double 0.000000e+00 1222; CHECK-NEXT: ret double [[TMP6]] 1223; 1224 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1225 %2 = extractelement <2 x double> %1, i64 0 1226 %3 = extractelement <2 x double> %b, i64 0 1227 %4 = extractelement <2 x double> %c, i64 0 1228 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1229 %6 = bitcast i8 %mask to <8 x i1> 1230 %7 = extractelement <8 x i1> %6, i64 0 1231 %8 = select i1 %7, double %5, double 0.000000e+00 1232 %9 = insertelement <2 x double> %1, double %8, i64 0 1233 %10 = extractelement <2 x double> %9, i32 0 1234 ret double %10 1235} 1236 1237define 
double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1238; 1239; CHECK-LABEL: @test_maskz_vfmadd_sd_1( 1240; CHECK-NEXT: ret double 1.000000e+00 1241; 1242 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1243 %2 = extractelement <2 x double> %1, i64 0 1244 %3 = extractelement <2 x double> %b, i64 0 1245 %4 = extractelement <2 x double> %c, i64 0 1246 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1247 %6 = bitcast i8 %mask to <8 x i1> 1248 %7 = extractelement <8 x i1> %6, i64 0 1249 %8 = select i1 %7, double %5, double 0.000000e+00 1250 %9 = insertelement <2 x double> %1, double %8, i64 0 1251 %10 = extractelement <2 x double> %9, i32 1 1252 ret double %10 1253} 1254 1255define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1256; 1257; CHECK-LABEL: @test_mask3_vfmadd_ss( 1258; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1259; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1260; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1261; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 1262; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1263; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float [[TMP3]] 1264; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0 1265; CHECK-NEXT: ret <4 x float> [[TMP7]] 1266; 1267 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1268 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1269 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1270 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1 1271 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 1272 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 1273 %7 = extractelement <4 x float> %3, i64 0 1274 %8 = extractelement <4 x 
float> %6, i64 0 1275 %9 = extractelement <4 x float> %c, i64 0 1276 %10 = call float @llvm.fma.f32(float %7, float %8, float %9) 1277 %11 = bitcast i8 %mask to <8 x i1> 1278 %12 = extractelement <8 x i1> %11, i64 0 1279 %13 = select i1 %12, float %10, float %9 1280 %14 = insertelement <4 x float> %c, float %13, i64 0 1281 ret <4 x float> %14 1282} 1283 1284define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1285; 1286; CHECK-LABEL: @test_mask3_vfmadd_ss_0( 1287; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1288; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1289; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1290; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) 1291; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1292; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float [[TMP3]] 1293; CHECK-NEXT: ret float [[TMP6]] 1294; 1295 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1296 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1297 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1298 %4 = extractelement <4 x float> %a, i64 0 1299 %5 = extractelement <4 x float> %b, i64 0 1300 %6 = extractelement <4 x float> %3, i64 0 1301 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1302 %8 = bitcast i8 %mask to <8 x i1> 1303 %9 = extractelement <8 x i1> %8, i64 0 1304 %10 = select i1 %9, float %7, float %6 1305 %11 = insertelement <4 x float> %3, float %10, i64 0 1306 %12 = extractelement <4 x float> %11, i32 0 1307 ret float %12 1308} 1309 1310define float @test_mask3_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1311; 1312; CHECK-LABEL: @test_mask3_vfmadd_ss_1( 1313; CHECK-NEXT: ret float 1.000000e+00 1314; 1315 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1316 %2 = insertelement <4 x float> 
%1, float 2.000000e+00, i32 2 1317 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1318 %4 = extractelement <4 x float> %a, i64 0 1319 %5 = extractelement <4 x float> %b, i64 0 1320 %6 = extractelement <4 x float> %3, i64 0 1321 %7 = call float @llvm.fma.f32(float %4, float %5, float %6) 1322 %8 = bitcast i8 %mask to <8 x i1> 1323 %9 = extractelement <8 x i1> %8, i64 0 1324 %10 = select i1 %9, float %7, float %6 1325 %11 = insertelement <4 x float> %3, float %10, i64 0 1326 %12 = extractelement <4 x float> %11, i32 1 1327 ret float %12 1328} 1329 1330define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1331; 1332; CHECK-LABEL: @test_mask3_vfmadd_sd( 1333; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1334; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1335; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1336; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) 1337; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1338; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double [[TMP3]] 1339; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0 1340; CHECK-NEXT: ret <2 x double> [[TMP7]] 1341; 1342 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1343 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 1344 %3 = extractelement <2 x double> %1, i64 0 1345 %4 = extractelement <2 x double> %2, i64 0 1346 %5 = extractelement <2 x double> %c, i64 0 1347 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 1348 %7 = bitcast i8 %mask to <8 x i1> 1349 %8 = extractelement <8 x i1> %7, i64 0 1350 %9 = select i1 %8, double %6, double %5 1351 %10 = insertelement <2 x double> %c, double %9, i64 0 1352 ret <2 x double> %10 1353} 1354 1355define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> 
%b, <2 x double> %c, i8 %mask) { 1356; 1357; CHECK-LABEL: @test_mask3_vfmadd_sd_0( 1358; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1359; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1360; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1361; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) 1362; CHECK-NEXT: [[TMP5:%.*]] = trunc i8 [[MASK:%.*]] to i1 1363; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP4]], double [[TMP3]] 1364; CHECK-NEXT: ret double [[TMP6]] 1365; 1366 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1367 %2 = extractelement <2 x double> %a, i64 0 1368 %3 = extractelement <2 x double> %b, i64 0 1369 %4 = extractelement <2 x double> %1, i64 0 1370 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1371 %6 = bitcast i8 %mask to <8 x i1> 1372 %7 = extractelement <8 x i1> %6, i64 0 1373 %8 = select i1 %7, double %5, double %4 1374 %9 = insertelement <2 x double> %1, double %8, i64 0 1375 %10 = extractelement <2 x double> %9, i32 0 1376 ret double %10 1377} 1378 1379define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1380; 1381; CHECK-LABEL: @test_mask3_vfmadd_sd_1( 1382; CHECK-NEXT: ret double 1.000000e+00 1383; 1384 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1385 %2 = extractelement <2 x double> %a, i64 0 1386 %3 = extractelement <2 x double> %b, i64 0 1387 %4 = extractelement <2 x double> %1, i64 0 1388 %5 = call double @llvm.fma.f64(double %2, double %3, double %4) 1389 %6 = bitcast i8 %mask to <8 x i1> 1390 %7 = extractelement <8 x i1> %6, i64 0 1391 %8 = select i1 %7, double %5, double %4 1392 %9 = insertelement <2 x double> %1, double %8, i64 0 1393 %10 = extractelement <2 x double> %9, i32 1 1394 ret double %10 1395} 1396 1397define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x 
float> %c, i8 %mask) { 1398; 1399; CHECK-LABEL: @test_mask3_vfmsub_ss( 1400; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1401; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1402; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1403; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] 1404; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) 1405; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0 1406; CHECK-NEXT: [[TMP7:%.*]] = trunc i8 [[MASK:%.*]] to i1 1407; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] 1408; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0 1409; CHECK-NEXT: ret <4 x float> [[TMP9]] 1410; 1411 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1412 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1413 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1414 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1 1415 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 1416 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 1417 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 1418 %8 = extractelement <4 x float> %3, i64 0 1419 %9 = extractelement <4 x float> %6, i64 0 1420 %10 = extractelement <4 x float> %7, i64 0 1421 %11 = call float @llvm.fma.f32(float %8, float %9, float %10) 1422 %12 = extractelement <4 x float> %c, i64 0 1423 %13 = bitcast i8 %mask to <8 x i1> 1424 %14 = extractelement <8 x i1> %13, i64 0 1425 %15 = select i1 %14, float %11, float %12 1426 %16 = insertelement <4 x float> %c, float %15, i64 0 1427 ret <4 x float> %16 1428} 1429 1430define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1431; 1432; CHECK-LABEL: @test_mask3_vfmsub_ss_0( 1433; CHECK-NEXT: [[TMP1:%.*]] = 
extractelement <4 x float> [[A:%.*]], i64 0 1434; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1435; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1436; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] 1437; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) 1438; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0 1439; CHECK-NEXT: [[TMP7:%.*]] = trunc i8 [[MASK:%.*]] to i1 1440; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] 1441; CHECK-NEXT: ret float [[TMP8]] 1442; 1443 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1444 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1445 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1446 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3 1447 %5 = extractelement <4 x float> %a, i64 0 1448 %6 = extractelement <4 x float> %b, i64 0 1449 %7 = extractelement <4 x float> %4, i64 0 1450 %8 = call float @llvm.fma.f32(float %5, float %6, float %7) 1451 %9 = extractelement <4 x float> %3, i64 0 1452 %10 = bitcast i8 %mask to <8 x i1> 1453 %11 = extractelement <8 x i1> %10, i64 0 1454 %12 = select i1 %11, float %8, float %9 1455 %13 = insertelement <4 x float> %3, float %12, i64 0 1456 %14 = extractelement <4 x float> %13, i32 0 1457 ret float %14 1458} 1459 1460define float @test_mask3_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1461; 1462; CHECK-LABEL: @test_mask3_vfmsub_ss_1( 1463; CHECK-NEXT: ret float 1.000000e+00 1464; 1465 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1466 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1467 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1468 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3 1469 %5 = extractelement <4 x 
float> %a, i64 0 1470 %6 = extractelement <4 x float> %b, i64 0 1471 %7 = extractelement <4 x float> %4, i64 0 1472 %8 = call float @llvm.fma.f32(float %5, float %6, float %7) 1473 %9 = extractelement <4 x float> %3, i64 0 1474 %10 = bitcast i8 %mask to <8 x i1> 1475 %11 = extractelement <8 x i1> %10, i64 0 1476 %12 = select i1 %11, float %8, float %9 1477 %13 = insertelement <4 x float> %3, float %12, i64 0 1478 %14 = extractelement <4 x float> %13, i32 1 1479 ret float %14 1480} 1481 1482define float @test_mask3_vfmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1483; 1484; CHECK-LABEL: @test_mask3_vfmsub_ss_1_unary_fneg( 1485; CHECK-NEXT: ret float 1.000000e+00 1486; 1487 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1488 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1489 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1490 %4 = fneg <4 x float> %3 1491 %5 = extractelement <4 x float> %a, i64 0 1492 %6 = extractelement <4 x float> %b, i64 0 1493 %7 = extractelement <4 x float> %4, i64 0 1494 %8 = call float @llvm.fma.f32(float %5, float %6, float %7) 1495 %9 = extractelement <4 x float> %3, i64 0 1496 %10 = bitcast i8 %mask to <8 x i1> 1497 %11 = extractelement <8 x i1> %10, i64 0 1498 %12 = select i1 %11, float %8, float %9 1499 %13 = insertelement <4 x float> %3, float %12, i64 0 1500 %14 = extractelement <4 x float> %13, i32 1 1501 ret float %14 1502} 1503 1504define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1505; 1506; CHECK-LABEL: @test_mask3_vfmsub_sd( 1507; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1508; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1509; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1510; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] 1511; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], 
double [[TMP4]]) 1512; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 1513; CHECK-NEXT: [[TMP7:%.*]] = trunc i8 [[MASK:%.*]] to i1 1514; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP5]], double [[TMP6]] 1515; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0 1516; CHECK-NEXT: ret <2 x double> [[TMP9]] 1517; 1518 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1519 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 1520 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c 1521 %4 = extractelement <2 x double> %1, i64 0 1522 %5 = extractelement <2 x double> %2, i64 0 1523 %6 = extractelement <2 x double> %3, i64 0 1524 %7 = call double @llvm.fma.f64(double %4, double %5, double %6) 1525 %8 = extractelement <2 x double> %c, i64 0 1526 %9 = bitcast i8 %mask to <8 x i1> 1527 %10 = extractelement <8 x i1> %9, i64 0 1528 %11 = select i1 %10, double %7, double %8 1529 %12 = insertelement <2 x double> %c, double %11, i64 0 1530 ret <2 x double> %12 1531} 1532 1533define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1534; 1535; CHECK-LABEL: @test_mask3_vfmsub_sd_0( 1536; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1537; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1538; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1539; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] 1540; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) 1541; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 1542; CHECK-NEXT: [[TMP7:%.*]] = trunc i8 [[MASK:%.*]] to i1 1543; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP5]], double [[TMP6]] 1544; CHECK-NEXT: ret double [[TMP8]] 1545; 1546 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1547 %2 = fsub <2 x double> 
<double -0.000000e+00, double -0.000000e+00>, %1 1548 %3 = extractelement <2 x double> %a, i64 0 1549 %4 = extractelement <2 x double> %b, i64 0 1550 %5 = extractelement <2 x double> %2, i64 0 1551 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 1552 %7 = extractelement <2 x double> %1, i64 0 1553 %8 = bitcast i8 %mask to <8 x i1> 1554 %9 = extractelement <8 x i1> %8, i64 0 1555 %10 = select i1 %9, double %6, double %7 1556 %11 = insertelement <2 x double> %1, double %10, i64 0 1557 %12 = extractelement <2 x double> %11, i32 0 1558 ret double %12 1559} 1560 1561define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1562; 1563; CHECK-LABEL: @test_mask3_vfmsub_sd_1( 1564; CHECK-NEXT: ret double 1.000000e+00 1565; 1566 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1567 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1 1568 %3 = extractelement <2 x double> %a, i64 0 1569 %4 = extractelement <2 x double> %b, i64 0 1570 %5 = extractelement <2 x double> %2, i64 0 1571 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 1572 %7 = extractelement <2 x double> %1, i64 0 1573 %8 = bitcast i8 %mask to <8 x i1> 1574 %9 = extractelement <8 x i1> %8, i64 0 1575 %10 = select i1 %9, double %6, double %7 1576 %11 = insertelement <2 x double> %1, double %10, i64 0 1577 %12 = extractelement <2 x double> %11, i32 1 1578 ret double %12 1579} 1580 1581define double @test_mask3_vfmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1582; 1583; CHECK-LABEL: @test_mask3_vfmsub_sd_1_unary_fneg( 1584; CHECK-NEXT: ret double 1.000000e+00 1585; 1586 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1587 %2 = fneg <2 x double> %1 1588 %3 = extractelement <2 x double> %a, i64 0 1589 %4 = extractelement <2 x double> %b, i64 0 1590 %5 = extractelement <2 x double> %2, i64 0 1591 %6 = call double @llvm.fma.f64(double %3, double %4, double %5) 
1592 %7 = extractelement <2 x double> %1, i64 0 1593 %8 = bitcast i8 %mask to <8 x i1> 1594 %9 = extractelement <8 x i1> %8, i64 0 1595 %10 = select i1 %9, double %6, double %7 1596 %11 = insertelement <2 x double> %1, double %10, i64 0 1597 %12 = extractelement <2 x double> %11, i32 1 1598 ret double %12 1599} 1600 1601define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1602; 1603; CHECK-LABEL: @test_mask3_vfnmsub_ss( 1604; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1605; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]] 1606; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1607; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1608; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] 1609; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) 1610; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0 1611; CHECK-NEXT: [[TMP8:%.*]] = trunc i8 [[MASK:%.*]] to i1 1612; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]] 1613; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 1614; CHECK-NEXT: ret <4 x float> [[TMP10]] 1615; 1616 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 1617 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1618 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1619 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1 1620 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2 1621 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3 1622 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3 1623 %8 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 1624 %9 = extractelement <4 x float> %7, i64 0 1625 %10 = extractelement <4 x float> %6, i64 0 
1626 %11 = extractelement <4 x float> %8, i64 0 1627 %12 = call float @llvm.fma.f32(float %9, float %10, float %11) 1628 %13 = extractelement <4 x float> %c, i64 0 1629 %14 = bitcast i8 %mask to <8 x i1> 1630 %15 = extractelement <8 x i1> %14, i64 0 1631 %16 = select i1 %15, float %12, float %13 1632 %17 = insertelement <4 x float> %c, float %16, i64 0 1633 ret <4 x float> %17 1634} 1635 1636define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1637; 1638; CHECK-LABEL: @test_mask3_vfnmsub_ss_0( 1639; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 1640; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]] 1641; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 1642; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 1643; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] 1644; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) 1645; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0 1646; CHECK-NEXT: [[TMP8:%.*]] = trunc i8 [[MASK:%.*]] to i1 1647; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]] 1648; CHECK-NEXT: ret float [[TMP9]] 1649; 1650 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1651 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1652 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1653 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1654 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3 1655 %6 = extractelement <4 x float> %4, i64 0 1656 %7 = extractelement <4 x float> %b, i64 0 1657 %8 = extractelement <4 x float> %5, i64 0 1658 %9 = call float @llvm.fma.f32(float %6, float %7, float %8) 1659 %10 = extractelement <4 x float> %3, i64 0 1660 %11 = bitcast i8 %mask to <8 x i1> 1661 %12 = extractelement 
<8 x i1> %11, i64 0 1662 %13 = select i1 %12, float %9, float %10 1663 %14 = insertelement <4 x float> %3, float %13, i64 0 1664 %15 = extractelement <4 x float> %14, i32 0 1665 ret float %15 1666} 1667 1668define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1669; 1670; CHECK-LABEL: @test_mask3_vfnmsub_ss_1( 1671; CHECK-NEXT: ret float 1.000000e+00 1672; 1673 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1674 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1675 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1676 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1677 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3 1678 %6 = extractelement <4 x float> %4, i64 0 1679 %7 = extractelement <4 x float> %b, i64 0 1680 %8 = extractelement <4 x float> %5, i64 0 1681 %9 = call float @llvm.fma.f32(float %6, float %7, float %8) 1682 %10 = extractelement <4 x float> %3, i64 0 1683 %11 = bitcast i8 %mask to <8 x i1> 1684 %12 = extractelement <8 x i1> %11, i64 0 1685 %13 = select i1 %12, float %9, float %10 1686 %14 = insertelement <4 x float> %3, float %13, i64 0 1687 %15 = extractelement <4 x float> %14, i32 1 1688 ret float %15 1689} 1690 1691define float @test_mask3_vfnmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1692; 1693; CHECK-LABEL: @test_mask3_vfnmsub_ss_1_unary_fneg( 1694; CHECK-NEXT: ret float 1.000000e+00 1695; 1696 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 1697 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 1698 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 1699 %4 = fneg <4 x float> %a 1700 %5 = fneg <4 x float> %3 1701 %6 = extractelement <4 x float> %4, i64 0 1702 %7 = extractelement <4 x float> %b, i64 0 1703 %8 = extractelement <4 x float> %5, i64 0 1704 %9 = call float 
@llvm.fma.f32(float %6, float %7, float %8) 1705 %10 = extractelement <4 x float> %3, i64 0 1706 %11 = bitcast i8 %mask to <8 x i1> 1707 %12 = extractelement <8 x i1> %11, i64 0 1708 %13 = select i1 %12, float %9, float %10 1709 %14 = insertelement <4 x float> %3, float %13, i64 0 1710 %15 = extractelement <4 x float> %14, i32 1 1711 ret float %15 1712} 1713 1714define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1715; 1716; CHECK-LABEL: @test_mask3_vfnmsub_sd( 1717; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1718; CHECK-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]] 1719; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1720; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1721; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] 1722; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) 1723; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 1724; CHECK-NEXT: [[TMP8:%.*]] = trunc i8 [[MASK:%.*]] to i1 1725; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP6]], double [[TMP7]] 1726; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 1727; CHECK-NEXT: ret <2 x double> [[TMP10]] 1728; 1729 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 1730 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 1731 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1 1732 %4 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c 1733 %5 = extractelement <2 x double> %3, i64 0 1734 %6 = extractelement <2 x double> %2, i64 0 1735 %7 = extractelement <2 x double> %4, i64 0 1736 %8 = call double @llvm.fma.f64(double %5, double %6, double %7) 1737 %9 = extractelement <2 x double> %c, i64 0 1738 %10 = bitcast i8 %mask to <8 x i1> 1739 %11 = extractelement <8 x i1> %10, i64 0 1740 %12 = select i1 %11, 
double %8, double %9 1741 %13 = insertelement <2 x double> %c, double %12, i64 0 1742 ret <2 x double> %13 1743} 1744 1745define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1746; 1747; CHECK-LABEL: @test_mask3_vfnmsub_sd_0( 1748; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 1749; CHECK-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]] 1750; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 1751; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 1752; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] 1753; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) 1754; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 1755; CHECK-NEXT: [[TMP8:%.*]] = trunc i8 [[MASK:%.*]] to i1 1756; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP6]], double [[TMP7]] 1757; CHECK-NEXT: ret double [[TMP9]] 1758; 1759 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 1760 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a 1761 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1 1762 %4 = extractelement <2 x double> %2, i64 0 1763 %5 = extractelement <2 x double> %b, i64 0 1764 %6 = extractelement <2 x double> %3, i64 0 1765 %7 = call double @llvm.fma.f64(double %4, double %5, double %6) 1766 %8 = extractelement <2 x double> %1, i64 0 1767 %9 = bitcast i8 %mask to <8 x i1> 1768 %10 = extractelement <8 x i1> %9, i64 0 1769 %11 = select i1 %10, double %7, double %8 1770 %12 = insertelement <2 x double> %1, double %11, i64 0 1771 %13 = extractelement <2 x double> %12, i32 0 1772 ret double %13 1773} 1774 1775define double @test_mask3_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 1776; 1777; CHECK-LABEL: @test_mask3_vfnmsub_sd_1( 1778; CHECK-NEXT: ret double 1.000000e+00 1779; 1780 %1 = insertelement <2 x double> %c, double 
1.000000e+00, i32 1
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
  %4 = extractelement <2 x double> %2, i64 0
  %5 = extractelement <2 x double> %b, i64 0
  %6 = extractelement <2 x double> %3, i64 0
  %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
  %8 = extractelement <2 x double> %1, i64 0
  %9 = bitcast i8 %mask to <8 x i1>
  %10 = extractelement <8 x i1> %9, i64 0
  %11 = select i1 %10, double %7, double %8
  %12 = insertelement <2 x double> %1, double %11, i64 0
  %13 = extractelement <2 x double> %12, i32 1
  ret double %13
}

; Only element 1 of the final vector is returned. That element comes from %1,
; whose element 1 was set to the constant 1.0, so the whole fneg/fma/select
; chain on element 0 is expected to disappear and a constant to be returned.
define double @test_mask3_vfnmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mask3_vfnmsub_sd_1_unary_fneg(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
  %2 = fneg <2 x double> %a
  %3 = fneg <2 x double> %1
  %4 = extractelement <2 x double> %2, i64 0
  %5 = extractelement <2 x double> %b, i64 0
  %6 = extractelement <2 x double> %3, i64 0
  %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
  %8 = extractelement <2 x double> %1, i64 0
  %9 = bitcast i8 %mask to <8 x i1>
  %10 = extractelement <8 x i1> %9, i64 0
  %11 = select i1 %10, double %7, double %8
  %12 = insertelement <2 x double> %1, double %11, i64 0
  %13 = extractelement <2 x double> %12, i32 1
  ret double %13
}

; --- 512-bit packed add/sub/mul/div intrinsics -------------------------------
; Each intrinsic below is exercised four ways: last operand i32 4 or i32 8,
; each with and without a mask-controlled select on the result.
; NOTE(review): the last operand is presumably the x86 rounding-mode immediate
; (4 looks like "current direction", 8 an explicit rounding mode) - confirm.

declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fadd.
define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_add_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  ret <16 x float> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_add_ps_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  ret <16 x float> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fadd while the bitcast/select pair is kept.
define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_add_ps_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_add_ps_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

declare <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double>, <8 x double>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fadd.
define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_add_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  ret <8 x double> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_add_pd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  ret <8 x double> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fadd while the bitcast/select pair is kept.
define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_pd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_add_pd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fsub.
define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_sub_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  ret <16 x float> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_sub_ps_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  ret <16 x float> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fsub while the bitcast/select pair is kept.
define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_sub_ps_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_sub_ps_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

declare <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double>, <8 x double>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fsub.
define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_sub_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  ret <8 x double> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_sub_pd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  ret <8 x double> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fsub while the bitcast/select pair is kept.
define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_pd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_sub_pd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fmul.
define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_mul_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  ret <16 x float> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_mul_ps_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  ret <16 x float> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fmul while the bitcast/select pair is kept.
define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_mul_ps_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_mul_ps_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fmul.
define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_mul_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  ret <8 x double> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_mul_pd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  ret <8 x double> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fmul while the bitcast/select pair is kept.
define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_pd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_mul_pd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fdiv.
define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_div_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  ret <16 x float> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
;
; CHECK-LABEL: @test_div_ps_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <16 x float> [[TMP1]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  ret <16 x float> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fdiv while the bitcast/select pair is kept.
define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_div_ps_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
;
; CHECK-LABEL: @test_div_ps_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
; CHECK-NEXT:    ret <16 x float> [[TMP3]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
  ret <16 x float> %3
}

declare <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double>, <8 x double>, i32)

; Last operand i32 4: the call is expected to be replaced by a plain fdiv.
define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_div_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  ret <8 x double> %1
}

; Last operand i32 8: the intrinsic call is expected to be left untouched.
define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
;
; CHECK-LABEL: @test_div_pd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    ret <8 x double> [[TMP1]]
;
  %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  ret <8 x double> %1
}

; Last operand i32 4 plus a select on the bitcast mask: the call is expected to
; become fdiv while the bitcast/select pair is kept.
define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_pd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

; Last operand i32 8 plus a select on the bitcast mask: the whole sequence is
; expected to be left as written.
define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
;
; CHECK-LABEL: @test_div_pd_mask_round(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
; CHECK-NEXT:    ret <8 x double> [[TMP3]]
;
  %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
  ret <8 x double> %3
}

declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)

; Both operands are built by inserting %a / %b into element 0 and constants
; into elements 1-3. The constant inserts are expected to be dropped, leaving
; only the element-0 inserts feeding the unchanged vcomi.ss call.
define i32 @test_comi_ss_0(float %a, float %b) {
;
; CHECK-LABEL: @test_comi_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> poison, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4)
  ret i32 %9
}

declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)

; Double-precision counterpart of the test above: the constant inserts into
; element 1 of both operands are expected to be dropped, leaving only the
; element-0 inserts feeding the unchanged vcomi.sd call.
define i32 @test_comi_sd_0(double %a, double %b) {
;
; CHECK-LABEL: @test_comi_sd_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> poison, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4)
  ret i32 %5
}