; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}
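; Merge-masking for the qword form follows the same pattern as the dword tests
; above: the i8 mask arrives in a GPR (on the stack for i686) and is moved into
; %k1 via kmovw, vplzcntq merges into the %x1 passthru register, and the result
; is copied back to %xmm0.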
define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
; X86-LABEL: test_x86_vbroadcastmw_256:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpbroadcastd %eax, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_vbroadcastmw_256:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vpbroadcastd %eax, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
; X86-LABEL: test_x86_vbroadcastmw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpbroadcastd %eax, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_vbroadcastmw_128:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vpbroadcastd %eax, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
; X86-LABEL: test_x86_broadcastmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpbroadcastq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_broadcastmb_256:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vpbroadcastq %rax, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
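; The 128-bit broadcastmb lowering below matches the 256-bit case above: with
; no mask register live, the i8 argument is zero-extended in a GPR and
; broadcast to each 64-bit lane, bouncing through an xmm register on i686,
; which cannot vpbroadcastq directly from a 32-bit GPR.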
define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
; X86-LABEL: test_x86_broadcastmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_broadcastmb_128:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vpbroadcastq %rax, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}
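; Zero-masking variant: with a zeroinitializer passthru the backend emits the
; {z} form of vpconflictd and operates on %ymm0 in place, so no trailing
; vmovdqa is needed. Unlike the other maskz tests in this file, this one still
; takes the (unused) passthru argument %x1.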
define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}
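; The qword zero-masking case below mirrors maskz_vpconflict_d_256: the
; zeroinitializer passthru never materializes in a register, and vpconflictq
; writes %ymm0 in place under {%k1} {z}.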
define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}