; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
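
; Leading-zero-count tests: llvm.ctlz on <4 x i32>, <8 x i32>, <2 x i64> and
; <4 x i64> lowers to VPLZCNTD/VPLZCNTQ. The i8 mask argument is bitcast to
; <8 x i1> (narrower types extract their low lanes via shufflevector) and
; applied with a select, giving the merge-masked {%k1} and zero-masked
; {%k1} {z} encodings.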

define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  ret <4 x i32> %3
}

define <4 x i32> @test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0

define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) #0

define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0

define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0
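
; Conflict-detection tests: the llvm.x86.avx512.conflict.{d,q}.{128,256}
; intrinsics lower to VPCONFLICTD/VPCONFLICTQ, covered in unmasked,
; merge-masked, and zero-masked forms.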
define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  ret <4 x i32> %1
}

define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  ret <4 x i32> %3
}

define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}

define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  ret <8 x i32> %1
}

define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}

define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}

define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  ret <2 x i64> %1
}

define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}

define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  ret <4 x i64> %1
}

define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}

define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}

declare <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32>)
declare <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32>)
declare <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64>)
declare <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64>)