; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

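; The compress tests pack the elements selected by the mask contiguously into
; the low lanes of the destination (vpcompressw/vpcompressb). Merge-masking
; keeps the unselected lanes of the passthru operand %__S, while {z} zeroes
; them. Note how, on i386, a 64-bit mask arrives in two 32-bit stack slots
; and is reassembled with kunpckdq.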
define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

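; The compressstore tests write only the mask-selected elements, contiguously,
; to unaligned memory via the generic llvm.masked.compressstore intrinsic.
; vzeroupper appears here because, unlike the tests above, no value is
; returned in a ZMM register.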
define void @test_mm512_mask_compressstoreu_epi16(ptr %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, ptr %__P, <32 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(ptr %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, ptr %__P, <64 x i1> %1)
  ret void
}

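; The expand tests are the inverse of compress: consecutive low elements of
; the source are distributed into the destination lanes selected by the mask
; (vpexpandw/vpexpandb).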
define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

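; The expandload tests use the generic llvm.masked.expandload intrinsic, which
; reads consecutive elements from memory and expands them into the
; mask-selected lanes, selecting the memory-operand form of
; vpexpandw/vpexpandb.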
define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %1, <32 x i16> %0)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %0, <32 x i16> zeroinitializer)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

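; The shldi tests cover the concatenate-and-shift-left-by-immediate forms
; (vpshldq/vpshldd/vpshldw), expressed in IR as llvm.fshl with a splat
; constant shift amount.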
define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

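; The shrdi tests cover the concatenate-and-shift-right-by-immediate forms
; (vpshrdq/vpshrdd/vpshrdw), expressed as llvm.fshr with the vector operands
; swapped so that %__B supplies the high half of each concatenation. Shift
; amounts are reduced modulo the element width: test_mm512_shrdi_epi16 below
; uses a splat of 31 yet emits vpshrdw $15.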
define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

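; The shldv tests cover the variable-count left funnel shifts
; (vpshldvq/vpshldvd/vpshldvw): per-element shift amounts come from %__B, and
; the destination %__S also supplies the high half of each concatenation
; (llvm.fshl(%__S, %__A, %__B)).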
define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

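; The shrdv tests cover the variable-count right funnel shifts
; (vpshrdvq/vpshrdvd/vpshrdvw), with llvm.fshr(%__A, %__S, %__B): the
; destination %__S supplies the low half of each concatenation.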
define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)