; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c

define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpcompressw %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast <2 x i64> %__S to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressb %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressb %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast <2 x i64> %__S to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define void @test_mm_mask_compressstoreu_epi16(ptr %__P, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpcompressw %xmm0, (%ecx) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, ptr %__P, <8 x i1> %1)
  ret void
}

define void @test_mm_mask_compressstoreu_epi8(ptr %__P, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, ptr %__P, <16 x i1> %1)
  ret void
}

define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpexpandw %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = bitcast <2 x i64> %__S to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = bitcast <2 x i64> %__S to <16 x i8>
  %2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(ptr %__P, <8 x i1> %1, <8 x i16> %0)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8 %__U to <8 x i1>
  %1 = tail call <8 x i16> @llvm.masked.expandload.v8i16(ptr %__P, <8 x i1> %0, <8 x i16> zeroinitializer)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <16 x i8>
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(ptr %__P, <16 x i1> %1, <16 x i8> %0)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i16 %__U to <16 x i1>
  %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(ptr %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast <4 x i64> %__S to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressb %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressb %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast <4 x i64> %__S to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define void @test_mm256_mask_compressstoreu_epi16(ptr %__P, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, ptr %__P, <16 x i1> %1)
  ret void
}

define void @test_mm256_mask_compressstoreu_epi8(ptr %__P, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, ptr %__P, <32 x i1> %1)
  ret void
}

define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = bitcast <4 x i64> %__S to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = bitcast <4 x i64> %__S to <32 x i8>
  %2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(ptr %__P, <16 x i1> %1, <16 x i16> %0)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i16 %__U to <16 x i1>
  %1 = tail call <16 x i16> @llvm.masked.expandload.v16i16(ptr %__P, <16 x i1> %0, <16 x i16> zeroinitializer)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <32 x i8>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(ptr %__P, <32 x i1> %1, <32 x i8> %0)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(ptr %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 31, i64 31>)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <4 x i64> %__S to <8 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)

define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
  %5 = bitcast <8 x i32> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <8 x i32> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <2 x i64> %__S to <4 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <4 x i64> %__S to <16 x i16>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)

define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <2 x i64> %__S to <8 x i16>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
  %5 = bitcast <8 x i16> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 31, i64 31>)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <4 x i64> %__S to <8 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)

define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
  %5 = bitcast <8 x i32> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast <4 x i64> %__B to <8 x i32>
  %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <8 x i32> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <2 x i64> %__S to <4 x i32>
  %4 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast <2 x i64> %__B to <4 x i32>
  %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <4 x i64> %__S to <16 x i16>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)

define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  ret <4 x i64> %5
}

define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__B to <16 x i16>
  %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <2 x i64> %__S to <8 x i16>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
  %5 = bitcast <8 x i16> %4 to <2 x i64>
  ret <2 x i64> %5
}

define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__B to <8 x i16>
  %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
X86-NEXT: retl 1607; 1608; X64-LABEL: test_mm_maskz_shldv_epi16: 1609; X64: # %bb.0: # %entry 1610; X64-NEXT: kmovd %edi, %k1 1611; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1612; X64-NEXT: retq 1613entry: 1614 %0 = bitcast <2 x i64> %__S to <8 x i16> 1615 %1 = bitcast <2 x i64> %__A to <8 x i16> 1616 %2 = bitcast <2 x i64> %__B to <8 x i16> 1617 %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) 1618 %4 = bitcast i8 %__U to <8 x i1> 1619 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer 1620 %6 = bitcast <8 x i16> %5 to <2 x i64> 1621 ret <2 x i64> %6 1622} 1623 1624define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1625; CHECK-LABEL: test_mm_shldv_epi16: 1626; CHECK: # %bb.0: # %entry 1627; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 1628; CHECK-NEXT: ret{{[l|q]}} 1629entry: 1630 %0 = bitcast <2 x i64> %__S to <8 x i16> 1631 %1 = bitcast <2 x i64> %__A to <8 x i16> 1632 %2 = bitcast <2 x i64> %__B to <8 x i16> 1633 %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) 1634 %4 = bitcast <8 x i16> %3 to <2 x i64> 1635 ret <2 x i64> %4 1636} 1637 1638define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1639; X86-LABEL: test_mm256_mask_shrdv_epi64: 1640; X86: # %bb.0: # %entry 1641; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1642; X86-NEXT: kmovd %eax, %k1 1643; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} 1644; X86-NEXT: retl 1645; 1646; X64-LABEL: test_mm256_mask_shrdv_epi64: 1647; X64: # %bb.0: # %entry 1648; X64-NEXT: kmovd %edi, %k1 1649; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} 1650; X64-NEXT: retq 1651entry: 1652 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1653 %1 = bitcast i8 %__U to <8 x i1> 1654 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1655 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S 1656 ret <4 x i64> %2 1657} 1658 1659define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1660; X86-LABEL: test_mm256_maskz_shrdv_epi64: 1661; X86: # %bb.0: # %entry 1662; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1663; X86-NEXT: kmovd %eax, %k1 1664; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1665; X86-NEXT: retl 1666; 1667; X64-LABEL: test_mm256_maskz_shrdv_epi64: 1668; X64: # %bb.0: # %entry 1669; X64-NEXT: kmovd %edi, %k1 1670; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} 1671; X64-NEXT: retq 1672entry: 1673 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1674 %1 = bitcast i8 %__U to <8 x i1> 1675 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1676 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer 1677 ret <4 x i64> %2 1678} 1679 1680define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1681; CHECK-LABEL: test_mm256_shrdv_epi64: 1682; CHECK: # %bb.0: # %entry 1683; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 1684; CHECK-NEXT: ret{{[l|q]}} 1685entry: 1686 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B) 1687 ret <4 x i64> %0 1688} 1689 1690define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1691; X86-LABEL: test_mm_mask_shrdv_epi64: 1692; X86: # %bb.0: # %entry 1693; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax 1694; X86-NEXT: kmovd %eax, %k1 1695; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1696; X86-NEXT: retl 1697; 1698; X64-LABEL: test_mm_mask_shrdv_epi64: 1699; X64: # %bb.0: # %entry 1700; X64-NEXT: kmovd %edi, %k1 1701; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} 1702; X64-NEXT: retq 1703entry: 1704 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1705 %1 = bitcast i8 %__U to <8 x i1> 1706 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1707 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S 1708 ret <2 x i64> %2 1709} 1710 1711define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1712; X86-LABEL: test_mm_maskz_shrdv_epi64: 1713; X86: # %bb.0: # %entry 1714; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1715; X86-NEXT: kmovd %eax, %k1 1716; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1717; X86-NEXT: retl 1718; 1719; X64-LABEL: test_mm_maskz_shrdv_epi64: 1720; X64: # %bb.0: # %entry 1721; X64-NEXT: kmovd %edi, %k1 1722; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} 1723; X64-NEXT: retq 1724entry: 1725 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1726 %1 = bitcast i8 %__U to <8 x i1> 1727 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 1728 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer 1729 ret <2 x i64> %2 1730} 1731 1732define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1733; CHECK-LABEL: test_mm_shrdv_epi64: 1734; CHECK: # %bb.0: # %entry 1735; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 1736; CHECK-NEXT: ret{{[l|q]}} 1737entry: 1738 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B) 1739 ret <2 x i64> %0 1740} 1741 1742define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { 1743; X86-LABEL: test_mm256_mask_shrdv_epi32: 1744; X86: # %bb.0: # %entry 1745; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1746; X86-NEXT: kmovd %eax, %k1 1747; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1748; X86-NEXT: retl 1749; 1750; X64-LABEL: test_mm256_mask_shrdv_epi32: 1751; X64: # %bb.0: # %entry 1752; X64-NEXT: kmovd %edi, %k1 1753; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} 1754; X64-NEXT: retq 1755entry: 1756 %0 = bitcast <4 x i64> %__S to <8 x i32> 1757 %1 = bitcast <4 x i64> %__A to <8 x i32> 1758 %2 = bitcast <4 x i64> %__B to <8 x i32> 1759 %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1760 %4 = bitcast i8 %__U to <8 x i1> 1761 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0 1762 %6 = bitcast <8 x i32> %5 to <4 x i64> 1763 ret <4 x i64> %6 1764} 1765 1766define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1767; X86-LABEL: test_mm256_maskz_shrdv_epi32: 1768; X86: # %bb.0: # %entry 1769; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1770; X86-NEXT: kmovd %eax, %k1 1771; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1772; X86-NEXT: retl 1773; 1774; X64-LABEL: test_mm256_maskz_shrdv_epi32: 1775; X64: # %bb.0: # %entry 1776; X64-NEXT: kmovd %edi, %k1 1777; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} 1778; X64-NEXT: retq 1779entry: 1780 %0 = bitcast <4 x i64> %__S to <8 x i32> 1781 %1 = bitcast <4 x i64> %__A to <8 x i32> 1782 %2 = bitcast <4 x i64> %__B to <8 x i32> 1783 %3 = tail call <8 x i32> 
@llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1784 %4 = bitcast i8 %__U to <8 x i1> 1785 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer 1786 %6 = bitcast <8 x i32> %5 to <4 x i64> 1787 ret <4 x i64> %6 1788} 1789 1790define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1791; CHECK-LABEL: test_mm256_shrdv_epi32: 1792; CHECK: # %bb.0: # %entry 1793; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 1794; CHECK-NEXT: ret{{[l|q]}} 1795entry: 1796 %0 = bitcast <4 x i64> %__S to <8 x i32> 1797 %1 = bitcast <4 x i64> %__A to <8 x i32> 1798 %2 = bitcast <4 x i64> %__B to <8 x i32> 1799 %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2) 1800 %4 = bitcast <8 x i32> %3 to <4 x i64> 1801 ret <4 x i64> %4 1802} 1803 1804define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1805; X86-LABEL: test_mm_mask_shrdv_epi32: 1806; X86: # %bb.0: # %entry 1807; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1808; X86-NEXT: kmovd %eax, %k1 1809; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1810; X86-NEXT: retl 1811; 1812; X64-LABEL: test_mm_mask_shrdv_epi32: 1813; X64: # %bb.0: # %entry 1814; X64-NEXT: kmovd %edi, %k1 1815; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} 1816; X64-NEXT: retq 1817entry: 1818 %0 = bitcast <2 x i64> %__S to <4 x i32> 1819 %1 = bitcast <2 x i64> %__A to <4 x i32> 1820 %2 = bitcast <2 x i64> %__B to <4 x i32> 1821 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1822 %4 = bitcast i8 %__U to <8 x i1> 1823 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1824 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0 1825 %6 = bitcast <4 x i32> %5 to <2 x i64> 1826 ret <2 x i64> %6 1827} 1828 1829define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1830; X86-LABEL: test_mm_maskz_shrdv_epi32: 1831; X86: # %bb.0: # %entry 1832; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1833; X86-NEXT: kmovd %eax, %k1 1834; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1835; X86-NEXT: retl 1836; 1837; X64-LABEL: test_mm_maskz_shrdv_epi32: 1838; X64: # %bb.0: # %entry 1839; X64-NEXT: kmovd %edi, %k1 1840; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} 1841; X64-NEXT: retq 1842entry: 1843 %0 = bitcast <2 x i64> %__S to <4 x i32> 1844 %1 = bitcast <2 x i64> %__A to <4 x i32> 1845 %2 = bitcast <2 x i64> %__B to <4 x i32> 1846 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1847 %4 = bitcast i8 %__U to <8 x i1> 1848 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1849 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer 1850 %6 = bitcast <4 x i32> %5 to <2 x i64> 1851 ret <2 x i64> %6 1852} 1853 1854define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1855; CHECK-LABEL: test_mm_shrdv_epi32: 1856; CHECK: # %bb.0: # %entry 1857; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 1858; CHECK-NEXT: ret{{[l|q]}} 1859entry: 1860 %0 = bitcast <2 x i64> %__S to <4 x i32> 1861 %1 = bitcast <2 x i64> %__A to <4 x i32> 1862 %2 = bitcast <2 x i64> %__B to <4 x i32> 1863 %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2) 1864 %4 = bitcast <4 x i32> %3 to <2 x i64> 1865 ret <2 x i64> %4 1866} 1867 1868define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> 
%__A, <4 x i64> %__B) { 1869; X86-LABEL: test_mm256_mask_shrdv_epi16: 1870; X86: # %bb.0: # %entry 1871; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1872; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1873; X86-NEXT: retl 1874; 1875; X64-LABEL: test_mm256_mask_shrdv_epi16: 1876; X64: # %bb.0: # %entry 1877; X64-NEXT: kmovd %edi, %k1 1878; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} 1879; X64-NEXT: retq 1880entry: 1881 %0 = bitcast <4 x i64> %__S to <16 x i16> 1882 %1 = bitcast <4 x i64> %__A to <16 x i16> 1883 %2 = bitcast <4 x i64> %__B to <16 x i16> 1884 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1885 %4 = bitcast i16 %__U to <16 x i1> 1886 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 1887 %6 = bitcast <16 x i16> %5 to <4 x i64> 1888 ret <4 x i64> %6 1889} 1890 1891define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1892; X86-LABEL: test_mm256_maskz_shrdv_epi16: 1893; X86: # %bb.0: # %entry 1894; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 1895; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1896; X86-NEXT: retl 1897; 1898; X64-LABEL: test_mm256_maskz_shrdv_epi16: 1899; X64: # %bb.0: # %entry 1900; X64-NEXT: kmovd %edi, %k1 1901; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} 1902; X64-NEXT: retq 1903entry: 1904 %0 = bitcast <4 x i64> %__S to <16 x i16> 1905 %1 = bitcast <4 x i64> %__A to <16 x i16> 1906 %2 = bitcast <4 x i64> %__B to <16 x i16> 1907 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1908 %4 = bitcast i16 %__U to <16 x i1> 1909 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer 1910 %6 = bitcast <16 x i16> %5 to <4 x i64> 1911 ret <4 x i64> %6 1912} 1913 1914define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { 1915; CHECK-LABEL: test_mm256_shrdv_epi16: 1916; CHECK: # %bb.0: # %entry 1917; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 1918; CHECK-NEXT: ret{{[l|q]}} 1919entry: 1920 %0 = bitcast <4 x i64> %__S to <16 x i16> 1921 %1 = bitcast <4 x i64> %__A to <16 x i16> 1922 %2 = bitcast <4 x i64> %__B to <16 x i16> 1923 %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2) 1924 %4 = bitcast <16 x i16> %3 to <4 x i64> 1925 ret <4 x i64> %4 1926} 1927 1928define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { 1929; X86-LABEL: test_mm_mask_shrdv_epi16: 1930; X86: # %bb.0: # %entry 1931; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1932; X86-NEXT: kmovd %eax, %k1 1933; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1934; X86-NEXT: retl 1935; 1936; X64-LABEL: test_mm_mask_shrdv_epi16: 1937; X64: # %bb.0: # %entry 1938; X64-NEXT: kmovd %edi, %k1 1939; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} 1940; X64-NEXT: retq 1941entry: 1942 %0 = bitcast <2 x i64> %__S to <8 x i16> 1943 %1 = bitcast <2 x i64> %__A to <8 x i16> 1944 %2 = bitcast <2 x i64> %__B to <8 x i16> 1945 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1946 %4 = bitcast i8 %__U to <8 x i1> 1947 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 1948 %6 = bitcast <8 x i16> %5 to <2 x i64> 1949 ret <2 x i64> %6 1950} 1951 1952define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1953; X86-LABEL: test_mm_maskz_shrdv_epi16: 1954; X86: # %bb.0: # %entry 1955; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1956; X86-NEXT: kmovd %eax, %k1 1957; X86-NEXT: 
vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1958; X86-NEXT: retl 1959; 1960; X64-LABEL: test_mm_maskz_shrdv_epi16: 1961; X64: # %bb.0: # %entry 1962; X64-NEXT: kmovd %edi, %k1 1963; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} 1964; X64-NEXT: retq 1965entry: 1966 %0 = bitcast <2 x i64> %__S to <8 x i16> 1967 %1 = bitcast <2 x i64> %__A to <8 x i16> 1968 %2 = bitcast <2 x i64> %__B to <8 x i16> 1969 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1970 %4 = bitcast i8 %__U to <8 x i1> 1971 %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer 1972 %6 = bitcast <8 x i16> %5 to <2 x i64> 1973 ret <2 x i64> %6 1974} 1975 1976define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { 1977; CHECK-LABEL: test_mm_shrdv_epi16: 1978; CHECK: # %bb.0: # %entry 1979; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 1980; CHECK-NEXT: ret{{[l|q]}} 1981entry: 1982 %0 = bitcast <2 x i64> %__S to <8 x i16> 1983 %1 = bitcast <2 x i64> %__A to <8 x i16> 1984 %2 = bitcast <2 x i64> %__B to <8 x i16> 1985 %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2) 1986 %4 = bitcast <8 x i16> %3 to <2 x i64> 1987 ret <2 x i64> %4 1988} 1989 1990declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8) 1991declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16) 1992declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) 1993declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) 1994declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8) 1995declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16) 1996declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) 1997declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) 1998declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16) 1999declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32) 2000declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) 2001declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) 2002declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16) 2003declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32) 2004declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) 2005declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) 2006