1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) { 6; X86-LABEL: test_mask_expand_load_w_128: 7; X86: # %bb.0: 8; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 10; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 11; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00] 12; X86-NEXT: retl # encoding: [0xc3] 13; 14; X64-LABEL: test_mask_expand_load_w_128: 15; X64: # %bb.0: 16; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 17; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07] 18; X64-NEXT: retq # encoding: [0xc3] 19 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask) 20 ret <8 x i16> %res 21} 22 23define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) { 24; X86-LABEL: test_maskz_expand_load_w_128: 25; X86: # %bb.0: 26; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 27; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 28; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 29; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00] 30; X86-NEXT: retl # encoding: [0xc3] 31; 32; X64-LABEL: test_maskz_expand_load_w_128: 33; X64: # %bb.0: 34; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 35; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07] 36; X64-NEXT: retq # encoding: [0xc3] 37 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> zeroinitializer, i8 %mask) 38 ret <8 x i16> %res 39} 40 41declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask) 42 43define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) { 44; X86-LABEL: test_expand_load_w_128: 45; X86: # %bb.0: 46; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 47; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 48; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00] 49; X86-NEXT: retl # encoding: [0xc3] 50; 51; X64-LABEL: test_expand_load_w_128: 52; X64: # %bb.0: 53; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 54; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07] 55; X64-NEXT: retq # encoding: [0xc3] 56 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 -1) 57 ret <8 x i16> %res 58} 59 60define <8 x i16> @test_expand_w_128(<8 x i16> %data) { 61; CHECK-LABEL: test_expand_w_128: 62; CHECK: # %bb.0: 63; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 64 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) 65 ret <8 x i16> %res 66} 67 68define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 69; X86-LABEL: test_mask_expand_w_128: 70; X86: # %bb.0: 71; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 72; 
X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 73; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 74; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 75; X86-NEXT: retl # encoding: [0xc3] 76; 77; X64-LABEL: test_mask_expand_w_128: 78; X64: # %bb.0: 79; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 80; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 81; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 82; X64-NEXT: retq # encoding: [0xc3] 83 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) 84 ret <8 x i16> %res 85} 86 87define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { 88; X86-LABEL: test_maskz_expand_w_128: 89; X86: # %bb.0: 90; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 91; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 92; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 93; X86-NEXT: retl # encoding: [0xc3] 94; 95; X64-LABEL: test_maskz_expand_w_128: 96; X64: # %bb.0: 97; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 98; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 99; X64-NEXT: retq # encoding: [0xc3] 100 %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) 101 ret <8 x i16> %res 102} 103 104declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) 105 106define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) { 107; X86-LABEL: test_mask_expand_load_b_128: 108; X86: # %bb.0: 109; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 110; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 111; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00] 112; X86-NEXT: retl # encoding: [0xc3] 113; 114; X64-LABEL: test_mask_expand_load_b_128: 115; X64: # %bb.0: 116; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 117; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07] 118; X64-NEXT: retq # encoding: [0xc3] 119 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask) 120 ret <16 x i8> %res 121} 122 123define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) { 124; X86-LABEL: test_maskz_expand_load_b_128: 125; X86: # %bb.0: 126; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 127; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 128; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00] 129; X86-NEXT: retl # encoding: [0xc3] 130; 131; X64-LABEL: test_maskz_expand_load_b_128: 132; X64: # %bb.0: 133; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 134; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07] 135; X64-NEXT: retq # encoding: [0xc3] 136 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> zeroinitializer, i16 %mask) 137 ret <16 x i8> %res 138} 139 140declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask) 141 142define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) { 143; 
X86-LABEL: test_expand_load_b_128: 144; X86: # %bb.0: 145; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 146; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 147; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00] 148; X86-NEXT: retl # encoding: [0xc3] 149; 150; X64-LABEL: test_expand_load_b_128: 151; X64: # %bb.0: 152; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 153; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07] 154; X64-NEXT: retq # encoding: [0xc3] 155 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 -1) 156 ret <16 x i8> %res 157} 158 159define <16 x i8> @test_expand_b_128(<16 x i8> %data) { 160; CHECK-LABEL: test_expand_b_128: 161; CHECK: # %bb.0: 162; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 163 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) 164 ret <16 x i8> %res 165} 166 167define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 168; X86-LABEL: test_mask_expand_b_128: 169; X86: # %bb.0: 170; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 171; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 172; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 173; X86-NEXT: retl # encoding: [0xc3] 174; 175; X64-LABEL: test_mask_expand_b_128: 176; X64: # %bb.0: 177; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 178; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 179; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 180; X64-NEXT: retq # encoding: [0xc3] 181 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) 182 ret <16 x i8> %res 183} 184 185define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { 186; X86-LABEL: test_maskz_expand_b_128: 187; X86: # %bb.0: 188; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 189; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 190; X86-NEXT: retl # encoding: [0xc3] 191; 192; X64-LABEL: test_maskz_expand_b_128: 193; X64: # %bb.0: 194; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 195; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 196; X64-NEXT: retq # encoding: [0xc3] 197 %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) 198 ret <16 x i8> %res 199} 200 201declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) 202 203define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) { 204; X86-LABEL: test_mask_compress_store_w_128: 205; X86: # %bb.0: 206; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 207; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 208; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 209; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00] 210; X86-NEXT: retl # encoding: [0xc3] 211; 212; X64-LABEL: test_mask_compress_store_w_128: 213; X64: # %bb.0: 214; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 215; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: 
[0x62,0xf2,0xfd,0x09,0x63,0x07] 216; X64-NEXT: retq # encoding: [0xc3] 217 call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask) 218 ret void 219} 220 221declare void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask) 222 223define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) { 224; X86-LABEL: test_compress_store_w_128: 225; X86: # %bb.0: 226; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 227; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 228; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00] 229; X86-NEXT: retl # encoding: [0xc3] 230; 231; X64-LABEL: test_compress_store_w_128: 232; X64: # %bb.0: 233; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 234; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07] 235; X64-NEXT: retq # encoding: [0xc3] 236 call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 -1) 237 ret void 238} 239 240define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 241; X86-LABEL: test_mask_compress_w_128: 242; X86: # %bb.0: 243; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 244; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 245; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 246; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 247; X86-NEXT: retl # encoding: [0xc3] 248; 249; X64-LABEL: test_mask_compress_w_128: 250; X64: # %bb.0: 251; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 252; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 253; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 254; X64-NEXT: retq # encoding: [0xc3] 255 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) 256 ret <8 x i16> %res 257} 258 259define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { 260; X86-LABEL: test_maskz_compress_w_128: 261; X86: # %bb.0: 262; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 263; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 264; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 265; X86-NEXT: retl # encoding: [0xc3] 266; 267; X64-LABEL: test_maskz_compress_w_128: 268; X64: # %bb.0: 269; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 270; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 271; X64-NEXT: retq # encoding: [0xc3] 272 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) 273 ret <8 x i16> %res 274} 275 276define <8 x i16> @test_compress_w_128(<8 x i16> %data) { 277; CHECK-LABEL: test_compress_w_128: 278; CHECK: # %bb.0: 279; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 280 %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) 281 ret <8 x i16> %res 282} 283 284declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) 285 286define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) { 287; X86-LABEL: test_mask_compress_store_b_128: 288; X86: # %bb.0: 289; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 290; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 291; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 292; X86-NEXT: retl # encoding: [0xc3] 293; 294; X64-LABEL: test_mask_compress_store_b_128: 295; X64: # %bb.0: 296; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 297; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 298; X64-NEXT: retq # encoding: [0xc3] 299 call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask) 300 ret void 301} 302 303declare void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask) 304 305define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) { 306; X86-LABEL: test_compress_store_b_128: 307; X86: # %bb.0: 308; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 309; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 310; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 311; X86-NEXT: retl # encoding: [0xc3] 312; 313; X64-LABEL: test_compress_store_b_128: 314; X64: # %bb.0: 315; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 316; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 317; X64-NEXT: retq # encoding: [0xc3] 318 call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 -1) 319 ret void 320} 321 322define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 323; X86-LABEL: test_mask_compress_b_128: 324; X86: # %bb.0: 325; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 326; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 327; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 328; X86-NEXT: retl # encoding: [0xc3] 329; 330; X64-LABEL: test_mask_compress_b_128: 331; X64: # %bb.0: 332; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 333; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 334; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 335; X64-NEXT: retq # encoding: [0xc3] 336 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) 337 ret <16 x i8> %res 338} 339 340define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { 341; X86-LABEL: test_maskz_compress_b_128: 342; X86: # %bb.0: 343; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 344; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 345; X86-NEXT: retl # encoding: [0xc3] 346; 347; X64-LABEL: test_maskz_compress_b_128: 348; X64: # %bb.0: 349; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 350; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 351; X64-NEXT: retq # encoding: [0xc3] 352 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) 353 ret <16 x i8> %res 354} 355 356define <16 x i8> @test_compress_b_128(<16 x i8> %data) { 357; CHECK-LABEL: test_compress_b_128: 358; CHECK: # %bb.0: 359; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 360 %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) 361 ret <16 
x i8> %res 362} 363 364declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) 365 366define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) { 367; X86-LABEL: test_mask_expand_load_w_256: 368; X86: # %bb.0: 369; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 370; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 371; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 372; X86-NEXT: retl # encoding: [0xc3] 373; 374; X64-LABEL: test_mask_expand_load_w_256: 375; X64: # %bb.0: 376; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 377; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 378; X64-NEXT: retq # encoding: [0xc3] 379 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask) 380 ret <16 x i16> %res 381} 382 383define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) { 384; X86-LABEL: test_maskz_expand_load_w_256: 385; X86: # %bb.0: 386; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 387; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 388; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00] 389; X86-NEXT: retl # encoding: [0xc3] 390; 391; X64-LABEL: test_maskz_expand_load_w_256: 392; X64: # %bb.0: 393; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 394; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07] 395; X64-NEXT: retq # encoding: [0xc3] 396 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> zeroinitializer, i16 %mask) 397 ret <16 x i16> %res 398} 399 400declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask) 401 402define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) { 403; X86-LABEL: test_expand_load_w_256: 404; X86: # %bb.0: 405; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 406; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 407; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 408; X86-NEXT: retl # encoding: [0xc3] 409; 410; X64-LABEL: test_expand_load_w_256: 411; X64: # %bb.0: 412; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 413; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 414; X64-NEXT: retq # encoding: [0xc3] 415 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 -1) 416 ret <16 x i16> %res 417} 418 419define <16 x i16> @test_expand_w_256(<16 x i16> %data) { 420; CHECK-LABEL: test_expand_w_256: 421; CHECK: # %bb.0: 422; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 423 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) 424 ret <16 x i16> %res 425} 426 427define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 428; X86-LABEL: test_mask_expand_w_256: 429; X86: # %bb.0: 430; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 431; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 432; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 433; X86-NEXT: retl # encoding: [0xc3] 434; 435; X64-LABEL: 
test_mask_expand_w_256: 436; X64: # %bb.0: 437; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 438; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 439; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 440; X64-NEXT: retq # encoding: [0xc3] 441 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) 442 ret <16 x i16> %res 443} 444 445define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { 446; X86-LABEL: test_maskz_expand_w_256: 447; X86: # %bb.0: 448; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 449; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 450; X86-NEXT: retl # encoding: [0xc3] 451; 452; X64-LABEL: test_maskz_expand_w_256: 453; X64: # %bb.0: 454; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 455; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 456; X64-NEXT: retq # encoding: [0xc3] 457 %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) 458 ret <16 x i16> %res 459} 460 461declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) 462 463define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) { 464; X86-LABEL: test_mask_expand_load_b_256: 465; X86: # %bb.0: 466; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 467; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 468; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 469; X86-NEXT: retl # encoding: [0xc3] 470; 471; X64-LABEL: test_mask_expand_load_b_256: 472; X64: # %bb.0: 473; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 474; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 475; X64-NEXT: retq # encoding: [0xc3] 476 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask) 477 ret <32 x i8> %res 478} 479 480define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) { 481; X86-LABEL: test_maskz_expand_load_b_256: 482; X86: # %bb.0: 483; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 484; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 485; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00] 486; X86-NEXT: retl # encoding: [0xc3] 487; 488; X64-LABEL: test_maskz_expand_load_b_256: 489; X64: # %bb.0: 490; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 491; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07] 492; X64-NEXT: retq # encoding: [0xc3] 493 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> zeroinitializer, i32 %mask) 494 ret <32 x i8> %res 495} 496 497declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask) 498 499define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) { 500; X86-LABEL: test_expand_load_b_256: 501; X86: # %bb.0: 502; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 503; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 504; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 505; X86-NEXT: retl # 
encoding: [0xc3] 506; 507; X64-LABEL: test_expand_load_b_256: 508; X64: # %bb.0: 509; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 510; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 511; X64-NEXT: retq # encoding: [0xc3] 512 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 -1) 513 ret <32 x i8> %res 514} 515 516define <32 x i8> @test_expand_b_256(<32 x i8> %data) { 517; CHECK-LABEL: test_expand_b_256: 518; CHECK: # %bb.0: 519; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 520 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) 521 ret <32 x i8> %res 522} 523 524define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 525; X86-LABEL: test_mask_expand_b_256: 526; X86: # %bb.0: 527; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 528; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 529; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 530; X86-NEXT: retl # encoding: [0xc3] 531; 532; X64-LABEL: test_mask_expand_b_256: 533; X64: # %bb.0: 534; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 535; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 536; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 537; X64-NEXT: retq # encoding: [0xc3] 538 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) 539 ret <32 x i8> %res 540} 541 542define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { 543; X86-LABEL: test_maskz_expand_b_256: 544; X86: # %bb.0: 545; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 546; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 547; X86-NEXT: retl # encoding: [0xc3] 548; 549; X64-LABEL: test_maskz_expand_b_256: 550; X64: # %bb.0: 551; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 552; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 553; X64-NEXT: retq # encoding: [0xc3] 554 %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) 555 ret <32 x i8> %res 556} 557 558declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) 559 560define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) { 561; X86-LABEL: test_mask_compress_store_w_256: 562; X86: # %bb.0: 563; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 564; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 565; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 566; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 567; X86-NEXT: retl # encoding: [0xc3] 568; 569; X64-LABEL: test_mask_compress_store_w_256: 570; X64: # %bb.0: 571; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 572; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 573; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 574; X64-NEXT: retq # encoding: [0xc3] 575 call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask) 576 ret void 577} 578 579declare void 
@llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask) 580 581define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) { 582; X86-LABEL: test_compress_store_w_256: 583; X86: # %bb.0: 584; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 585; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 586; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 587; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 588; X86-NEXT: retl # encoding: [0xc3] 589; 590; X64-LABEL: test_compress_store_w_256: 591; X64: # %bb.0: 592; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 593; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 594; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 595; X64-NEXT: retq # encoding: [0xc3] 596 call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 -1) 597 ret void 598} 599 600define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 601; X86-LABEL: test_mask_compress_w_256: 602; X86: # %bb.0: 603; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 604; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 605; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 606; X86-NEXT: retl # encoding: [0xc3] 607; 608; X64-LABEL: test_mask_compress_w_256: 609; X64: # %bb.0: 610; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 611; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 612; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 613; X64-NEXT: retq # encoding: [0xc3] 614 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) 615 ret <16 x i16> %res 616} 617 618define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { 619; X86-LABEL: test_maskz_compress_w_256: 620; X86: # %bb.0: 621; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 622; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 623; X86-NEXT: retl # encoding: [0xc3] 624; 625; X64-LABEL: test_maskz_compress_w_256: 626; X64: # %bb.0: 627; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 628; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 629; X64-NEXT: retq # encoding: [0xc3] 630 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) 631 ret <16 x i16> %res 632} 633 634define <16 x i16> @test_compress_w_256(<16 x i16> %data) { 635; CHECK-LABEL: test_compress_w_256: 636; CHECK: # %bb.0: 637; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 638 %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) 639 ret <16 x i16> %res 640} 641 642declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) 643 644define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) { 645; X86-LABEL: test_mask_compress_store_b_256: 646; X86: # %bb.0: 647; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 648; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 649; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: 
[0x62,0xf2,0x7d,0x29,0x63,0x00] 650; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 651; X86-NEXT: retl # encoding: [0xc3] 652; 653; X64-LABEL: test_mask_compress_store_b_256: 654; X64: # %bb.0: 655; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 656; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 657; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 658; X64-NEXT: retq # encoding: [0xc3] 659 call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask) 660 ret void 661} 662 663declare void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask) 664 665define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) { 666; X86-LABEL: test_compress_store_b_256: 667; X86: # %bb.0: 668; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 669; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 670; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00] 671; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 672; X86-NEXT: retl # encoding: [0xc3] 673; 674; X64-LABEL: test_compress_store_b_256: 675; X64: # %bb.0: 676; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 677; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 678; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 679; X64-NEXT: retq # encoding: [0xc3] 680 call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 -1) 681 ret void 682} 683 684define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 685; X86-LABEL: test_mask_compress_b_256: 686; X86: # %bb.0: 687; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 688; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 689; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 690; X86-NEXT: retl # encoding: [0xc3] 691; 692; X64-LABEL: test_mask_compress_b_256: 693; X64: # %bb.0: 694; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 695; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 696; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 697; X64-NEXT: retq # encoding: [0xc3] 698 %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) 699 ret <32 x i8> %res 700} 701 702define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { 703; X86-LABEL: test_maskz_compress_b_256: 704; X86: # %bb.0: 705; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 706; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 707; X86-NEXT: retl # encoding: [0xc3] 708; 709; X64-LABEL: test_maskz_compress_b_256: 710; X64: # %bb.0: 711; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 712; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 713; X64-NEXT: retq # encoding: [0xc3] 714 %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) 715 ret <32 x i8> %res 716} 717 718define <32 x i8> @test_compress_b_256(<32 x i8> %data) { 719; CHECK-LABEL: test_compress_b_256: 720; CHECK: # %bb.0: 721; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 722 %res = call <32 x i8> 
@llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) 723 ret <32 x i8> %res 724} 725 726declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) 727 728define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 729; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 730; X86: # %bb.0: 731; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 732; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 733; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 734; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 735; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 736; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 737; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 738; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 739; X86-NEXT: retl # encoding: [0xc3] 740; 741; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 742; X64: # %bb.0: 743; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 744; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 745; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 746; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 747; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 748; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 749; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 750; X64-NEXT: retq # encoding: [0xc3] 751 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) 752 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1) 753 %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4) 754 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 755 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %res1, 1 756 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %res2, 2 757 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5 758} 759declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8) 760 761define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 762; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 763; X86: # %bb.0: 764; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 765; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 766; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 767; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16] 768; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17] 769; X86-NEXT: vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18] 770; X86-NEXT: 
vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 771; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 772; X86-NEXT: retl # encoding: [0xc3] 773; 774; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 775; X64: # %bb.0: 776; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 777; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 778; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16] 779; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17] 780; X64-NEXT: vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18] 781; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 782; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 783; X64-NEXT: retq # encoding: [0xc3] 784 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) 785 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1) 786 %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4) 787 %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 788 %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res3, <8 x i32> %res1, 1 789 %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res4, <8 x i32> %res2, 2 790 ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5 791} 792declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 793 794define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 795; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 796; X86: # %bb.0: 797; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 798; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 799; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 800; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16] 801; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17] 802; X86-NEXT: vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18] 803; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 804; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 805; X86-NEXT: retl # encoding: [0xc3] 806; 807; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 808; X64: # %bb.0: 809; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 810; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 811; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16] 812; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17] 813; X64-NEXT: vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18] 814; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 815; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 816; X64-NEXT: retq # encoding: [0xc3] 817 %res0 = 
call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4) 818 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1) 819 %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4) 820 %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0 821 %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res3, <2 x i64> %res1, 1 822 %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res4, <2 x i64> %res2, 2 823 ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5 824} 825declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8) 826 827define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 828; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 829; X86: # %bb.0: 830; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 831; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 832; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 833; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16] 834; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17] 835; X86-NEXT: vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18] 836; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 837; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 838; X86-NEXT: retl # encoding: [0xc3] 839; 840; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 841; X64: # %bb.0: 842; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 843; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 844; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16] 845; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17] 846; X64-NEXT: vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18] 847; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 848; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 849; X64-NEXT: retq # encoding: [0xc3] 850 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 851 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1) 852 %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4) 853 %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0 854 %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res3, <4 x i64> %res1, 1 855 %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res4, <4 x i64> %res2, 2 856 ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5 857} 858declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 859 860define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 861; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 862; X86: # 
%bb.0: 863; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 864; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 865; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 866; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06] 867; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07] 868; X86-NEXT: vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08] 869; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 870; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 871; X86-NEXT: retl # encoding: [0xc3] 872; 873; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 874; X64: # %bb.0: 875; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 876; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 877; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06] 878; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07] 879; X64-NEXT: vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08] 880; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 881; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 882; X64-NEXT: retq # encoding: [0xc3] 883 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4) 884 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1) 885 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4) 886 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 887 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 888 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 889 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 890} 891declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8) 892 893define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 894; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 895; X86: # %bb.0: 896; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 897; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 898; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06] 899; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07] 900; X86-NEXT: vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08] 901; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 902; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 903; X86-NEXT: retl # encoding: [0xc3] 904; 905; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 906; X64: # %bb.0: 907; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 908; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 909; X64-NEXT: vpshldw $6, %ymm1, %ymm0, 
%ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06] 910; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07] 911; X64-NEXT: vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08] 912; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 913; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 914; X64-NEXT: retq # encoding: [0xc3] 915 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4) 916 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1) 917 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4) 918 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 919 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 920 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 921 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 922} 923declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16) 924 925define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 926; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 927; X86: # %bb.0: 928; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 929; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 930; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 931; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 932; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 933; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 934; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 935; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 936; X86-NEXT: retl # encoding: [0xc3] 937; 938; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 939; X64: # %bb.0: 940; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 941; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 942; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 943; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 944; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 945; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 946; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 947; X64-NEXT: retq # encoding: [0xc3] 948 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) 949 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1) 950 %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4) 951 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 952 
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %res1, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %res2, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
; X86-NEXT: vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
; X64-NEXT: vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res3, <8 x i32> %res1, 1
  %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res4, <8 x i32> %res2, 2
  ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5
}
declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
; X86-NEXT: vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
; X64-NEXT: vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res3, <2 x i64> %res1, 1
  %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res4, <2 x i64> %res2, 2
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5
}
declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
; X86-NEXT: vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
; X64-NEXT: vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res3, <4 x i64> %res1, 1
  %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res4, <4 x i64> %res2, 2
  ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5
}
declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
; X86-NEXT: vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
; X64-NEXT: vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}
declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

1090define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1091; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 1092; X86: # %bb.0: 1093; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 1094; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1095; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06] 1096; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07] 1097; X86-NEXT: vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08] 1098; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 1099; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1100; X86-NEXT: retl # encoding: [0xc3] 1101; 1102; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 1103; X64: # %bb.0: 1104; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 1105; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1106; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06] 1107; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07] 1108; X64-NEXT: vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08] 1109; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 1110; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1111; X64-NEXT: retq # encoding: [0xc3] 1112 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4) 1113 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1) 1114 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4) 1115 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 1116 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 1117 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 1118 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 1119} 1120declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16) 1121 1122define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 1123; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2: 1124; X86: # %bb.0: 1125; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 1126; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1127; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1128; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 1129; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 1130; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 1131; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 1132; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1133; X86-NEXT: retl # 
encoding: [0xc3] 1134; 1135; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2: 1136; X64: # %bb.0: 1137; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 1138; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1139; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 1140; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 1141; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 1142; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 1143; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1144; X64-NEXT: retq # encoding: [0xc3] 1145 %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 1146 %2 = bitcast i8 %x4 to <8 x i1> 1147 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1148 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 1149 %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23) 1150 %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24) 1151 %6 = bitcast i8 %x4 to <8 x i1> 1152 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1153 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 1154 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 1155 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1 1156 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2 1157 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5 1158} 1159declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32) 1160 1161define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 1162; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2: 1163; X86: # %bb.0: 1164; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1165; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1166; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 1167; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17] 1168; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1169; X86-NEXT: retl # encoding: [0xc3] 1170; 1171; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2: 1172; X64: # %bb.0: 1173; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1174; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 1175; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17] 1176; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1177; X64-NEXT: retq # encoding: [0xc3] 1178 %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 1179 %2 = bitcast i8 %x4 to <8 x i1> 1180 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 1181 %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23) 1182 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 1183 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %4, 1 1184 ret { <8 x i32>, <8 x i32> } %res3 1185} 1186declare <8 
x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32) 1187 1188define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 1189; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2: 1190; X86: # %bb.0: 1191; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1192; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1193; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 1194; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17] 1195; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1196; X86-NEXT: retl # encoding: [0xc3] 1197; 1198; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2: 1199; X64: # %bb.0: 1200; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1201; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 1202; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17] 1203; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1204; X64-NEXT: retq # encoding: [0xc3] 1205 %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 1206 %2 = bitcast i8 %x4 to <8 x i1> 1207 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1208 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 1209 %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23) 1210 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0 1211 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %4, 1 1212 ret { <2 x i64>, <2 x i64> } %res3 1213} 1214declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) 1215 1216define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 1217; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2: 1218; X86: # %bb.0: 1219; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1220; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1221; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 1222; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17] 1223; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1224; X86-NEXT: retl # encoding: [0xc3] 1225; 1226; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2: 1227; X64: # %bb.0: 1228; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1229; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 1230; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17] 1231; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1232; X64-NEXT: retq # encoding: [0xc3] 1233 %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 1234 %2 = bitcast i8 %x4 to <8 x i1> 1235 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1236 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1237 %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23) 1238 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0 1239 %res3 = 
insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %4, 1 1240 ret { <4 x i64>, <4 x i64> } %res3 1241} 1242declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32) 1243 1244define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 1245; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2: 1246; X86: # %bb.0: 1247; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1248; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1249; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 1250; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07] 1251; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1252; X86-NEXT: retl # encoding: [0xc3] 1253; 1254; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2: 1255; X64: # %bb.0: 1256; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1257; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 1258; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07] 1259; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1260; X64-NEXT: retq # encoding: [0xc3] 1261 %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6) 1262 %2 = bitcast i8 %x4 to <8 x i1> 1263 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 1264 %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7) 1265 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 1266 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %4, 1 1267 ret { <8 x i16>, <8 x i16> } %res3 1268} 1269declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32) 1270 1271define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1272; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2: 1273; X86: # %bb.0: 1274; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1275; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 1276; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07] 1277; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1278; X86-NEXT: retl # encoding: [0xc3] 1279; 1280; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2: 1281; X64: # %bb.0: 1282; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1283; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 1284; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07] 1285; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1286; X64-NEXT: retq # encoding: [0xc3] 1287 %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6) 1288 %2 = bitcast i16 %x4 to <16 x i1> 1289 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 1290 %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7) 1291 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 1292 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %4, 1 1293 ret { <16 x i16>, <16 x i16> } %res3 1294} 1295declare 
<16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32) 1296 1297define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 1298; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2: 1299; X86: # %bb.0: 1300; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 1301; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1302; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1303; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 1304; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 1305; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 1306; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 1307; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1308; X86-NEXT: retl # encoding: [0xc3] 1309; 1310; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2: 1311; X64: # %bb.0: 1312; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 1313; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1314; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 1315; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 1316; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 1317; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 1318; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1319; X64-NEXT: retq # encoding: [0xc3] 1320 %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 1321 %2 = bitcast i8 %x4 to <8 x i1> 1322 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1323 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 1324 %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23) 1325 %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24) 1326 %6 = bitcast i8 %x4 to <8 x i1> 1327 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1328 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 1329 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 1330 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1 1331 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2 1332 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5 1333} 1334declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32) 1335 1336define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 1337; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2: 1338; X86: # %bb.0: 1339; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1340; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1341; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 1342; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17] 1343; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO 
VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1344; X86-NEXT: retl # encoding: [0xc3] 1345; 1346; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2: 1347; X64: # %bb.0: 1348; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1349; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 1350; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17] 1351; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1352; X64-NEXT: retq # encoding: [0xc3] 1353 %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 1354 %2 = bitcast i8 %x4 to <8 x i1> 1355 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 1356 %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23) 1357 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 1358 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %4, 1 1359 ret { <8 x i32>, <8 x i32> } %res3 1360} 1361declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32) 1362 1363define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 1364; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2: 1365; X86: # %bb.0: 1366; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1367; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1368; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 1369; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17] 1370; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1371; X86-NEXT: retl # encoding: [0xc3] 1372; 1373; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2: 1374; X64: # %bb.0: 1375; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1376; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 1377; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17] 1378; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1379; X64-NEXT: retq # encoding: [0xc3] 1380 %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 1381 %2 = bitcast i8 %x4 to <8 x i1> 1382 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1383 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 1384 %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23) 1385 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0 1386 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %4, 1 1387 ret { <2 x i64>, <2 x i64> } %res3 1388} 1389declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32) 1390 1391define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 1392; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2: 1393; X86: # %bb.0: 1394; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1395; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1396; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 1397; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17] 1398; X86-NEXT: vmovdqa 
%ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1399; X86-NEXT: retl # encoding: [0xc3] 1400; 1401; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2: 1402; X64: # %bb.0: 1403; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1404; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 1405; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17] 1406; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1407; X64-NEXT: retq # encoding: [0xc3] 1408 %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 1409 %2 = bitcast i8 %x4 to <8 x i1> 1410 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1411 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1412 %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23) 1413 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0 1414 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %4, 1 1415 ret { <4 x i64>, <4 x i64> } %res3 1416} 1417declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32) 1418 1419define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 1420; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2: 1421; X86: # %bb.0: 1422; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1423; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1424; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1425; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07] 1426; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1427; X86-NEXT: retl # encoding: [0xc3] 1428; 1429; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2: 1430; X64: # %bb.0: 1431; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1432; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1433; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07] 1434; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1435; X64-NEXT: retq # encoding: [0xc3] 1436 %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6) 1437 %2 = bitcast i8 %x4 to <8 x i1> 1438 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 1439 %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7) 1440 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 1441 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %4, 1 1442 ret { <8 x i16>, <8 x i16> } %res3 1443} 1444declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32) 1445 1446define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1447; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2: 1448; X86: # %bb.0: 1449; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1450; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1451; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07] 1452; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1453; X86-NEXT: retl # encoding: [0xc3] 1454; 1455; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2: 1456; X64: # %bb.0: 1457; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1458; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1459; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07] 1460; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1461; X64-NEXT: retq # encoding: [0xc3] 1462 %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6) 1463 %2 = bitcast i16 %x4 to <16 x i1> 1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 1465 %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7) 1466 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 1467 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %4, 1 1468 ret { <16 x i16>, <16 x i16> } %res3 1469} 1470declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32) 1471 1472declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1473declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1474 1475define <8 x i32>@test_int_x86_avx512_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 1476; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_256: 1477; CHECK: # %bb.0: 1478; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] 1479; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1480 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1481 ret <8 x i32> %res 1482} 1483 1484define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 1485; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1486; X86: # %bb.0: 1487; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1488; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1489; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1490; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1491; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00] 1492; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda] 1493; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1494; X86-NEXT: retl # encoding: [0xc3] 1495; 1496; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1497; X64: # %bb.0: 1498; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1499; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1500; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07] 1501; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda] 1502; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1503; X64-NEXT: retq # encoding: [0xc3] 1504 %x2 = load <8 x i32>, ptr %x2p 1505 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1506 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 1507 %res2 = insertvalue 
{ <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 1508 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 1509 ret { <8 x i32>, <8 x i32> } %res3 1510} 1511 1512declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1513declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1514 1515define <4 x i32>@test_int_x86_avx512_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 1516; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_128: 1517; CHECK: # %bb.0: 1518; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] 1519; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1520 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1521 ret <4 x i32> %res 1522} 1523 1524define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 1525; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1526; X86: # %bb.0: 1527; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1528; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1529; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1530; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1531; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00] 1532; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda] 1533; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1534; X86-NEXT: retl # encoding: [0xc3] 1535; 1536; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1537; X64: # %bb.0: 1538; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1539; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1540; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07] 1541; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda] 1542; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1543; X64-NEXT: retq # encoding: [0xc3] 1544 %x2 = load <4 x i32>, ptr %x2p 1545 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1546 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 1547 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 1548 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 1549 ret { <4 x i32>, <4 x i32> } %res3 1550} 1551 1552declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1553declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1554 1555define <4 x i64>@test_int_x86_avx512_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 1556; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_256: 1557; CHECK: # %bb.0: 1558; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] 1559; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1560 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1561 ret <4 x i64> %res 1562} 1563 1564define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x 
i64> %x4, i8 %x3) { 1565; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1566; X86: # %bb.0: 1567; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1568; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1569; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1570; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1571; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00] 1572; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda] 1573; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1574; X86-NEXT: retl # encoding: [0xc3] 1575; 1576; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1577; X64: # %bb.0: 1578; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1579; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1580; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07] 1581; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda] 1582; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1583; X64-NEXT: retq # encoding: [0xc3] 1584 %x2 = load <4 x i64>, ptr %x2p 1585 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1586 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 1587 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0 1588 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %res1, 1 1589 ret { <4 x i64>, <4 x i64> } %res3 1590} 1591 1592declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1593declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1594 1595define <2 x i64>@test_int_x86_avx512_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 1596; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_128: 1597; CHECK: # %bb.0: 1598; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] 1599; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1600 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1601 ret <2 x i64> %res 1602} 1603 1604define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) { 1605; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1606; X86: # %bb.0: 1607; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1608; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1609; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1610; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1611; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00] 1612; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda] 1613; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1614; X86-NEXT: retl # encoding: [0xc3] 1615; 1616; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1617; X64: # %bb.0: 1618; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1619; X64-NEXT: kmovd %esi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xce] 1620; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07] 1621; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda] 1622; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1623; X64-NEXT: retq # encoding: [0xc3] 1624 %x2 = load <2 x i64>, ptr %x2p 1625 %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1626 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 1627 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0 1628 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %res1, 1 1629 ret { <2 x i64>, <2 x i64> } %res3 1630} 1631 1632declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1633declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1634 1635define <16 x i16>@test_int_x86_avx512_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1636; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_256: 1637; CHECK: # %bb.0: 1638; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] 1639; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1640 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1641 ret <16 x i16> %res 1642} 1643 1644define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) { 1645; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1646; X86: # %bb.0: 1647; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1648; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1649; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1650; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00] 1651; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda] 1652; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1653; X86-NEXT: retl # encoding: [0xc3] 1654; 1655; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1656; X64: # %bb.0: 1657; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1658; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1659; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07] 1660; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda] 1661; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1662; X64-NEXT: retq # encoding: [0xc3] 1663 %x2 = load <16 x i16>, ptr %x2p 1664 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1665 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 1666 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 1667 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %res1, 1 1668 ret { <16 x i16>, <16 x i16> } %res3 1669} 1670 1671declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1672declare <8 x i16> 
@llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1673 1674define <8 x i16>@test_int_x86_avx512_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1675; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_128: 1676; CHECK: # %bb.0: 1677; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] 1678; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1680 ret <8 x i16> %res 1681} 1682 1683define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) { 1684; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1685; X86: # %bb.0: 1686; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1687; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1688; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1689; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1690; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00] 1691; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda] 1692; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1693; X86-NEXT: retl # encoding: [0xc3] 1694; 1695; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1696; X64: # %bb.0: 1697; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1698; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1699; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07] 1700; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda] 1701; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1702; X64-NEXT: retq # encoding: [0xc3] 1703 %x2 = load <8 x i16>, ptr %x2p 1704 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1705 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3) 1706 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 1707 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %res1, 1 1708 ret { <8 x i16>, <8 x i16> } %res3 1709} 1710 1711declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1712declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1713 1714define <8 x i32>@test_int_x86_avx512_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 1715; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_256: 1716; CHECK: # %bb.0: 1717; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] 1718; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1719 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1720 ret <8 x i32> %res 1721} 1722 1723define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 1724; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1725; X86: # %bb.0: 1726; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1727; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1728; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1729; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1730; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00] 1731; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda] 1732; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1733; X86-NEXT: retl # encoding: [0xc3] 1734; 1735; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1736; X64: # %bb.0: 1737; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1738; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1739; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07] 1740; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda] 1741; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1742; X64-NEXT: retq # encoding: [0xc3] 1743 %x2 = load <8 x i32>, ptr %x2p 1744 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1745 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 1746 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 1747 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 1748 ret { <8 x i32>, <8 x i32> } %res3 1749} 1750 1751declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1752declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1753 1754define <4 x i32>@test_int_x86_avx512_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 1755; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_128: 1756; CHECK: # %bb.0: 1757; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] 1758; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1759 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1760 ret <4 x i32> %res 1761} 1762 1763define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 1764; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1765; X86: # %bb.0: 1766; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1767; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1768; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1769; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1770; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00] 1771; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda] 1772; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1773; X86-NEXT: retl # encoding: [0xc3] 1774; 1775; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1776; X64: # %bb.0: 1777; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1778; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1779; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07] 1780; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda] 1781; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6f,0xcb] 1782; X64-NEXT: retq # encoding: [0xc3] 1783 %x2 = load <4 x i32>, ptr %x2p 1784 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1785 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 1786 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 1787 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 1788 ret { <4 x i32>, <4 x i32> } %res3 1789} 1790 1791declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1792declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1793 1794define <4 x i64>@test_int_x86_avx512_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 1795; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_256: 1796; CHECK: # %bb.0: 1797; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] 1798; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1799 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1800 ret <4 x i64> %res 1801} 1802 1803define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) { 1804; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1805; X86: # %bb.0: 1806; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1807; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1808; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1809; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1810; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00] 1811; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda] 1812; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1813; X86-NEXT: retl # encoding: [0xc3] 1814; 1815; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1816; X64: # %bb.0: 1817; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1818; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1819; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07] 1820; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda] 1821; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1822; X64-NEXT: retq # encoding: [0xc3] 1823 %x2 = load <4 x i64>, ptr %x2p 1824 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1825 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 1826 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0 1827 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %res1, 1 1828 ret { <4 x i64>, <4 x i64> } %res3 1829} 1830 1831declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1832declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1833 1834define <2 x i64>@test_int_x86_avx512_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 1835; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_128: 1836; CHECK: # %bb.0: 1837; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 
# encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] 1838; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1839 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1840 ret <2 x i64> %res 1841} 1842 1843define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) { 1844; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1845; X86: # %bb.0: 1846; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1847; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1848; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1849; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1850; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00] 1851; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda] 1852; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1853; X86-NEXT: retl # encoding: [0xc3] 1854; 1855; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1856; X64: # %bb.0: 1857; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1858; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1859; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07] 1860; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda] 1861; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1862; X64-NEXT: retq # encoding: [0xc3] 1863 %x2 = load <2 x i64>, ptr %x2p 1864 %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1865 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 1866 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0 1867 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %res1, 1 1868 ret { <2 x i64>, <2 x i64> } %res3 1869} 1870 1871declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1872declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1873 1874define <16 x i16>@test_int_x86_avx512_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1875; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_256: 1876; CHECK: # %bb.0: 1877; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] 1878; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1879 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1880 ret <16 x i16> %res 1881} 1882 1883define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) { 1884; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1885; X86: # %bb.0: 1886; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1887; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1888; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1889; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00] 1890; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda] 1891; X86-NEXT: vmovdqa 
%ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1892; X86-NEXT: retl # encoding: [0xc3] 1893; 1894; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1895; X64: # %bb.0: 1896; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1897; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1898; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07] 1899; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda] 1900; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 1901; X64-NEXT: retq # encoding: [0xc3] 1902 %x2 = load <16 x i16>, ptr %x2p 1903 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1904 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 1905 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 1906 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %res1, 1 1907 ret { <16 x i16>, <16 x i16> } %res3 1908} 1909 1910declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1911declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1912 1913define <8 x i16>@test_int_x86_avx512_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1914; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_128: 1915; CHECK: # %bb.0: 1916; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] 1917; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1918 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1919 ret <8 x i16> %res 1920} 1921 1922define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) { 1923; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1924; X86: # %bb.0: 1925; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1926; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1927; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1928; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1929; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00] 1930; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda] 1931; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1932; X86-NEXT: retl # encoding: [0xc3] 1933; 1934; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1935; X64: # %bb.0: 1936; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1937; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1938; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07] 1939; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda] 1940; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 1941; X64-NEXT: retq # encoding: [0xc3] 1942 %x2 = load <8 x i16>, ptr %x2p 1943 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1944 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, 
i8 %x3) 1945 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 1946 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %res1, 1 1947 ret { <8 x i16>, <8 x i16> } %res3 1948} 1949