; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> %data)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> zeroinitializer)
  ret <8 x i16> %2
}

define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data)
  ret <8 x i16> %1
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> %data)
  ret <16 x i8> %2
}

define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
;
X86-LABEL: test_expand_load_b_128: 144; X86: # %bb.0: 145; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 146; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 147; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00] 148; X86-NEXT: retl # encoding: [0xc3] 149; 150; X64-LABEL: test_expand_load_b_128: 151; X64: # %bb.0: 152; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 153; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07] 154; X64-NEXT: retq # encoding: [0xc3] 155 %1 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data) 156 ret <16 x i8> %1 157} 158 159define <16 x i8> @test_expand_b_128(<16 x i8> %data) { 160; CHECK-LABEL: test_expand_b_128: 161; CHECK: # %bb.0: 162; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 163 %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 164 ret <16 x i8> %1 165} 166 167define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 168; X86-LABEL: test_mask_expand_b_128: 169; X86: # %bb.0: 170; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 171; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 172; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 173; X86-NEXT: retl # encoding: [0xc3] 174; 175; X64-LABEL: test_mask_expand_b_128: 176; X64: # %bb.0: 177; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 178; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 179; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 180; X64-NEXT: retq # encoding: [0xc3] 181 %1 = bitcast i16 %mask to <16 x i1> 182 %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) 183 ret <16 x i8> %2 184} 185 186define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { 187; X86-LABEL: test_maskz_expand_b_128: 188; X86: # %bb.0: 189; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 190; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 191; X86-NEXT: retl # encoding: [0xc3] 192; 193; X64-LABEL: test_maskz_expand_b_128: 194; X64: # %bb.0: 195; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 196; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 197; X64-NEXT: retq # encoding: [0xc3] 198 %1 = bitcast i16 %mask to <16 x i1> 199 %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) 200 ret <16 x i8> %2 201} 202 203define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) { 204; X86-LABEL: test_mask_compress_store_w_128: 205; X86: # %bb.0: 206; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 207; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 208; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 209; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: 
[0x62,0xf2,0xfd,0x09,0x63,0x00] 210; X86-NEXT: retl # encoding: [0xc3] 211; 212; X64-LABEL: test_mask_compress_store_w_128: 213; X64: # %bb.0: 214; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 215; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07] 216; X64-NEXT: retq # encoding: [0xc3] 217 %1 = bitcast i8 %mask to <8 x i1> 218 call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> %1) 219 ret void 220} 221 222define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) { 223; X86-LABEL: test_compress_store_w_128: 224; X86: # %bb.0: 225; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 226; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 227; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00] 228; X86-NEXT: retl # encoding: [0xc3] 229; 230; X64-LABEL: test_compress_store_w_128: 231; X64: # %bb.0: 232; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 233; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07] 234; X64-NEXT: retq # encoding: [0xc3] 235 call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 236 ret void 237} 238 239define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 240; X86-LABEL: test_mask_compress_w_128: 241; X86: # %bb.0: 242; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 243; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 244; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 245; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 246; X86-NEXT: retl # encoding: [0xc3] 247; 248; X64-LABEL: test_mask_compress_w_128: 249; X64: # %bb.0: 250; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 251; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 252; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 253; X64-NEXT: retq # encoding: [0xc3] 254 %1 = bitcast i8 %mask to <8 x i1> 255 %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1) 256 ret <8 x i16> %2 257} 258 259define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { 260; X86-LABEL: test_maskz_compress_w_128: 261; X86: # %bb.0: 262; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 263; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 264; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 265; X86-NEXT: retl # encoding: [0xc3] 266; 267; X64-LABEL: test_maskz_compress_w_128: 268; X64: # %bb.0: 269; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 270; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 271; X64-NEXT: retq # encoding: [0xc3] 272 %1 = bitcast i8 %mask to <8 x i1> 273 %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1) 274 ret <8 x i16> %2 275} 276 277define <8 x i16> @test_compress_w_128(<8 x i16> %data) { 278; CHECK-LABEL: test_compress_w_128: 279; CHECK: # %bb.0: 280; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 281 %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 
true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 282 ret <8 x i16> %1 283} 284 285define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) { 286; X86-LABEL: test_mask_compress_store_b_128: 287; X86: # %bb.0: 288; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 289; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 290; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 291; X86-NEXT: retl # encoding: [0xc3] 292; 293; X64-LABEL: test_mask_compress_store_b_128: 294; X64: # %bb.0: 295; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 296; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 297; X64-NEXT: retq # encoding: [0xc3] 298 %1 = bitcast i16 %mask to <16 x i1> 299 call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> %1) 300 ret void 301} 302 303define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) { 304; X86-LABEL: test_compress_store_b_128: 305; X86: # %bb.0: 306; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 307; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 308; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 309; X86-NEXT: retl # encoding: [0xc3] 310; 311; X64-LABEL: test_compress_store_b_128: 312; X64: # %bb.0: 313; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 314; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 315; X64-NEXT: retq # encoding: [0xc3] 316 call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 317 ret void 318} 319 320define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 321; X86-LABEL: test_mask_compress_b_128: 322; X86: # %bb.0: 323; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 324; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 325; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 326; X86-NEXT: retl # encoding: [0xc3] 327; 328; X64-LABEL: test_mask_compress_b_128: 329; X64: # %bb.0: 330; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 331; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 332; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 333; X64-NEXT: retq # encoding: [0xc3] 334 %1 = bitcast i16 %mask to <16 x i1> 335 %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) 336 ret <16 x i8> %2 337} 338 339define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { 340; X86-LABEL: test_maskz_compress_b_128: 341; X86: # %bb.0: 342; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 343; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 344; X86-NEXT: retl # encoding: [0xc3] 345; 346; X64-LABEL: test_maskz_compress_b_128: 347; X64: # %bb.0: 348; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 349; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 350; X64-NEXT: retq # encoding: [0xc3] 351 %1 = bitcast i16 %mask 
to <16 x i1> 352 %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) 353 ret <16 x i8> %2 354} 355 356define <16 x i8> @test_compress_b_128(<16 x i8> %data) { 357; CHECK-LABEL: test_compress_b_128: 358; CHECK: # %bb.0: 359; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 360 %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 361 ret <16 x i8> %1 362} 363 364define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) { 365; X86-LABEL: test_mask_expand_load_w_256: 366; X86: # %bb.0: 367; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 368; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 369; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 370; X86-NEXT: retl # encoding: [0xc3] 371; 372; X64-LABEL: test_mask_expand_load_w_256: 373; X64: # %bb.0: 374; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 375; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 376; X64-NEXT: retq # encoding: [0xc3] 377 %1 = bitcast i16 %mask to <16 x i1> 378 %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> %data) 379 ret <16 x i16> %2 380} 381 382define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) { 383; X86-LABEL: test_maskz_expand_load_w_256: 384; X86: # %bb.0: 385; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 386; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 387; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00] 388; X86-NEXT: retl # encoding: [0xc3] 389; 390; X64-LABEL: test_maskz_expand_load_w_256: 391; X64: # %bb.0: 392; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 393; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07] 394; X64-NEXT: retq # encoding: [0xc3] 395 %1 = bitcast i16 %mask to <16 x i1> 396 %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> zeroinitializer) 397 ret <16 x i16> %2 398} 399 400define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) { 401; X86-LABEL: test_expand_load_w_256: 402; X86: # %bb.0: 403; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 404; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 405; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 406; X86-NEXT: retl # encoding: [0xc3] 407; 408; X64-LABEL: test_expand_load_w_256: 409; X64: # %bb.0: 410; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 411; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 412; X64-NEXT: retq # encoding: [0xc3] 413 %1 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data) 414 ret <16 x i16> %1 415} 416 417define <16 x i16> @test_expand_w_256(<16 x i16> %data) { 418; CHECK-LABEL: test_expand_w_256: 419; CHECK: # %bb.0: 420; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 421 %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x 
i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 422 ret <16 x i16> %1 423} 424 425define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 426; X86-LABEL: test_mask_expand_w_256: 427; X86: # %bb.0: 428; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 429; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 430; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 431; X86-NEXT: retl # encoding: [0xc3] 432; 433; X64-LABEL: test_mask_expand_w_256: 434; X64: # %bb.0: 435; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 436; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 437; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 438; X64-NEXT: retq # encoding: [0xc3] 439 %1 = bitcast i16 %mask to <16 x i1> 440 %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1) 441 ret <16 x i16> %2 442} 443 444define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { 445; X86-LABEL: test_maskz_expand_w_256: 446; X86: # %bb.0: 447; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 448; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 449; X86-NEXT: retl # encoding: [0xc3] 450; 451; X64-LABEL: test_maskz_expand_w_256: 452; X64: # %bb.0: 453; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 454; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 455; X64-NEXT: retq # encoding: [0xc3] 456 %1 = bitcast i16 %mask to <16 x i1> 457 %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) 458 ret <16 x i16> %2 459} 460 461define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) { 462; X86-LABEL: test_mask_expand_load_b_256: 463; X86: # %bb.0: 464; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 465; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 466; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 467; X86-NEXT: retl # encoding: [0xc3] 468; 469; X64-LABEL: test_mask_expand_load_b_256: 470; X64: # %bb.0: 471; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 472; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 473; X64-NEXT: retq # encoding: [0xc3] 474 %1 = bitcast i32 %mask to <32 x i1> 475 %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> %data) 476 ret <32 x i8> %2 477} 478 479define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) { 480; X86-LABEL: test_maskz_expand_load_b_256: 481; X86: # %bb.0: 482; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 483; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 484; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00] 485; X86-NEXT: retl # encoding: [0xc3] 486; 487; X64-LABEL: test_maskz_expand_load_b_256: 488; X64: # %bb.0: 489; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 490; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x7d,0xa9,0x62,0x07] 491; X64-NEXT: retq # encoding: [0xc3] 492 %1 = bitcast i32 %mask to <32 x i1> 493 %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> zeroinitializer) 494 ret <32 x i8> %2 495} 496 497define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) { 498; X86-LABEL: test_expand_load_b_256: 499; X86: # %bb.0: 500; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 501; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 502; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 503; X86-NEXT: retl # encoding: [0xc3] 504; 505; X64-LABEL: test_expand_load_b_256: 506; X64: # %bb.0: 507; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 508; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 509; X64-NEXT: retq # encoding: [0xc3] 510 %1 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data) 511 ret <32 x i8> %1 512} 513 514define <32 x i8> @test_expand_b_256(<32 x i8> %data) { 515; CHECK-LABEL: test_expand_b_256: 516; CHECK: # %bb.0: 517; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 518 %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 519 ret <32 x i8> %1 520} 521 522define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 523; X86-LABEL: test_mask_expand_b_256: 524; X86: # %bb.0: 525; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 526; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 527; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 528; X86-NEXT: retl # encoding: [0xc3] 529; 530; X64-LABEL: test_mask_expand_b_256: 531; X64: # %bb.0: 532; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 533; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 534; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 535; X64-NEXT: retq # encoding: [0xc3] 536 %1 = bitcast i32 %mask to <32 x i1> 537 %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) 538 ret <32 x i8> %2 539} 540 541define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { 542; X86-LABEL: test_maskz_expand_b_256: 543; X86: # %bb.0: 544; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 545; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 546; X86-NEXT: retl # encoding: [0xc3] 547; 548; X64-LABEL: test_maskz_expand_b_256: 549; X64: # %bb.0: 550; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 551; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 552; X64-NEXT: retq # encoding: [0xc3] 553 %1 = 
bitcast i32 %mask to <32 x i1> 554 %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) 555 ret <32 x i8> %2 556} 557 558define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) { 559; X86-LABEL: test_mask_compress_store_w_256: 560; X86: # %bb.0: 561; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 562; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 563; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 564; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 565; X86-NEXT: retl # encoding: [0xc3] 566; 567; X64-LABEL: test_mask_compress_store_w_256: 568; X64: # %bb.0: 569; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 570; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 571; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 572; X64-NEXT: retq # encoding: [0xc3] 573 %1 = bitcast i16 %mask to <16 x i1> 574 call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> %1) 575 ret void 576} 577 578define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) { 579; X86-LABEL: test_compress_store_w_256: 580; X86: # %bb.0: 581; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 582; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 583; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 584; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 585; X86-NEXT: retl # encoding: [0xc3] 586; 587; X64-LABEL: test_compress_store_w_256: 588; X64: # %bb.0: 589; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 590; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 591; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 592; X64-NEXT: retq # encoding: [0xc3] 593 call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 594 ret void 595} 596 597define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 598; X86-LABEL: test_mask_compress_w_256: 599; X86: # %bb.0: 600; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 601; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 602; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 603; X86-NEXT: retl # encoding: [0xc3] 604; 605; X64-LABEL: test_mask_compress_w_256: 606; X64: # %bb.0: 607; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 608; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 609; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 610; X64-NEXT: retq # encoding: [0xc3] 611 %1 = bitcast i16 %mask to <16 x i1> 612 %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1) 613 ret <16 x i16> %2 614} 615 616define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { 617; X86-LABEL: test_maskz_compress_w_256: 618; X86: # %bb.0: 619; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 620; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 621; X86-NEXT: 
retl # encoding: [0xc3] 622; 623; X64-LABEL: test_maskz_compress_w_256: 624; X64: # %bb.0: 625; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 626; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 627; X64-NEXT: retq # encoding: [0xc3] 628 %1 = bitcast i16 %mask to <16 x i1> 629 %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) 630 ret <16 x i16> %2 631} 632 633define <16 x i16> @test_compress_w_256(<16 x i16> %data) { 634; CHECK-LABEL: test_compress_w_256: 635; CHECK: # %bb.0: 636; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 637 %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 638 ret <16 x i16> %1 639} 640 641define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) { 642; X86-LABEL: test_mask_compress_store_b_256: 643; X86: # %bb.0: 644; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 645; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 646; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00] 647; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 648; X86-NEXT: retl # encoding: [0xc3] 649; 650; X64-LABEL: test_mask_compress_store_b_256: 651; X64: # %bb.0: 652; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 653; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 654; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 655; X64-NEXT: retq # encoding: [0xc3] 656 %1 = bitcast i32 %mask to <32 x i1> 657 call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> %1) 658 ret void 659} 660 661define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) { 662; X86-LABEL: test_compress_store_b_256: 663; X86: # %bb.0: 664; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 665; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 666; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00] 667; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 668; X86-NEXT: retl # encoding: [0xc3] 669; 670; X64-LABEL: test_compress_store_b_256: 671; X64: # %bb.0: 672; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 673; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 674; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 675; X64-NEXT: retq # encoding: [0xc3] 676 call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 677 ret void 678} 679 680define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 681; X86-LABEL: test_mask_compress_b_256: 682; X86: # %bb.0: 683; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 684; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 685; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 
686; X86-NEXT: retl # encoding: [0xc3] 687; 688; X64-LABEL: test_mask_compress_b_256: 689; X64: # %bb.0: 690; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 691; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 692; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 693; X64-NEXT: retq # encoding: [0xc3] 694 %1 = bitcast i32 %mask to <32 x i1> 695 %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) 696 ret <32 x i8> %2 697} 698 699define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { 700; X86-LABEL: test_maskz_compress_b_256: 701; X86: # %bb.0: 702; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 703; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 704; X86-NEXT: retl # encoding: [0xc3] 705; 706; X64-LABEL: test_maskz_compress_b_256: 707; X64: # %bb.0: 708; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 709; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 710; X64-NEXT: retq # encoding: [0xc3] 711 %1 = bitcast i32 %mask to <32 x i1> 712 %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) 713 ret <32 x i8> %2 714} 715 716define <32 x i8> @test_compress_b_256(<32 x i8> %data) { 717; CHECK-LABEL: test_compress_b_256: 718; CHECK: # %bb.0: 719; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 720 %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 721 ret <32 x i8> %1 722} 723 724define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 725; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 726; X86: # %bb.0: 727; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 728; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 729; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 730; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 731; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 732; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 733; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 734; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 735; X86-NEXT: retl # encoding: [0xc3] 736; 737; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 738; X64: # %bb.0: 739; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 740; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 741; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16] 742; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 743; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18] 744; X64-NEXT: vmovdqa %xmm4, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 745; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 746; X64-NEXT: retq # encoding: [0xc3] 747 %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) 748 %2 = bitcast i8 %x4 to <8 x i1> 749 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 750 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 751 %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>) 752 %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>) 753 %6 = bitcast i8 %x4 to <8 x i1> 754 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 755 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 756 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 757 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1 758 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2 759 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5 760} 761 762define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 763; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 764; X86: # %bb.0: 765; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 766; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 767; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 768; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17] 769; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 770; X86-NEXT: retl # encoding: [0xc3] 771; 772; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 773; X64: # %bb.0: 774; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 775; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 776; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17] 777; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 778; X64-NEXT: retq # encoding: [0xc3] 779 %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>) 780 %2 = bitcast i8 %x4 to <8 x i1> 781 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 782 %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>) 783 %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 784 %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1 785 ret { <8 x i32>, <8 x i32> } %6 786} 787 788define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 789; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 790; X86: # %bb.0: 791; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 792; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 793; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 794; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17] 795; X86-NEXT: 
vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 796; X86-NEXT: retl # encoding: [0xc3] 797; 798; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 799; X64: # %bb.0: 800; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 801; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 802; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17] 803; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 804; X64-NEXT: retq # encoding: [0xc3] 805 %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>) 806 %2 = bitcast i8 %x4 to <8 x i1> 807 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 808 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 809 %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>) 810 %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0 811 %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1 812 ret { <2 x i64>, <2 x i64> } %6 813} 814 815define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 816; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 817; X86: # %bb.0: 818; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 819; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 820; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 821; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17] 822; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 823; X86-NEXT: retl # encoding: [0xc3] 824; 825; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 826; X64: # %bb.0: 827; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 828; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 829; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17] 830; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 831; X64-NEXT: retq # encoding: [0xc3] 832 %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>) 833 %2 = bitcast i8 %x4 to <8 x i1> 834 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 835 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 836 %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>) 837 %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0 838 %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1 839 ret { <4 x i64>, <4 x i64> } %6 840} 841 842define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 843; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 844; X86: # %bb.0: 845; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 846; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 847; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 848; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07] 849; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xc2] 850; X86-NEXT: retl # encoding: [0xc3] 851; 852; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 853; X64: # %bb.0: 854; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 855; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 856; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07] 857; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 858; X64-NEXT: retq # encoding: [0xc3] 859 %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 860 %2 = bitcast i8 %x4 to <8 x i1> 861 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 862 %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 863 %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 864 %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1 865 ret { <8 x i16>, <8 x i16> } %6 866} 867 868define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 869; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 870; X86: # %bb.0: 871; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 872; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 873; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07] 874; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 875; X86-NEXT: retl # encoding: [0xc3] 876; 877; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 878; X64: # %bb.0: 879; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 880; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 881; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07] 882; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 883; X64-NEXT: retq # encoding: [0xc3] 884 %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 885 %2 = bitcast i16 %x4 to <16 x i1> 886 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 887 %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 888 %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 889 %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1 890 ret { <16 x i16>, <16 x i16> } %6 891} 892 893define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 894; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 895; X86: # %bb.0: 896; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 897; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 898; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 899; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 900; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 901; X86-NEXT: 
vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 902; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 903; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 904; X86-NEXT: retl # encoding: [0xc3] 905; 906; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 907; X64: # %bb.0: 908; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 909; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 910; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16] 911; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 912; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18] 913; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 914; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 915; X64-NEXT: retq # encoding: [0xc3] 916 %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) 917 %2 = bitcast i8 %x4 to <8 x i1> 918 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 919 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 920 %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>) 921 %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>) 922 %6 = bitcast i8 %x4 to <8 x i1> 923 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 924 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 925 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 926 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1 927 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2 928 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5 929} 930 931define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 932; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 933; X86: # %bb.0: 934; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 935; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 936; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 937; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17] 938; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 939; X86-NEXT: retl # encoding: [0xc3] 940; 941; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 942; X64: # %bb.0: 943; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 944; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 945; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17] 946; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 947; X64-NEXT: retq # encoding: [0xc3] 948 %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>) 949 %2 = bitcast i8 %x4 to <8 x i1> 950 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 951 %4 
= call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>) 952 %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 953 %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1 954 ret { <8 x i32>, <8 x i32> } %6 955} 956 957define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 958; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 959; X86: # %bb.0: 960; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 961; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 962; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 963; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17] 964; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 965; X86-NEXT: retl # encoding: [0xc3] 966; 967; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 968; X64: # %bb.0: 969; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 970; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 971; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17] 972; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 973; X64-NEXT: retq # encoding: [0xc3] 974 %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>) 975 %2 = bitcast i8 %x4 to <8 x i1> 976 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 977 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 978 %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>) 979 %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0 980 %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1 981 ret { <2 x i64>, <2 x i64> } %6 982} 983 984define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 985; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 986; X86: # %bb.0: 987; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 988; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 989; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 990; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17] 991; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 992; X86-NEXT: retl # encoding: [0xc3] 993; 994; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 995; X64: # %bb.0: 996; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 997; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 998; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17] 999; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1000; X64-NEXT: retq # encoding: [0xc3] 1001 %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>) 1002 %2 = bitcast i8 %x4 to <8 x i1> 1003 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1004 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1005 %4 = call <4 x i64> 
@llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}

define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}

define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %res3 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32> } %res3, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %res4
}

define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32> } %res3, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %res4
}

define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %res3 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64> } %res3, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %res4
}

define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %res3 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64> } %res3, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %res4
}

define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}

define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %8 = insertvalue { <8 x i32>, <8 x i32> } %7, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %8
}

define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %8 = insertvalue { <4 x i32>, <4 x i32> } %7, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %8
}

define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %8 = insertvalue { <4 x i64>, <4 x i64> } %7, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %8
}

define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %8 = insertvalue { <2 x i64>, <2 x i64> } %7, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %8
}

define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}

define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)