; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <32 x i16> @test_mask_expand_load_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> %1, <32 x i16> %data)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_load_w_512(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> %1, <32 x i16> zeroinitializer)
  ret <32 x i16> %2
}

define <32 x i16> @test_expand_load_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
  ret <32 x i16> %1
}
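
; In-register expand via @llvm.x86.avx512.mask.expand.*. With an all-true
; mask and an undef passthru the expand is a no-op, so nothing but the
; return is expected.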
define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}
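
; The same expand-load patterns for byte elements: vpexpandb with an i64 mask.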
define <64 x i8> @test_mask_expand_load_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> %1, <64 x i8> %data)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_load_b_512(ptr %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
  ret <64 x i8> %2
}

define <64 x i8> @test_expand_load_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
  ret <64 x i8> %1
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}
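
; Compress-store via @llvm.masked.compressstore.*. The all-true mask is
; materialized with a kxnor of a mask register with itself.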
define void @test_mask_compress_store_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %addr, <32 x i1> %1)
  ret void
}

define void @test_compress_store_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
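
; In-register compress via @llvm.x86.avx512.mask.compress.*.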
define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}
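
; Byte-element compress-store: vpcompressb with an i64 mask.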
define void @test_mask_compress_store_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %addr, <64 x i1> %1)
  ret void
}

define void @test_compress_store_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, ptr %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}
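
; Concat-and-shift tests. VPSHLD/VPSHRD with an immediate count are selected
; from @llvm.fshl/@llvm.fshr with splat constant shift amounts; each test
; returns the masked and unmasked results as a two-element struct.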
define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}
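
; VPSHRD: concatenate and shift right by an immediate. Note that the fshr
; operand order is swapped relative to the fshl tests above.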
define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}
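
; Variable-count forms: VPSHRDV takes per-element shift counts from a
; register or memory operand, with merge- and zero-masking variants.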
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %8 = insertvalue { <16 x i32>, <16 x i32> } %7, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %8
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x00]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x07]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %8 = insertvalue { <8 x i64>, <8 x i64> } %7, <8 x i64> %6, 1
  ret { <8 x i64>, <8 x i64> } %8
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x00]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x07]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %8 = insertvalue { <32 x i16>, <32 x i16> } %7, <32 x i16> %6, 1
  ret { <32 x i16>, <32 x i16> } %8
}
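
; VPSHLDV: the variable-count concat-and-shift-left counterpart.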
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x00]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x07]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %8 = insertvalue { <16 x i32>, <16 x i32> } %7, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %8
}

define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x00]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x07]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, ptr %x2p
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %8 = insertvalue { <8 x i64>, <8 x i64> } %7, <8 x i64> %6, 1
  ret { <8 x i64>, <8 x i64> } %8
}

define { <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x00]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x07]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, ptr %x2p
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %8 = insertvalue { <32 x i16>, <32 x i16> } %7, <32 x i16> %6, 1
  ret { <32 x i16>, <32 x i16> } %8
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)