; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <32 x i16> @test_mask_expand_load_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_load_w_512(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 %mask)

define <32 x i16> @test_expand_load_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(ptr %addr, <32 x i16> %data, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define <64 x i8> @test_mask_expand_load_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_load_b_512(ptr %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 %mask)

define <64 x i8> @test_expand_load_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(ptr %addr, <64 x i8> %data, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

define void @test_mask_compress_store_w_512(ptr %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 %mask)

define void @test_compress_store_w_512(ptr %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(ptr %addr, <32 x i16> %data, i32 -1)
  ret void
}

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define void @test_mask_compress_store_b_512(ptr %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 %mask)

define void @test_compress_store_b_512(ptr %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(ptr %addr, <64 x i8> %data, i64 -1)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23, <16 x i32> %x3, i16 -1)
  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
  ret { <16 x i32>, <16 x i32> } %res3
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23, <8 x i64> %x3, i8 -1)
  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
  ret { <8 x i64>, <8 x i64> } %res3
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7, <32 x i16> %x3, i32 -1)
  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
  ret { <32 x i16>, <32 x i16> } %res3
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)

define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23, <16 x i32> %x3, i16 -1)
  %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0
  %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1
  ret { <16 x i32>, <16 x i32> } %res3
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23, <8 x i64> %x3, i8 -1)
  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
  ret { <8 x i64>, <8 x i64> } %res3
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7, <32 x i16> %x3, i32 -1)
  %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
  %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1
  ret { <32 x i16>, <32 x i16> } %res3
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)

define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshld_d_512_2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512_2:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}
declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)

define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshld_q_512_2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 23)
  %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
  %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1
  ret { <8 x i64>, <8 x i64> } %6
}
declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)

define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshld_w_512_2(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512_2:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc9,0x07]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7)
  %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0
  %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1
  ret { <32 x i16>, <32 x i16> } %6
}
declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)

define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrd_d_512_2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512_2:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 23)
  %5 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %6 = insertvalue { <16 x i32>, <16 x i32> } %5, <16 x i32> %4, 1
  ret { <16 x i32>, <16 x i32> } %6
}
declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)

define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrd_q_512_2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc9,0x17]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>
%x1, i32 23) 639 %5 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0 640 %6 = insertvalue { <8 x i64>, <8 x i64> } %5, <8 x i64> %4, 1 641 ret { <8 x i64>, <8 x i64> } %6 642} 643declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32) 644 645define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrd_w_512_2(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) { 646; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512_2: 647; X86: # %bb.0: 648; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 649; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06] 650; X86-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07] 651; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 652; X86-NEXT: retl # encoding: [0xc3] 653; 654; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512_2: 655; X64: # %bb.0: 656; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 657; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06] 658; X64-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc9,0x07] 659; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 660; X64-NEXT: retq # encoding: [0xc3] 661 %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6) 662 %2 = bitcast i32 %x4 to <32 x i1> 663 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3 664 %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 7) 665 %5 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0 666 %6 = insertvalue { <32 x i16>, <32 x i16> } %5, <32 x i16> %4, 1 667 ret { <32 x i16>, <32 x i16> } %6 668} 669declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32) 670 671declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, 
<16 x i32>, i16) 672declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 673 674define <16 x i32>@test_int_x86_avx512_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { 675; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_512: 676; CHECK: # %bb.0: 677; CHECK-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2] 678; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 679 %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 680 ret <16 x i32> %res 681} 682 683define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { 684; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512: 685; X86: # %bb.0: 686; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 687; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 688; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 689; X86-NEXT: vpshrdvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x00] 690; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda] 691; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 692; X86-NEXT: retl # encoding: [0xc3] 693; 694; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512: 695; X64: # %bb.0: 696; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 697; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 698; X64-NEXT: vpshrdvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x07] 699; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xda] 700; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 701; X64-NEXT: retq # encoding: [0xc3] 702 %x2 = load <16 x i32>, ptr %x2p 703 %res = call <16 x i32> 
@llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 704 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) 705 %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0 706 %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 707 ret { <16 x i32>, <16 x i32> } %res3 708} 709 710declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 711declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 712 713define <8 x i64>@test_int_x86_avx512_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) { 714; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_512: 715; CHECK: # %bb.0: 716; CHECK-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2] 717; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 718 %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 719 ret <8 x i64> %res 720} 721 722define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) { 723; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512: 724; X86: # %bb.0: 725; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 726; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 727; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 728; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 729; X86-NEXT: vpshrdvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x00] 730; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda] 731; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 732; X86-NEXT: retl # encoding: [0xc3] 733; 734; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512: 735; X64: # %bb.0: 736; 
X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 737; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 738; X64-NEXT: vpshrdvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x07] 739; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xda] 740; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 741; X64-NEXT: retq # encoding: [0xc3] 742 %x2 = load <8 x i64>, ptr %x2p 743 %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 744 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 %x3) 745 %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 746 %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 747 ret { <8 x i64>, <8 x i64> } %res3 748} 749 750declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 751declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 752 753define <32 x i16>@test_int_x86_avx512_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 754; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_512: 755; CHECK: # %bb.0: 756; CHECK-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2] 757; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 758 %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 759 ret <32 x i16> %res 760} 761 762define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) { 763; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512: 764; X86: # %bb.0: 765; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 766; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 767; X86-NEXT: kmovd 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 768; X86-NEXT: vpshrdvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x00] 769; X86-NEXT: vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda] 770; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 771; X86-NEXT: retl # encoding: [0xc3] 772; 773; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512: 774; X64: # %bb.0: 775; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 776; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 777; X64-NEXT: vpshrdvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x07] 778; X64-NEXT: vpshrdvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xda] 779; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 780; X64-NEXT: retq # encoding: [0xc3] 781 %x2 = load <32 x i16>, ptr %x2p 782 %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 783 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 %x3) 784 %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 785 %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1 786 ret { <32 x i16>, <32 x i16> } %res3 787} 788 789declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 790declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 791 792define <16 x i32>@test_int_x86_avx512_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { 793; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_512: 794; CHECK: # %bb.0: 795; CHECK-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2] 796; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 797 %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x 
i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 798 ret <16 x i32> %res 799} 800 801define { <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) { 802; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512: 803; X86: # %bb.0: 804; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 805; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 806; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 807; X86-NEXT: vpshldvd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x00] 808; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda] 809; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 810; X86-NEXT: retl # encoding: [0xc3] 811; 812; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512: 813; X64: # %bb.0: 814; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 815; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 816; X64-NEXT: vpshldvd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x07] 817; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xda] 818; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 819; X64-NEXT: retq # encoding: [0xc3] 820 %x2 = load <16 x i32>, ptr %x2p 821 %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 822 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) 823 %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res, 0 824 %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 825 ret { <16 x i32>, <16 x i32> } %res3 826} 827 828declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 829declare <8 x 
i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 830 831define <8 x i64>@test_int_x86_avx512_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) { 832; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_512: 833; CHECK: # %bb.0: 834; CHECK-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2] 835; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 836 %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 837 ret <8 x i64> %res 838} 839 840define { <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, ptr %x2p, <8 x i64> %x4, i8 %x3) { 841; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512: 842; X86: # %bb.0: 843; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 844; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 845; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 846; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 847; X86-NEXT: vpshldvq (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x00] 848; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda] 849; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 850; X86-NEXT: retl # encoding: [0xc3] 851; 852; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512: 853; X64: # %bb.0: 854; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 855; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 856; X64-NEXT: vpshldvq (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x07] 857; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xda] 858; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 859; X64-NEXT: retq # encoding: [0xc3] 860 %x2 = load <8 x i64>, ptr %x2p 861 %res = call <8 x i64> 
@llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 862 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 %x3) 863 %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 864 %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 865 ret { <8 x i64>, <8 x i64> } %res3 866} 867 868declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 869declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 870 871define <32 x i16>@test_int_x86_avx512_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 872; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_512: 873; CHECK: # %bb.0: 874; CHECK-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2] 875; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 876 %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 877 ret <32 x i16> %res 878} 879 880define { <32 x i16>, <32 x i16> }@test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, ptr %x2p, <32 x i16> %x4, i32 %x3) { 881; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512: 882; X86: # %bb.0: 883; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 884; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 885; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 886; X86-NEXT: vpshldvw (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x00] 887; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda] 888; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 889; X86-NEXT: retl # encoding: [0xc3] 890; 891; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512: 892; X64: # %bb.0: 893; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: 
[0x62,0xf1,0xfd,0x48,0x6f,0xd8] 894; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 895; X64-NEXT: vpshldvw (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x07] 896; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xda] 897; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] 898; X64-NEXT: retq # encoding: [0xc3] 899 %x2 = load <32 x i16>, ptr %x2p 900 %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 901 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 %x3) 902 %res2 = insertvalue { <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 903 %res3 = insertvalue { <32 x i16>, <32 x i16> } %res2, <32 x i16> %res1, 1 904 ret { <32 x i16>, <32 x i16> } %res3 905} 906