; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
139; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 140; X86-NEXT: retl # encoding: [0xc3] 141; 142; X64-LABEL: test_mask_packs_epi32_rmbk_128: 143; X64: # %bb.0: 144; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 145; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 146; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 147; X64-NEXT: retq # encoding: [0xc3] 148 %q = load i32, ptr %ptr_b 149 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 150 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 151 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 152 %2 = bitcast i8 %mask to <8 x i1> 153 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 154 ret <8 x i16> %3 155} 156 157define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) { 158; X86-LABEL: test_mask_packs_epi32_rmbkz_128: 159; X86: # %bb.0: 160; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 162; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 163; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00] 164; X86-NEXT: retl # encoding: [0xc3] 165; 166; X64-LABEL: test_mask_packs_epi32_rmbkz_128: 167; X64: # %bb.0: 168; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 169; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 170; X64-NEXT: retq # encoding: [0xc3] 171 %q = load i32, ptr %ptr_b 172 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 173 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 174 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 175 %2 = bitcast i8 %mask to <8 x i1> 176 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 177 ret <8 x i16> %3 178} 179 180declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) 181 182define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 183; CHECK-LABEL: test_mask_packs_epi32_rr_256: 184; CHECK: # %bb.0: 185; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1] 186; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 187 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 188 ret <16 x i16> %1 189} 190 191define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 192; X86-LABEL: test_mask_packs_epi32_rrk_256: 193; X86: # %bb.0: 194; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 195; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 196; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 197; X86-NEXT: retl # encoding: [0xc3] 198; 199; X64-LABEL: test_mask_packs_epi32_rrk_256: 200; X64: # %bb.0: 201; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 202; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 203; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 204; X64-NEXT: retq # encoding: [0xc3] 205 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 206 %2 = bitcast i16 
%mask to <16 x i1> 207 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 208 ret <16 x i16> %3 209} 210 211define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 212; X86-LABEL: test_mask_packs_epi32_rrkz_256: 213; X86: # %bb.0: 214; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 215; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 216; X86-NEXT: retl # encoding: [0xc3] 217; 218; X64-LABEL: test_mask_packs_epi32_rrkz_256: 219; X64: # %bb.0: 220; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 221; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 222; X64-NEXT: retq # encoding: [0xc3] 223 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 224 %2 = bitcast i16 %mask to <16 x i1> 225 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 226 ret <16 x i16> %3 227} 228 229define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) { 230; X86-LABEL: test_mask_packs_epi32_rm_256: 231; X86: # %bb.0: 232; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 233; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00] 234; X86-NEXT: retl # encoding: [0xc3] 235; 236; X64-LABEL: test_mask_packs_epi32_rm_256: 237; X64: # %bb.0: 238; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07] 239; X64-NEXT: retq # encoding: [0xc3] 240 %b = load <8 x i32>, ptr %ptr_b 241 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 242 ret <16 x i16> %1 243} 244 245define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 246; X86-LABEL: test_mask_packs_epi32_rmk_256: 247; X86: # %bb.0: 248; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 249; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 250; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08] 251; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 252; X86-NEXT: retl # encoding: [0xc3] 253; 254; X64-LABEL: test_mask_packs_epi32_rmk_256: 255; X64: # %bb.0: 256; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 257; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 258; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 259; X64-NEXT: retq # encoding: [0xc3] 260 %b = load <8 x i32>, ptr %ptr_b 261 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 262 %2 = bitcast i16 %mask to <16 x i1> 263 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 264 ret <16 x i16> %3 265} 266 267define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) { 268; X86-LABEL: test_mask_packs_epi32_rmkz_256: 269; X86: # %bb.0: 270; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 271; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 272; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00] 273; X86-NEXT: retl # encoding: [0xc3] 274; 275; X64-LABEL: test_mask_packs_epi32_rmkz_256: 276; X64: # %bb.0: 277; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 278; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 
{%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 279; X64-NEXT: retq # encoding: [0xc3] 280 %b = load <8 x i32>, ptr %ptr_b 281 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 282 %2 = bitcast i16 %mask to <16 x i1> 283 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 284 ret <16 x i16> %3 285} 286 287define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) { 288; X86-LABEL: test_mask_packs_epi32_rmb_256: 289; X86: # %bb.0: 290; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 291; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00] 292; X86-NEXT: retl # encoding: [0xc3] 293; 294; X64-LABEL: test_mask_packs_epi32_rmb_256: 295; X64: # %bb.0: 296; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 297; X64-NEXT: retq # encoding: [0xc3] 298 %q = load i32, ptr %ptr_b 299 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 300 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 301 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 302 ret <16 x i16> %1 303} 304 305define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 306; X86-LABEL: test_mask_packs_epi32_rmbk_256: 307; X86: # %bb.0: 308; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 309; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 310; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08] 311; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 312; X86-NEXT: retl # encoding: [0xc3] 313; 314; X64-LABEL: test_mask_packs_epi32_rmbk_256: 315; X64: # %bb.0: 316; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 317; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 318; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 319; X64-NEXT: retq # encoding: [0xc3] 320 %q = load i32, ptr %ptr_b 321 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 322 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 323 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 324 %2 = bitcast i16 %mask to <16 x i1> 325 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 326 ret <16 x i16> %3 327} 328 329define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) { 330; X86-LABEL: test_mask_packs_epi32_rmbkz_256: 331; X86: # %bb.0: 332; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 333; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 334; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00] 335; X86-NEXT: retl # encoding: [0xc3] 336; 337; X64-LABEL: test_mask_packs_epi32_rmbkz_256: 338; X64: # %bb.0: 339; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 340; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 341; X64-NEXT: retq # encoding: [0xc3] 342 %q = load i32, ptr %ptr_b 343 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 344 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 345 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> 
%b) 346 %2 = bitcast i16 %mask to <16 x i1> 347 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 348 ret <16 x i16> %3 349} 350 351declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) 352 353define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 354; CHECK-LABEL: test_mask_packs_epi16_rr_128: 355; CHECK: # %bb.0: 356; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 357; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 358 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 359 ret <16 x i8> %1 360} 361 362define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 363; X86-LABEL: test_mask_packs_epi16_rrk_128: 364; X86: # %bb.0: 365; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 366; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 367; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 368; X86-NEXT: retl # encoding: [0xc3] 369; 370; X64-LABEL: test_mask_packs_epi16_rrk_128: 371; X64: # %bb.0: 372; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 373; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 374; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 375; X64-NEXT: retq # encoding: [0xc3] 376 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 377 %2 = bitcast i16 %mask to <16 x i1> 378 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 379 ret <16 x i8> %3 380} 381 382define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 383; X86-LABEL: test_mask_packs_epi16_rrkz_128: 384; X86: # %bb.0: 385; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 386; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 387; X86-NEXT: retl # encoding: [0xc3] 388; 389; X64-LABEL: test_mask_packs_epi16_rrkz_128: 390; X64: # %bb.0: 391; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 392; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 393; X64-NEXT: retq # encoding: [0xc3] 394 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 395 %2 = bitcast i16 %mask to <16 x i1> 396 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 397 ret <16 x i8> %3 398} 399 400define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) { 401; X86-LABEL: test_mask_packs_epi16_rm_128: 402; X86: # %bb.0: 403; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 404; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00] 405; X86-NEXT: retl # encoding: [0xc3] 406; 407; X64-LABEL: test_mask_packs_epi16_rm_128: 408; X64: # %bb.0: 409; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07] 410; X64-NEXT: retq # encoding: [0xc3] 411 %b = load <8 x i16>, ptr %ptr_b 412 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 413 ret <16 x i8> %1 414} 415 416define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) { 417; X86-LABEL: test_mask_packs_epi16_rmk_128: 418; X86: # %bb.0: 419; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 420; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 421; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08] 422; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 423; X86-NEXT: retl # encoding: [0xc3] 424; 425; X64-LABEL: test_mask_packs_epi16_rmk_128: 426; X64: # %bb.0: 427; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 428; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] 429; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 430; X64-NEXT: retq # encoding: [0xc3] 431 %b = load <8 x i16>, ptr %ptr_b 432 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 433 %2 = bitcast i16 %mask to <16 x i1> 434 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 435 ret <16 x i8> %3 436} 437 438define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) { 439; X86-LABEL: test_mask_packs_epi16_rmkz_128: 440; X86: # %bb.0: 441; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 442; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 443; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00] 444; X86-NEXT: retl # encoding: [0xc3] 445; 446; X64-LABEL: test_mask_packs_epi16_rmkz_128: 447; X64: # %bb.0: 448; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 449; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] 450; X64-NEXT: retq # encoding: [0xc3] 451 %b = load <8 x i16>, ptr %ptr_b 452 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 453 %2 = bitcast i16 %mask to <16 x i1> 454 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 455 ret <16 x i8> %3 456} 457 458declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) 459 460define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 461; CHECK-LABEL: test_mask_packs_epi16_rr_256: 462; CHECK: # %bb.0: 463; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] 464; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 465 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 466 ret <32 x i8> %1 467} 468 469define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 470; X86-LABEL: test_mask_packs_epi16_rrk_256: 471; X86: # %bb.0: 472; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 473; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 474; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 475; X86-NEXT: retl # encoding: [0xc3] 476; 477; X64-LABEL: test_mask_packs_epi16_rrk_256: 478; X64: # %bb.0: 479; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 480; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 481; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 482; X64-NEXT: retq # encoding: [0xc3] 483 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 484 %2 = bitcast i32 %mask to <32 x i1> 485 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 486 ret <32 x i8> %3 487} 488 489define <32 x i8> 
@test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 490; X86-LABEL: test_mask_packs_epi16_rrkz_256: 491; X86: # %bb.0: 492; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 493; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 494; X86-NEXT: retl # encoding: [0xc3] 495; 496; X64-LABEL: test_mask_packs_epi16_rrkz_256: 497; X64: # %bb.0: 498; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 499; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 500; X64-NEXT: retq # encoding: [0xc3] 501 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 502 %2 = bitcast i32 %mask to <32 x i1> 503 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 504 ret <32 x i8> %3 505} 506 507define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) { 508; X86-LABEL: test_mask_packs_epi16_rm_256: 509; X86: # %bb.0: 510; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 511; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00] 512; X86-NEXT: retl # encoding: [0xc3] 513; 514; X64-LABEL: test_mask_packs_epi16_rm_256: 515; X64: # %bb.0: 516; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07] 517; X64-NEXT: retq # encoding: [0xc3] 518 %b = load <16 x i16>, ptr %ptr_b 519 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 520 ret <32 x i8> %1 521} 522 523define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) { 524; X86-LABEL: test_mask_packs_epi16_rmk_256: 525; X86: # %bb.0: 526; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 527; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 528; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08] 529; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 530; X86-NEXT: retl # encoding: [0xc3] 531; 532; X64-LABEL: test_mask_packs_epi16_rmk_256: 533; X64: # %bb.0: 534; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 535; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] 536; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 537; X64-NEXT: retq # encoding: [0xc3] 538 %b = load <16 x i16>, ptr %ptr_b 539 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 540 %2 = bitcast i32 %mask to <32 x i1> 541 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 542 ret <32 x i8> %3 543} 544 545define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) { 546; X86-LABEL: test_mask_packs_epi16_rmkz_256: 547; X86: # %bb.0: 548; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 549; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 550; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00] 551; X86-NEXT: retl # encoding: [0xc3] 552; 553; X64-LABEL: test_mask_packs_epi16_rmkz_256: 554; X64: # %bb.0: 555; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 556; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07] 557; X64-NEXT: retq # encoding: [0xc3] 558 %b = load <16 x i16>, ptr 
%ptr_b 559 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 560 %2 = bitcast i32 %mask to <32 x i1> 561 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 562 ret <32 x i8> %3 563} 564 565declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) 566 567 568define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 569; CHECK-LABEL: test_mask_packus_epi32_rr_128: 570; CHECK: # %bb.0: 571; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] 572; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 573 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 574 ret <8 x i16> %1 575} 576 577define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 578; X86-LABEL: test_mask_packus_epi32_rrk_128: 579; X86: # %bb.0: 580; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 581; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 582; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 583; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 584; X86-NEXT: retl # encoding: [0xc3] 585; 586; X64-LABEL: test_mask_packus_epi32_rrk_128: 587; X64: # %bb.0: 588; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 589; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 590; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 591; X64-NEXT: retq # encoding: [0xc3] 592 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 593 %2 = bitcast i8 %mask to <8 x i1> 594 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 595 ret <8 x i16> %3 596} 597 598define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 599; X86-LABEL: test_mask_packus_epi32_rrkz_128: 600; X86: # %bb.0: 601; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 602; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 603; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 604; X86-NEXT: retl # encoding: [0xc3] 605; 606; X64-LABEL: test_mask_packus_epi32_rrkz_128: 607; X64: # %bb.0: 608; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 609; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 610; X64-NEXT: retq # encoding: [0xc3] 611 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 612 %2 = bitcast i8 %mask to <8 x i1> 613 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 614 ret <8 x i16> %3 615} 616 617define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) { 618; X86-LABEL: test_mask_packus_epi32_rm_128: 619; X86: # %bb.0: 620; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 621; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00] 622; X86-NEXT: retl # encoding: [0xc3] 623; 624; X64-LABEL: test_mask_packus_epi32_rm_128: 625; X64: # %bb.0: 626; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07] 627; X64-NEXT: retq # encoding: [0xc3] 628 %b = load <4 x i32>, ptr %ptr_b 629 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 630 ret <8 x i16> %1 631} 632 633define <8 x i16> 
@test_mask_packus_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) { 634; X86-LABEL: test_mask_packus_epi32_rmk_128: 635; X86: # %bb.0: 636; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 637; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 638; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 639; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08] 640; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 641; X86-NEXT: retl # encoding: [0xc3] 642; 643; X64-LABEL: test_mask_packus_epi32_rmk_128: 644; X64: # %bb.0: 645; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 646; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] 647; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 648; X64-NEXT: retq # encoding: [0xc3] 649 %b = load <4 x i32>, ptr %ptr_b 650 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 651 %2 = bitcast i8 %mask to <8 x i1> 652 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 653 ret <8 x i16> %3 654} 655 656define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) { 657; X86-LABEL: test_mask_packus_epi32_rmkz_128: 658; X86: # %bb.0: 659; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 660; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 661; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 662; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00] 663; X86-NEXT: retl # encoding: [0xc3] 664; 665; X64-LABEL: test_mask_packus_epi32_rmkz_128: 666; X64: # %bb.0: 667; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 668; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] 669; X64-NEXT: retq # encoding: [0xc3] 670 %b = load <4 x i32>, ptr %ptr_b 671 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 672 %2 = bitcast i8 %mask to <8 x i1> 673 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 674 ret <8 x i16> %3 675} 676 677define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) { 678; X86-LABEL: test_mask_packus_epi32_rmb_128: 679; X86: # %bb.0: 680; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 681; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00] 682; X86-NEXT: retl # encoding: [0xc3] 683; 684; X64-LABEL: test_mask_packus_epi32_rmb_128: 685; X64: # %bb.0: 686; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07] 687; X64-NEXT: retq # encoding: [0xc3] 688 %q = load i32, ptr %ptr_b 689 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 690 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 691 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 692 ret <8 x i16> %1 693} 694 695define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) { 696; X86-LABEL: test_mask_packus_epi32_rmbk_128: 697; X86: # %bb.0: 698; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 699; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 700; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 701; X86-NEXT: 
vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08] 702; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 703; X86-NEXT: retl # encoding: [0xc3] 704; 705; X64-LABEL: test_mask_packus_epi32_rmbk_128: 706; X64: # %bb.0: 707; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 708; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] 709; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 710; X64-NEXT: retq # encoding: [0xc3] 711 %q = load i32, ptr %ptr_b 712 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 713 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 714 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 715 %2 = bitcast i8 %mask to <8 x i1> 716 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 717 ret <8 x i16> %3 718} 719 720define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) { 721; X86-LABEL: test_mask_packus_epi32_rmbkz_128: 722; X86: # %bb.0: 723; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 724; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 725; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 726; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00] 727; X86-NEXT: retl # encoding: [0xc3] 728; 729; X64-LABEL: test_mask_packus_epi32_rmbkz_128: 730; X64: # %bb.0: 731; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 732; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] 733; X64-NEXT: retq # encoding: [0xc3] 734 %q = load i32, ptr %ptr_b 735 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 736 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 737 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 738 %2 = bitcast i8 %mask to <8 x i1> 739 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 740 ret <8 x i16> %3 741} 742 743declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) 744 745define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 746; CHECK-LABEL: test_mask_packus_epi32_rr_256: 747; CHECK: # %bb.0: 748; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] 749; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 750 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 751 ret <16 x i16> %1 752} 753 754define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 755; X86-LABEL: test_mask_packus_epi32_rrk_256: 756; X86: # %bb.0: 757; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 758; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 759; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 760; X86-NEXT: retl # encoding: [0xc3] 761; 762; X64-LABEL: test_mask_packus_epi32_rrk_256: 763; X64: # %bb.0: 764; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 765; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 766; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 767; X64-NEXT: retq # encoding: [0xc3] 768 %1 = 
call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 769 %2 = bitcast i16 %mask to <16 x i1> 770 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 771 ret <16 x i16> %3 772} 773 774define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 775; X86-LABEL: test_mask_packus_epi32_rrkz_256: 776; X86: # %bb.0: 777; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 778; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 779; X86-NEXT: retl # encoding: [0xc3] 780; 781; X64-LABEL: test_mask_packus_epi32_rrkz_256: 782; X64: # %bb.0: 783; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 784; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 785; X64-NEXT: retq # encoding: [0xc3] 786 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 787 %2 = bitcast i16 %mask to <16 x i1> 788 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 789 ret <16 x i16> %3 790} 791 792define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) { 793; X86-LABEL: test_mask_packus_epi32_rm_256: 794; X86: # %bb.0: 795; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 796; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00] 797; X86-NEXT: retl # encoding: [0xc3] 798; 799; X64-LABEL: test_mask_packus_epi32_rm_256: 800; X64: # %bb.0: 801; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07] 802; X64-NEXT: retq # encoding: [0xc3] 803 %b = load <8 x i32>, ptr %ptr_b 804 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 805 ret <16 x i16> %1 806} 807 808define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 809; X86-LABEL: test_mask_packus_epi32_rmk_256: 810; X86: # %bb.0: 811; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 812; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 813; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08] 814; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 815; X86-NEXT: retl # encoding: [0xc3] 816; 817; X64-LABEL: test_mask_packus_epi32_rmk_256: 818; X64: # %bb.0: 819; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 820; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] 821; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 822; X64-NEXT: retq # encoding: [0xc3] 823 %b = load <8 x i32>, ptr %ptr_b 824 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 825 %2 = bitcast i16 %mask to <16 x i1> 826 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 827 ret <16 x i16> %3 828} 829 830define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) { 831; X86-LABEL: test_mask_packus_epi32_rmkz_256: 832; X86: # %bb.0: 833; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 834; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 835; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00] 836; X86-NEXT: retl # encoding: [0xc3] 837; 838; X64-LABEL: test_mask_packus_epi32_rmkz_256: 839; X64: # %bb.0: 840; 
X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 841; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] 842; X64-NEXT: retq # encoding: [0xc3] 843 %b = load <8 x i32>, ptr %ptr_b 844 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 845 %2 = bitcast i16 %mask to <16 x i1> 846 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 847 ret <16 x i16> %3 848} 849 850define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) { 851; X86-LABEL: test_mask_packus_epi32_rmb_256: 852; X86: # %bb.0: 853; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 854; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00] 855; X86-NEXT: retl # encoding: [0xc3] 856; 857; X64-LABEL: test_mask_packus_epi32_rmb_256: 858; X64: # %bb.0: 859; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07] 860; X64-NEXT: retq # encoding: [0xc3] 861 %q = load i32, ptr %ptr_b 862 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 863 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 864 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 865 ret <16 x i16> %1 866} 867 868define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 869; X86-LABEL: test_mask_packus_epi32_rmbk_256: 870; X86: # %bb.0: 871; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 872; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 873; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08] 874; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 875; X86-NEXT: retl # encoding: [0xc3] 876; 877; X64-LABEL: test_mask_packus_epi32_rmbk_256: 878; X64: # %bb.0: 879; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 880; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] 881; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 882; X64-NEXT: retq # encoding: [0xc3] 883 %q = load i32, ptr %ptr_b 884 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 885 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 886 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 887 %2 = bitcast i16 %mask to <16 x i1> 888 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 889 ret <16 x i16> %3 890} 891 892define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) { 893; X86-LABEL: test_mask_packus_epi32_rmbkz_256: 894; X86: # %bb.0: 895; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 896; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 897; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00] 898; X86-NEXT: retl # encoding: [0xc3] 899; 900; X64-LABEL: test_mask_packus_epi32_rmbkz_256: 901; X64: # %bb.0: 902; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 903; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] 904; X64-NEXT: retq # encoding: [0xc3] 905 %q = load i32, ptr %ptr_b 906 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 907 %b = shufflevector <8 x i32> %vecinit.i, <8 
x i32> undef, <8 x i32> zeroinitializer 908 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 909 %2 = bitcast i16 %mask to <16 x i1> 910 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 911 ret <16 x i16> %3 912} 913 914declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) 915 916define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 917; CHECK-LABEL: test_mask_packus_epi16_rr_128: 918; CHECK: # %bb.0: 919; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 920; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 921 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 922 ret <16 x i8> %1 923} 924 925define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 926; X86-LABEL: test_mask_packus_epi16_rrk_128: 927; X86: # %bb.0: 928; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 929; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 930; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 931; X86-NEXT: retl # encoding: [0xc3] 932; 933; X64-LABEL: test_mask_packus_epi16_rrk_128: 934; X64: # %bb.0: 935; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 936; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 937; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 938; X64-NEXT: retq # encoding: [0xc3] 939 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 940 %2 = bitcast i16 %mask to <16 x i1> 941 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 942 ret <16 x i8> %3 943} 944 945define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 946; X86-LABEL: test_mask_packus_epi16_rrkz_128: 947; X86: # %bb.0: 948; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 949; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 950; X86-NEXT: retl # encoding: [0xc3] 951; 952; X64-LABEL: test_mask_packus_epi16_rrkz_128: 953; X64: # %bb.0: 954; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 955; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 956; X64-NEXT: retq # encoding: [0xc3] 957 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 958 %2 = bitcast i16 %mask to <16 x i1> 959 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 960 ret <16 x i8> %3 961} 962 963define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) { 964; X86-LABEL: test_mask_packus_epi16_rm_128: 965; X86: # %bb.0: 966; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 967; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00] 968; X86-NEXT: retl # encoding: [0xc3] 969; 970; X64-LABEL: test_mask_packus_epi16_rm_128: 971; X64: # %bb.0: 972; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07] 973; X64-NEXT: retq # encoding: [0xc3] 974 %b = load <8 x i16>, ptr %ptr_b 975 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 976 ret <16 x i8> %1 977} 978 979define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) { 980; 
X86-LABEL: test_mask_packus_epi16_rmk_128: 981; X86: # %bb.0: 982; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 983; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 984; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08] 985; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 986; X86-NEXT: retl # encoding: [0xc3] 987; 988; X64-LABEL: test_mask_packus_epi16_rmk_128: 989; X64: # %bb.0: 990; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 991; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] 992; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 993; X64-NEXT: retq # encoding: [0xc3] 994 %b = load <8 x i16>, ptr %ptr_b 995 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 996 %2 = bitcast i16 %mask to <16 x i1> 997 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 998 ret <16 x i8> %3 999} 1000 1001define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) { 1002; X86-LABEL: test_mask_packus_epi16_rmkz_128: 1003; X86: # %bb.0: 1004; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1005; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1006; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00] 1007; X86-NEXT: retl # encoding: [0xc3] 1008; 1009; X64-LABEL: test_mask_packus_epi16_rmkz_128: 1010; X64: # %bb.0: 1011; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1012; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] 1013; X64-NEXT: retq # encoding: [0xc3] 1014 %b = load <8 x i16>, ptr %ptr_b 1015 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 1016 %2 = bitcast i16 %mask to <16 x i1> 1017 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 1018 ret <16 x i8> %3 1019} 1020 1021declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) 1022 1023define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1024; CHECK-LABEL: test_mask_packus_epi16_rr_256: 1025; CHECK: # %bb.0: 1026; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] 1027; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1028 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1029 ret <32 x i8> %1 1030} 1031 1032define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 1033; X86-LABEL: test_mask_packus_epi16_rrk_256: 1034; X86: # %bb.0: 1035; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1036; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 1037; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1038; X86-NEXT: retl # encoding: [0xc3] 1039; 1040; X64-LABEL: test_mask_packus_epi16_rrk_256: 1041; X64: # %bb.0: 1042; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1043; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 1044; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1045; X64-NEXT: retq # encoding: [0xc3] 1046 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1047 
%2 = bitcast i32 %mask to <32 x i1> 1048 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 1049 ret <32 x i8> %3 1050} 1051 1052define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 1053; X86-LABEL: test_mask_packus_epi16_rrkz_256: 1054; X86: # %bb.0: 1055; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1056; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 1057; X86-NEXT: retl # encoding: [0xc3] 1058; 1059; X64-LABEL: test_mask_packus_epi16_rrkz_256: 1060; X64: # %bb.0: 1061; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1062; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 1063; X64-NEXT: retq # encoding: [0xc3] 1064 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1065 %2 = bitcast i32 %mask to <32 x i1> 1066 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 1067 ret <32 x i8> %3 1068} 1069 1070define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) { 1071; X86-LABEL: test_mask_packus_epi16_rm_256: 1072; X86: # %bb.0: 1073; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1074; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00] 1075; X86-NEXT: retl # encoding: [0xc3] 1076; 1077; X64-LABEL: test_mask_packus_epi16_rm_256: 1078; X64: # %bb.0: 1079; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07] 1080; X64-NEXT: retq # encoding: [0xc3] 1081 %b = load <16 x i16>, ptr %ptr_b 1082 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1083 ret <32 x i8> %1 1084} 1085 1086define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) { 1087; X86-LABEL: test_mask_packus_epi16_rmk_256: 1088; X86: # %bb.0: 1089; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1090; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1091; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08] 1092; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1093; X86-NEXT: retl # encoding: [0xc3] 1094; 1095; X64-LABEL: test_mask_packus_epi16_rmk_256: 1096; X64: # %bb.0: 1097; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1098; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] 1099; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1100; X64-NEXT: retq # encoding: [0xc3] 1101 %b = load <16 x i16>, ptr %ptr_b 1102 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1103 %2 = bitcast i32 %mask to <32 x i1> 1104 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 1105 ret <32 x i8> %3 1106} 1107 1108define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) { 1109; X86-LABEL: test_mask_packus_epi16_rmkz_256: 1110; X86: # %bb.0: 1111; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1112; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1113; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00] 1114; X86-NEXT: retl # encoding: [0xc3] 1115; 1116; X64-LABEL: test_mask_packus_epi16_rmkz_256: 1117; X64: # %bb.0: 1118; 
X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)

define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding:
[0xc5,0xfb,0x92,0xcf] 1249; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 1250; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1251; X64-NEXT: retq # encoding: [0xc3] 1252 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 1253 %2 = bitcast i8 %x3 to <8 x i1> 1254 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1 1255 ret <8 x i16> %3 1256} 1257 1258declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>) 1259 1260define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1261; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256: 1262; CHECK: # %bb.0: 1263; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2] 1264; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1265 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 1266 ret <16 x i16> %1 1267} 1268 1269define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1270; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 1271; X86: # %bb.0: 1272; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1273; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 1274; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1275; X86-NEXT: retl # encoding: [0xc3] 1276; 1277; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 1278; X64: # %bb.0: 1279; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1280; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 1281; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1282; X64-NEXT: retq # encoding: [0xc3] 1283 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 1284 %2 = bitcast i16 %x3 to <16 x i1> 1285 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1 1286 ret <16 x i16> %3 1287} 1288 1289declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) 1290 1291define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1292; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128: 1293; X86: # %bb.0: 1294; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1295; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 1296; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1297; X86-NEXT: retl # encoding: [0xc3] 1298; 1299; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128: 1300; X64: # %bb.0: 1301; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1302; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 1303; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1304; X64-NEXT: retq # encoding: [0xc3] 1305 %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1) 1306 %2 = bitcast i16 %x3 to <16 x i1> 1307 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2 1308 ret <16 x i8> %3 1309} 1310 1311declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) 1312 1313define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1314; 
X86-LABEL: test_int_x86_avx512_mask_pavg_b_256: 1315; X86: # %bb.0: 1316; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1317; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 1318; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1319; X86-NEXT: retl # encoding: [0xc3] 1320; 1321; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256: 1322; X64: # %bb.0: 1323; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1324; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 1325; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1326; X64-NEXT: retq # encoding: [0xc3] 1327 %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1) 1328 %2 = bitcast i32 %x3 to <32 x i1> 1329 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2 1330 ret <32 x i8> %3 1331} 1332 1333declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) 1334 1335define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1336; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128: 1337; X86: # %bb.0: 1338; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1339; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1340; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 1341; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1342; X86-NEXT: retl # encoding: [0xc3] 1343; 1344; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128: 1345; X64: # %bb.0: 1346; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1347; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 1348; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1349; X64-NEXT: retq # encoding: [0xc3] 1350 %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1) 1351 %2 = bitcast i8 %x3 to <8 x i1> 1352 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1353 ret <8 x i16> %3 1354} 1355 1356declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) 1357 1358define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1359; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256: 1360; X86: # %bb.0: 1361; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1362; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 1363; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1364; X86-NEXT: retl # encoding: [0xc3] 1365; 1366; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256: 1367; X64: # %bb.0: 1368; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1369; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 1370; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1371; X64-NEXT: retq # encoding: [0xc3] 1372 %1 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1) 1373 %2 = bitcast i16 %x3 to <16 x i1> 1374 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1375 ret <16 x i16> %3 1376} 1377 1378declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) 1379 1380define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1381; 
X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 1382; X86: # %bb.0: 1383; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1384; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1385; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 1386; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1387; X86-NEXT: retl # encoding: [0xc3] 1388; 1389; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 1390; X64: # %bb.0: 1391; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1392; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 1393; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1394; X64-NEXT: retq # encoding: [0xc3] 1395 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1) 1396 %2 = bitcast i8 %x3 to <8 x i1> 1397 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1398 ret <8 x i16> %3 1399} 1400 1401declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) 1402 1403define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1404; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 1405; X86: # %bb.0: 1406; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1407; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 1408; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1409; X86-NEXT: retl # encoding: [0xc3] 1410; 1411; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 1412; X64: # %bb.0: 1413; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1414; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 1415; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1416; X64-NEXT: retq # encoding: [0xc3] 1417 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1) 1418 %2 = bitcast i16 %x3 to <16 x i1> 1419 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1420 ret <16 x i16> %3 1421} 1422 1423declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) 1424 1425define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1426; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 1427; X86: # %bb.0: 1428; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1429; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1430; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 1431; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1432; X86-NEXT: retl # encoding: [0xc3] 1433; 1434; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 1435; X64: # %bb.0: 1436; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1437; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 1438; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1439; X64-NEXT: retq # encoding: [0xc3] 1440 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1) 1441 %2 = bitcast i8 %x3 to <8 x i1> 1442 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1443 ret <8 x i16> %3 1444} 1445 1446declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) 1447 1448define <16 x i16> 
@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1449; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 1450; X86: # %bb.0: 1451; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1452; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 1453; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1454; X86-NEXT: retl # encoding: [0xc3] 1455; 1456; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 1457; X64: # %bb.0: 1458; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1459; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 1460; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1461; X64-NEXT: retq # encoding: [0xc3] 1462 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1) 1463 %2 = bitcast i16 %x3 to <16 x i1> 1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1465 ret <16 x i16> %3 1466} 1467 1468declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) 1469 1470define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1471; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 1472; X86: # %bb.0: 1473; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1474; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1475; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 1476; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1477; X86-NEXT: retl # encoding: [0xc3] 1478; 1479; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 1480; X64: # %bb.0: 1481; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1482; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 1483; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1484; X64-NEXT: retq # encoding: [0xc3] 1485 %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1) 1486 %2 = bitcast i8 %x3 to <8 x i1> 1487 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1488 ret <8 x i16> %3 1489} 1490 1491declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) 1492 1493define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1494; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 1495; X86: # %bb.0: 1496; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1497; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 1498; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1499; X86-NEXT: retl # encoding: [0xc3] 1500; 1501; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 1502; X64: # %bb.0: 1503; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1504; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 1505; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1506; X64-NEXT: retq # encoding: [0xc3] 1507 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1) 1508 %2 = bitcast i16 %x3 to <16 x i1> 1509 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1510 ret <16 x i16> %3 1511} 1512 1513declare <16 x i8> 
@llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) 1514 1515define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1516; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 1517; X86: # %bb.0: 1518; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1519; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1520; X86-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] 1521; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 1522; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] 1523; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1524; X86-NEXT: retl # encoding: [0xc3] 1525; 1526; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 1527; X64: # %bb.0: 1528; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1529; X64-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] 1530; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 1531; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] 1532; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1533; X64-NEXT: retq # encoding: [0xc3] 1534 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1535 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1536 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1537 %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 1538 %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 1539 %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 1540 ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 1541} 1542 1543declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(ptr %ptr, <8 x i16>, i8) 1544 1545define void @test_int_x86_avx512_mask_pmov_wb_mem_128(ptr %ptr, <8 x i16> %x1, i8 %x2) { 1546; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 1547; X86: # %bb.0: 1548; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1549; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1550; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1551; X86-NEXT: vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00] 1552; X86-NEXT: vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00] 1553; X86-NEXT: retl # encoding: [0xc3] 1554; 1555; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 1556; X64: # %bb.0: 1557; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1558; X64-NEXT: vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07] 1559; X64-NEXT: vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07] 1560; X64-NEXT: retq # encoding: [0xc3] 1561 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 -1) 1562 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 %x2) 1563 ret void 1564} 1565 1566declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) 1567 1568define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1569; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 1570; X86: # %bb.0: 1571; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1572; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1573; X86-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] 1574; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 1575; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] 1576; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1577; X86-NEXT: retl # encoding: [0xc3] 1578; 1579; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 1580; X64: # %bb.0: 1581; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1582; X64-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] 1583; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 1584; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] 1585; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1586; X64-NEXT: retq # encoding: [0xc3] 1587 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1588 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1589 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1590 %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 1591 %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 1592 %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 1593 ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 1594} 1595 1596declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(ptr %ptr, <8 x i16>, i8) 1597 1598define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(ptr %ptr, <8 x i16> %x1, i8 %x2) { 1599; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 1600; X86: # %bb.0: 1601; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1602; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1603; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1604; X86-NEXT: vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00] 1605; X86-NEXT: vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00] 1606; X86-NEXT: retl # encoding: [0xc3] 1607; 1608; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 1609; X64: # %bb.0: 1610; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1611; X64-NEXT: vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] 1612; X64-NEXT: vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] 1613; X64-NEXT: retq # encoding: [0xc3] 1614 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 -1) 1615 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 %x2) 1616 ret void 1617} 1618 1619declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) 1620 1621define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1622; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 1623; X86: # %bb.0: 1624; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1625; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1626; X86-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] 1627; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf2,0x7e,0x09,0x10,0xc1] 1628; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] 1629; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1630; X86-NEXT: retl # encoding: [0xc3] 1631; 1632; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 1633; X64: # %bb.0: 1634; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1635; X64-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] 1636; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 1637; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] 1638; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 1639; X64-NEXT: retq # encoding: [0xc3] 1640 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1641 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1642 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1643 %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 1644 %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 1645 %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 1646 ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 1647} 1648 1649declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(ptr %ptr, <8 x i16>, i8) 1650 1651define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(ptr %ptr, <8 x i16> %x1, i8 %x2) { 1652; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 1653; X86: # %bb.0: 1654; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1655; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1656; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1657; X86-NEXT: vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00] 1658; X86-NEXT: vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00] 1659; X86-NEXT: retl # encoding: [0xc3] 1660; 1661; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 1662; X64: # %bb.0: 1663; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1664; X64-NEXT: vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] 1665; X64-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] 1666; X64-NEXT: retq # encoding: [0xc3] 1667 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 -1) 1668 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(ptr %ptr, <8 x i16> %x1, i8 %x2) 1669 ret void 1670} 1671 1672define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0) { 1673; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256: 1674; CHECK: # %bb.0: 1675; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 1676; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1677; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1678 %1 = trunc <16 x i16> %x0 to <16 x i8> 1679 ret <16 x i8> %1 1680} 1681 1682define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1683; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 1684; X86: # %bb.0: 1685; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1686; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 1687; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6f,0xc1] 1688; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1689; X86-NEXT: retl # encoding: [0xc3] 1690; 1691; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 1692; X64: # %bb.0: 1693; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1694; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 1695; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1696; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1697; X64-NEXT: retq # encoding: [0xc3] 1698 %1 = trunc <16 x i16> %x0 to <16 x i8> 1699 %2 = bitcast i16 %x2 to <16 x i1> 1700 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1 1701 ret <16 x i8> %3 1702} 1703 1704define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) { 1705; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 1706; X86: # %bb.0: 1707; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1708; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 1709; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1710; X86-NEXT: retl # encoding: [0xc3] 1711; 1712; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 1713; X64: # %bb.0: 1714; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1715; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 1716; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1717; X64-NEXT: retq # encoding: [0xc3] 1718 %1 = trunc <16 x i16> %x0 to <16 x i8> 1719 %2 = bitcast i16 %x2 to <16 x i1> 1720 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 1721 ret <16 x i8> %3 1722} 1723 1724declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(ptr %ptr, <16 x i16>, i16) 1725 1726define void @test_int_x86_avx512_mask_pmov_wb_mem_256(ptr %ptr, <16 x i16> %x1, i16 %x2) { 1727; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 1728; X86: # %bb.0: 1729; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1730; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1731; X86-NEXT: vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00] 1732; X86-NEXT: vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00] 1733; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1734; X86-NEXT: retl # encoding: [0xc3] 1735; 1736; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 1737; X64: # %bb.0: 1738; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1739; X64-NEXT: vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07] 1740; X64-NEXT: vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07] 1741; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1742; X64-NEXT: retq # encoding: [0xc3] 1743 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 -1) 1744 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 %x2) 1745 ret void 1746} 1747 1748declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) 1749 1750define <16 x i8>@test_int_x86_avx512_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1) { 1751; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_256: 1752; CHECK: # %bb.0: 1753; CHECK-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] 1754; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1755; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1756 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 1757 ret <16 
x i8> %res 1758} 1759 1760define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1761; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 1762; X86: # %bb.0: 1763; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1764; X86-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 1765; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1766; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1767; X86-NEXT: retl # encoding: [0xc3] 1768; 1769; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 1770; X64: # %bb.0: 1771; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1772; X64-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 1773; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1774; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1775; X64-NEXT: retq # encoding: [0xc3] 1776 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 1777 ret <16 x i8> %res 1778} 1779 1780define <16 x i8>@test_int_x86_avx512_maskz_pmovs_wb_256(<16 x i16> %x0, i16 %x2) { 1781; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256: 1782; X86: # %bb.0: 1783; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1784; X86-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0] 1785; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1786; X86-NEXT: retl # encoding: [0xc3] 1787; 1788; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256: 1789; X64: # %bb.0: 1790; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1791; X64-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0] 1792; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1793; X64-NEXT: retq # encoding: [0xc3] 1794 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 1795 ret <16 x i8> %res 1796} 1797 1798declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(ptr %ptr, <16 x i16>, i16) 1799 1800define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(ptr %ptr, <16 x i16> %x1, i16 %x2) { 1801; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 1802; X86: # %bb.0: 1803; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1804; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1805; X86-NEXT: vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00] 1806; X86-NEXT: vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00] 1807; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1808; X86-NEXT: retl # encoding: [0xc3] 1809; 1810; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 1811; X64: # %bb.0: 1812; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1813; X64-NEXT: vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] 1814; X64-NEXT: vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] 1815; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1816; X64-NEXT: retq # encoding: [0xc3] 1817 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 -1) 1818 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 %x2) 1819 ret void 1820} 1821 1822declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) 1823 1824define <16 x i8>@test_int_x86_avx512_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, 
i16 %x2) { 1825; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_256: 1826; CHECK: # %bb.0: 1827; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0] 1828; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1829; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1830 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 1831 ret <16 x i8> %res 1832} 1833 1834define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1835; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 1836; X86: # %bb.0: 1837; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1838; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 1839; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1840; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1841; X86-NEXT: retl # encoding: [0xc3] 1842; 1843; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 1844; X64: # %bb.0: 1845; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1846; X64-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 1847; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1848; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1849; X64-NEXT: retq # encoding: [0xc3] 1850 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 1851 ret <16 x i8> %res 1852} 1853 1854define <16 x i8>@test_int_x86_avx512_maskz_pmovus_wb_256(<16 x i16> %x0, i16 %x2) { 1855; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256: 1856; X86: # %bb.0: 1857; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1858; X86-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0] 1859; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1860; X86-NEXT: retl # encoding: [0xc3] 1861; 1862; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256: 1863; X64: # %bb.0: 1864; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1865; X64-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0] 1866; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1867; X64-NEXT: retq # encoding: [0xc3] 1868 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 1869 ret <16 x i8> %res 1870} 1871 1872declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(ptr %ptr, <16 x i16>, i16) 1873 1874define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(ptr %ptr, <16 x i16> %x1, i16 %x2) { 1875; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: 1876; X86: # %bb.0: 1877; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1878; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1879; X86-NEXT: vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00] 1880; X86-NEXT: vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00] 1881; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1882; X86-NEXT: retl # encoding: [0xc3] 1883; 1884; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: 1885; X64: # %bb.0: 1886; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1887; X64-NEXT: vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] 1888; X64-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07] 1889; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1890; 
X64-NEXT: retq # encoding: [0xc3] 1891 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 -1) 1892 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(ptr %ptr, <16 x i16> %x1, i16 %x2) 1893 ret void 1894} 1895 1896declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) 1897 1898define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { 1899; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 1900; X86: # %bb.0: 1901; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1902; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1903; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 1904; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1905; X86-NEXT: retl # encoding: [0xc3] 1906; 1907; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 1908; X64: # %bb.0: 1909; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1910; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 1911; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1912; X64-NEXT: retq # encoding: [0xc3] 1913 %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1) 1914 %2 = bitcast i8 %x3 to <8 x i1> 1915 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1916 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2 1917 ret <4 x i32> %3 1918} 1919 1920declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) 1921 1922define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 1923; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 1924; X86: # %bb.0: 1925; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1926; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1927; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 1928; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1929; X86-NEXT: retl # encoding: [0xc3] 1930; 1931; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 1932; X64: # %bb.0: 1933; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1934; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 1935; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1936; X64-NEXT: retq # encoding: [0xc3] 1937 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1) 1938 %2 = bitcast i8 %x3 to <8 x i1> 1939 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 1940 ret <8 x i32> %3 1941} 1942 1943declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) 1944 1945define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { 1946; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 1947; X86: # %bb.0: 1948; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1949; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1950; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 1951; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1952; X86-NEXT: retl # encoding: [0xc3] 1953; 1954; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 1955; X64: # %bb.0: 1956; 
X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1957; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 1958; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1959; X64-NEXT: retq # encoding: [0xc3] 1960 %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1) 1961 %2 = bitcast i8 %x3 to <8 x i1> 1962 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1963 ret <8 x i16> %3 1964} 1965 1966declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) 1967 1968define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { 1969; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 1970; X86: # %bb.0: 1971; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1972; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 1973; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1974; X86-NEXT: retl # encoding: [0xc3] 1975; 1976; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 1977; X64: # %bb.0: 1978; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1979; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 1980; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1981; X64-NEXT: retq # encoding: [0xc3] 1982 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1) 1983 %2 = bitcast i16 %x3 to <16 x i1> 1984 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1985 ret <16 x i16> %3 1986} 1987 1988declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32) 1989 1990define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 1991; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 1992; X86: # %bb.0: 1993; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 1994; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1995; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1996; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] 1997; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 1998; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] 1999; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 2000; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 2001; X86-NEXT: retl # encoding: [0xc3] 2002; 2003; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 2004; X64: # %bb.0: 2005; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 2006; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2007; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] 2008; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 2009; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] 2010; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 2011; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 2012; X64-NEXT: 
retq # encoding: [0xc3] 2013 %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2) 2014 %2 = bitcast i8 %x4 to <8 x i1> 2015 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 2016 %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3) 2017 %5 = bitcast i8 %x4 to <8 x i1> 2018 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer 2019 %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4) 2020 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 2021 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %6, 1 2022 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %7, 2 2023 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 2024} 2025 2026declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32) 2027 2028define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { 2029; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 2030; X86: # %bb.0: 2031; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 2032; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2033; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] 2034; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 2035; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] 2036; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 2037; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 2038; X86-NEXT: retl # encoding: [0xc3] 2039; 2040; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 2041; X64: # %bb.0: 2042; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] 2043; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2044; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] 2045; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 2046; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] 2047; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] 2048; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 2049; X64-NEXT: retq # encoding: [0xc3] 2050 %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2) 2051 %2 = bitcast i16 %x4 to <16 x i1> 2052 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 2053 %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3) 2054 %5 = bitcast i16 %x4 to <16 x i1> 2055 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer 2056 %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4) 2057 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 2058 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %6, 1 2059 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %7, 2 2060 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 2061} 2062 2063declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 
2064 2065define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 2066; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi: 2067; CHECK: # %bb.0: 2068; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1] 2069; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2070 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 2071 ret <16 x i16> %res 2072} 2073 2074define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2075; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 2076; X86: # %bb.0: 2077; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2078; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 2079; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2080; X86-NEXT: retl # encoding: [0xc3] 2081; 2082; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 2083; X64: # %bb.0: 2084; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2085; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 2086; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2087; X64-NEXT: retq # encoding: [0xc3] 2088 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 2089 ret <16 x i16> %res 2090} 2091 2092define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 2093; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 2094; X86: # %bb.0: 2095; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2096; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 2097; X86-NEXT: retl # encoding: [0xc3] 2098; 2099; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 2100; X64: # %bb.0: 2101; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2102; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 2103; X64-NEXT: retq # encoding: [0xc3] 2104 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2105 ret <16 x i16> %res 2106} 2107 2108declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2109 2110define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2111; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi: 2112; CHECK: # %bb.0: 2113; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1] 2114; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2115 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2116 ret <8 x i16> %res 2117} 2118 2119define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2120; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 2121; X86: # %bb.0: 2122; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2123; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2124; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 2125; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2126; X86-NEXT: retl # encoding: [0xc3] 2127; 2128; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 2129; X64: # 
%bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}


define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbw {{.*#+}} xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbw {{.*#+}} xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)

define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbw {{.*#+}} ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbw {{.*#+}} ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)

declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbw {{.*#+}} xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbw {{.*#+}} xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)


define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbw {{.*#+}} ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbw {{.*#+}} ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)



declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}