; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
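
; Broadcast of a byte/word from a GPR. On i686 the i8/i16 scalar argument
; lives on the stack, so codegen folds it as a memory broadcast; on x86-64 it
; is broadcast directly from the %edi argument register (compare the X86 and
; X64 check lines below).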
declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x5c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmb %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0xcb]
; X86-NEXT:    vmovdqu8 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd3]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT:    vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastb %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xcf]
; X64-NEXT:    vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
  %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1
  %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}


declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x5c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpblendmw %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0xcb]
; X86-NEXT:    vmovdqu16 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd3]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8]
; X64-NEXT:    vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastw %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xcf]
; X64-NEXT:    vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}


declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x5c,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmb %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0xcb]
; X86-NEXT:    vmovdqu8 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd3]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT:    vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastb %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xcf]
; X64-NEXT:    vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
  %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
  %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1
  %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2
  ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}



declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x5c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpblendmw %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0xcb]
; X86-NEXT:    vmovdqu16 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd3]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8]
; X64-NEXT:    vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7]
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpbroadcastw %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xcf]
; X64-NEXT:    vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}
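
; Broadcast of the low element of an XMM source. The unmasked forms compress
; to the VEX-encoded AVX2 instruction; only the merge- and zero-masked forms
; need an EVEX encoding.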
declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_pbroadcastb_256(<16 x i8> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_pbroadcastb_128(<16 x i8> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pbroadcastw_256(<8 x i16> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pbroadcastw_128(<8 x i16> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}
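
; 512-bit broadcasts. The ZMM forms have no VEX equivalent, so even the
; unmasked variants keep their EVEX encoding, and the 64-bit mask for
; <64 x i8> needs kmovq.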
declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_mask_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8>@test_int_x86_avx512_maskz_pbroadcastb_512(<16 x i8> %x0, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_mask_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16>@test_int_x86_avx512_maskz_pbroadcastw_512(<8 x i16> %x0, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}
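
; Masked stores: each test performs one store with the supplied mask
; (vmovdqu8/16 with {%k1}) and one with an all-ones mask, which lowers to a
; plain unmasked vmovdqu.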
declare void @llvm.x86.avx512.mask.storeu.b.128(ptr, <16 x i8>, i16)

define void@test_int_x86_avx512_mask_storeu_b_128(ptr %ptr1, ptr %ptr2, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr1, <16 x i8> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.128(ptr %ptr2, <16 x i8> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.b.256(ptr, <32 x i8>, i32)

define void@test_int_x86_avx512_mask_storeu_b_256(ptr %ptr1, ptr %ptr2, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr1, <32 x i8> %x1, i32 %x2)
  call void @llvm.x86.avx512.mask.storeu.b.256(ptr %ptr2, <32 x i8> %x1, i32 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.128(ptr, <8 x i16>, i8)

define void@test_int_x86_avx512_mask_storeu_w_128(ptr %ptr1, ptr %ptr2, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT:    vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr1, <8 x i16> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.128(ptr %ptr2, <8 x i16> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.w.256(ptr, <16 x i16>, i16)

define void@test_int_x86_avx512_mask_storeu_w_256(ptr %ptr1, ptr %ptr2, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr1, <16 x i16> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.w.256(ptr %ptr2, <16 x i16> %x1, i16 -1)
  ret void
}
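
; Masked loads: an all-ones mask gives a plain vmovdqu; a masked load whose
; passthru is already in a register is matched to vpblendm, and a zero-masked
; load uses the {z} form of vmovdqu8/16.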
declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_loadu_w_128(ptr %ptr, ptr %ptr2, <8 x i16> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X86-NEXT:    vpblendmw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x08]
; X86-NEXT:    vmovdqu16 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmw (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x0e]
; X64-NEXT:    vmovdqu16 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr2, <8 x i16> %res0, i8 %mask)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(ptr %ptr, <8 x i16> zeroinitializer, i8 %mask)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_loadu_w_256(ptr %ptr, ptr %ptr2, <16 x i16> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x08]
; X86-NEXT:    vmovdqu16 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmw (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x0e]
; X64-NEXT:    vmovdqu16 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr2, <16 x i16> %res0, i16 %mask)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(ptr %ptr, <16 x i16> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr, <16 x i8>, i16)

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_loadu_b_128(ptr %ptr, ptr %ptr2, <16 x i8> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x08]
; X86-NEXT:    vmovdqu8 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmb (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x0e]
; X64-NEXT:    vmovdqu8 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr2, <16 x i8> %res0, i16 %mask)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(ptr %ptr, <16 x i8> zeroinitializer, i16 %mask)
  %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0
  %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1
  %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5
}

declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr, <32 x i8>, i32)

define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_loadu_b_256(ptr %ptr, ptr %ptr2, <32 x i8> %x1, i32 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vpblendmb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x08]
; X86-NEXT:    vmovdqu8 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x11]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
; X64-NEXT:    vpblendmb (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x0e]
; X64-NEXT:    vmovdqu8 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr2, <32 x i8> %res0, i32 %mask)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(ptr %ptr, <32 x i8> zeroinitializer, i32 %mask)
  %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0
  %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1
  %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2
  ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5
}
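
; palignr: the decoded byte-shuffle is printed after each instruction. The
; unmasked form compresses to VEX; the masked and zeroed forms stay EVEX.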
declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x02]
; CHECK-NEXT:    # xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_palignr_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_palignr_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_palignr_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3) {
; CHECK-LABEL: test_int_x86_avx512_palignr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xc1,0x02]
; CHECK-NEXT:    # ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_palignr_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X86-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_palignr_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
; X64-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_palignr_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_palignr_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
  ret <32 x i8> %res
}
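
; pshufhw/pshuflw with immediate 3 shuffle the high (resp. low) four words of
; each 128-bit lane, as the decoded shuffle comments below show.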
declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufh_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufh_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufh_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pshufl_w_128(<8 x i16> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_pshufl_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pshufl_w_256(<16 x i16> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}
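
; pcmpeq/pcmpgt produce a mask register, which is copied to a GPR with kmovd;
; applying a user mask to the comparison result is then just an integer AND.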
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpeq_b_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)

define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
  ret i32 %res
}

define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_pcmpgt_b_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_b_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)

define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpgt_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)

define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_b_128:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_b_128:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)

define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)

define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpgt_b_128:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_b_128:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)

define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)

define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_w_128:
; X86: # %bb.0:
; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_w_128:
; X64: # %bb.0:
; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)

declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
; CHECK-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
; X86-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
; X64-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xc1]
; CHECK-NEXT: # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
; X86-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
; X64-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xc1]
; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
; CHECK-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
; X86-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
; X64-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xc1]
; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xc1]
; CHECK-NEXT: # ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
; X86-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
; X64-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_add_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_add_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_add_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_add_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_add_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_add_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_add_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_add_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_sub_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_sub_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_sub_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_sub_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_add_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_add_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_add_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_add_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_add_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_add_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_sub_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_sub_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_sub_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_sub_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrk_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rrk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrkz_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rrkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_mullo_epi16_rm_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rm_512:
; X64: # %bb.0:
; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rmk_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rmk_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rmkz_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rmkz_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i16>, ptr %ptr_b
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_mullo_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi16_rrkz_256:
;
X86: # %bb.0: 2278; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2279; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2280; X86-NEXT: retl # encoding: [0xc3] 2281; 2282; X64-LABEL: test_mask_mullo_epi16_rrkz_256: 2283; X64: # %bb.0: 2284; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2285; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2286; X64-NEXT: retq # encoding: [0xc3] 2287 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2288 ret <16 x i16> %res 2289} 2290 2291define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) { 2292; X86-LABEL: test_mask_mullo_epi16_rm_256: 2293; X86: # %bb.0: 2294; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2295; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00] 2296; X86-NEXT: retl # encoding: [0xc3] 2297; 2298; X64-LABEL: test_mask_mullo_epi16_rm_256: 2299; X64: # %bb.0: 2300; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07] 2301; X64-NEXT: retq # encoding: [0xc3] 2302 %b = load <16 x i16>, ptr %ptr_b 2303 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2304 ret <16 x i16> %res 2305} 2306 2307define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 2308; X86-LABEL: test_mask_mullo_epi16_rmk_256: 2309; X86: # %bb.0: 2310; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2311; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2312; X86-NEXT: vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08] 2313; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2314; X86-NEXT: retl # encoding: [0xc3] 2315; 2316; X64-LABEL: test_mask_mullo_epi16_rmk_256: 2317; X64: # %bb.0: 2318; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2319; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] 2320; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2321; X64-NEXT: retq # encoding: [0xc3] 2322 %b = load <16 x i16>, ptr %ptr_b 2323 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2324 ret <16 x i16> %res 2325} 2326 2327define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) { 2328; X86-LABEL: test_mask_mullo_epi16_rmkz_256: 2329; X86: # %bb.0: 2330; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2331; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2332; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00] 2333; X86-NEXT: retl # encoding: [0xc3] 2334; 2335; X64-LABEL: test_mask_mullo_epi16_rmkz_256: 2336; X64: # %bb.0: 2337; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2338; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] 2339; X64-NEXT: retq # encoding: [0xc3] 2340 %b = load <16 x i16>, ptr %ptr_b 2341 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2342 ret <16 x 
i16> %res 2343} 2344 2345declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2346 2347declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2348 2349define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2350; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2351; X86: # %bb.0: 2352; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2353; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2354; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2355; X86-NEXT: retl # encoding: [0xc3] 2356; 2357; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2358; X64: # %bb.0: 2359; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2360; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2361; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2362; X64-NEXT: retq # encoding: [0xc3] 2363 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2364 ret <16 x i8> %res 2365} 2366 2367define <16 x i8>@test_int_x86_avx512_maskz_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2368; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128: 2369; X86: # %bb.0: 2370; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2371; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2372; X86-NEXT: retl # encoding: [0xc3] 2373; 2374; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128: 2375; X64: # %bb.0: 2376; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2377; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2378; X64-NEXT: retq # encoding: [0xc3] 2379 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2380 ret <16 x i8> %res 2381} 2382 2383declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2384 2385define <32 x i8>@test_int_x86_avx512_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2386; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_256: 2387; CHECK: # %bb.0: 2388; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1] 2389; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2390 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2391 ret <32 x i8> %res 2392} 2393 2394define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2395; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2396; X86: # %bb.0: 2397; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2398; X86-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2399; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2400; X86-NEXT: retl # encoding: [0xc3] 2401; 2402; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2403; X64: # %bb.0: 2404; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2405; X64-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2406; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2407; 
X64-NEXT: retq # encoding: [0xc3] 2408 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2409 ret <32 x i8> %res 2410} 2411 2412declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2413 2414define <8 x i16>@test_int_x86_avx512_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2415; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_128: 2416; CHECK: # %bb.0: 2417; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] 2418; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2419 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2420 ret <8 x i16> %res 2421} 2422 2423define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2424; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2425; X86: # %bb.0: 2426; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2427; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2428; X86-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2429; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2430; X86-NEXT: retl # encoding: [0xc3] 2431; 2432; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2433; X64: # %bb.0: 2434; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2435; X64-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2436; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2437; X64-NEXT: retq # encoding: [0xc3] 2438 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2439 ret <8 x i16> %res 2440} 2441 2442declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2443 2444define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2445; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2446; X86: # %bb.0: 2447; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2448; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2449; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2450; X86-NEXT: retl # encoding: [0xc3] 2451; 2452; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2453; X64: # %bb.0: 2454; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2455; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2456; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2457; X64-NEXT: retq # encoding: [0xc3] 2458 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2459 ret <16 x i16> %res 2460} 2461 2462define <16 x i16>@test_int_x86_avx512_maskz_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2463; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256: 2464; X86: # %bb.0: 2465; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2466; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2467; X86-NEXT: retl # encoding: [0xc3] 2468; 2469; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256: 2470; X64: # %bb.0: 2471; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 
2472; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2473; X64-NEXT: retq # encoding: [0xc3] 2474 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2475 ret <16 x i16> %res 2476} 2477 2478declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2479 2480define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2481; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2482; X86: # %bb.0: 2483; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2484; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2485; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2486; X86-NEXT: retl # encoding: [0xc3] 2487; 2488; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2489; X64: # %bb.0: 2490; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2491; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2492; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2493; X64-NEXT: retq # encoding: [0xc3] 2494 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2495 ret <16 x i8> %res 2496} 2497 2498define <16 x i8>@test_int_x86_avx512_maskz_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2499; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128: 2500; X86: # %bb.0: 2501; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2502; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2503; X86-NEXT: retl # encoding: [0xc3] 2504; 2505; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128: 2506; X64: # %bb.0: 2507; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2508; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2509; X64-NEXT: retq # encoding: [0xc3] 2510 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2511 ret <16 x i8> %res 2512} 2513 2514declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2515 2516define <32 x i8>@test_int_x86_avx512_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2517; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_256: 2518; CHECK: # %bb.0: 2519; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1] 2520; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2521 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2522 ret <32 x i8> %res 2523} 2524 2525define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2526; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2527; X86: # %bb.0: 2528; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2529; X86-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2530; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2531; X86-NEXT: retl # encoding: [0xc3] 2532; 2533; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2534; X64: # %bb.0: 2535; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2536; X64-NEXT: vpmaxub 
%ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2537; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2538; X64-NEXT: retq # encoding: [0xc3] 2539 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2540 ret <32 x i8> %res 2541} 2542 2543declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2544 2545define <8 x i16>@test_int_x86_avx512_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2546; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_128: 2547; CHECK: # %bb.0: 2548; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1] 2549; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2550 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2551 ret <8 x i16> %res 2552} 2553 2554define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2555; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2556; X86: # %bb.0: 2557; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2558; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2559; X86-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2560; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2561; X86-NEXT: retl # encoding: [0xc3] 2562; 2563; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2564; X64: # %bb.0: 2565; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2566; X64-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2567; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2568; X64-NEXT: retq # encoding: [0xc3] 2569 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2570 ret <8 x i16> %res 2571} 2572 2573declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2574 2575define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2576; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2577; X86: # %bb.0: 2578; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2579; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2580; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2581; X86-NEXT: retl # encoding: [0xc3] 2582; 2583; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2584; X64: # %bb.0: 2585; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2586; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2587; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2588; X64-NEXT: retq # encoding: [0xc3] 2589 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2590 ret <16 x i16> %res 2591} 2592 2593define <16 x i16>@test_int_x86_avx512_maskz_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2594; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256: 2595; X86: # %bb.0: 2596; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2597; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2598; 
X86-NEXT: retl # encoding: [0xc3] 2599; 2600; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256: 2601; X64: # %bb.0: 2602; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2603; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2604; X64-NEXT: retq # encoding: [0xc3] 2605 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2606 ret <16 x i16> %res 2607} 2608 2609declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2610 2611define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2612; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2613; X86: # %bb.0: 2614; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2615; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2616; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2617; X86-NEXT: retl # encoding: [0xc3] 2618; 2619; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2620; X64: # %bb.0: 2621; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2622; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2623; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2624; X64-NEXT: retq # encoding: [0xc3] 2625 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2626 ret <16 x i8> %res 2627} 2628 2629define <16 x i8>@test_int_x86_avx512_maskz_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2630; X86-LABEL: test_int_x86_avx512_maskz_pmins_b_128: 2631; X86: # %bb.0: 2632; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2633; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2634; X86-NEXT: retl # encoding: [0xc3] 2635; 2636; X64-LABEL: test_int_x86_avx512_maskz_pmins_b_128: 2637; X64: # %bb.0: 2638; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2639; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2640; X64-NEXT: retq # encoding: [0xc3] 2641 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2642 ret <16 x i8> %res 2643} 2644 2645declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2646 2647define <32 x i8>@test_int_x86_avx512_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2648; CHECK-LABEL: test_int_x86_avx512_pmins_b_256: 2649; CHECK: # %bb.0: 2650; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1] 2651; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2652 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2653 ret <32 x i8> %res 2654} 2655 2656define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2657; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2658; X86: # %bb.0: 2659; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2660; X86-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2661; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2662; X86-NEXT: retl # 
encoding: [0xc3] 2663; 2664; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2665; X64: # %bb.0: 2666; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2667; X64-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2668; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2669; X64-NEXT: retq # encoding: [0xc3] 2670 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2671 ret <32 x i8> %res 2672} 2673 2674declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2675 2676define <8 x i16>@test_int_x86_avx512_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2677; CHECK-LABEL: test_int_x86_avx512_pmins_w_128: 2678; CHECK: # %bb.0: 2679; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] 2680; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2681 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2682 ret <8 x i16> %res 2683} 2684 2685define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2686; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2687; X86: # %bb.0: 2688; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2689; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2690; X86-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2691; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2692; X86-NEXT: retl # encoding: [0xc3] 2693; 2694; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2695; X64: # %bb.0: 2696; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2697; X64-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2698; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2699; X64-NEXT: retq # encoding: [0xc3] 2700 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2701 ret <8 x i16> %res 2702} 2703 2704declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2705 2706define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2707; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2708; X86: # %bb.0: 2709; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2710; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2711; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2712; X86-NEXT: retl # encoding: [0xc3] 2713; 2714; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2715; X64: # %bb.0: 2716; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2717; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2718; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2719; X64-NEXT: retq # encoding: [0xc3] 2720 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2721 ret <16 x i16> %res 2722} 2723 2724define <16 x i16>@test_int_x86_avx512_maskz_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2725; X86-LABEL: test_int_x86_avx512_maskz_pmins_w_256: 2726; X86: # %bb.0: 2727; X86-NEXT: 
kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2728; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2729; X86-NEXT: retl # encoding: [0xc3] 2730; 2731; X64-LABEL: test_int_x86_avx512_maskz_pmins_w_256: 2732; X64: # %bb.0: 2733; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2734; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2735; X64-NEXT: retq # encoding: [0xc3] 2736 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2737 ret <16 x i16> %res 2738} 2739 2740declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2741 2742define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2743; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2744; X86: # %bb.0: 2745; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2746; X86-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2747; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2748; X86-NEXT: retl # encoding: [0xc3] 2749; 2750; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2751; X64: # %bb.0: 2752; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2753; X64-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2754; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2755; X64-NEXT: retq # encoding: [0xc3] 2756 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2757 ret <16 x i8> %res 2758} 2759 2760define <16 x i8>@test_int_x86_avx512_maskz_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2761; X86-LABEL: test_int_x86_avx512_maskz_pminu_b_128: 2762; X86: # %bb.0: 2763; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2764; X86-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2765; X86-NEXT: retl # encoding: [0xc3] 2766; 2767; X64-LABEL: test_int_x86_avx512_maskz_pminu_b_128: 2768; X64: # %bb.0: 2769; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2770; X64-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2771; X64-NEXT: retq # encoding: [0xc3] 2772 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2773 ret <16 x i8> %res 2774} 2775 2776declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2777 2778define <32 x i8>@test_int_x86_avx512_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2779; CHECK-LABEL: test_int_x86_avx512_pminu_b_256: 2780; CHECK: # %bb.0: 2781; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1] 2782; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2783 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2784 ret <32 x i8> %res 2785} 2786 2787define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2788; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256: 2789; X86: # %bb.0: 2790; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2791; X86-NEXT: vpminub %ymm1, %ymm0, 
%ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2792; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2793; X86-NEXT: retl # encoding: [0xc3] 2794; 2795; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256: 2796; X64: # %bb.0: 2797; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2798; X64-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2799; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2800; X64-NEXT: retq # encoding: [0xc3] 2801 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2802 ret <32 x i8> %res 2803} 2804 2805declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2806 2807define <8 x i16>@test_int_x86_avx512_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2808; CHECK-LABEL: test_int_x86_avx512_pminu_w_128: 2809; CHECK: # %bb.0: 2810; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1] 2811; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2812 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2813 ret <8 x i16> %res 2814} 2815 2816define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2817; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2818; X86: # %bb.0: 2819; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2820; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2821; X86-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2822; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2823; X86-NEXT: retl # encoding: [0xc3] 2824; 2825; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2826; X64: # %bb.0: 2827; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2828; X64-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2829; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2830; X64-NEXT: retq # encoding: [0xc3] 2831 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2832 ret <8 x i16> %res 2833} 2834 2835declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2836 2837define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2838; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2839; X86: # %bb.0: 2840; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2841; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2842; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2843; X86-NEXT: retl # encoding: [0xc3] 2844; 2845; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2846; X64: # %bb.0: 2847; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2848; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2849; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2850; X64-NEXT: retq # encoding: [0xc3] 2851 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2852 ret <16 x i16> %res 2853} 2854 2855define <16 x 
i16>@test_int_x86_avx512_maskz_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2856; X86-LABEL: test_int_x86_avx512_maskz_pminu_w_256: 2857; X86: # %bb.0: 2858; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2859; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2860; X86-NEXT: retl # encoding: [0xc3] 2861; 2862; X64-LABEL: test_int_x86_avx512_maskz_pminu_w_256: 2863; X64: # %bb.0: 2864; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2865; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2866; X64-NEXT: retq # encoding: [0xc3] 2867 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2868 ret <16 x i16> %res 2869} 2870 2871declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2872 2873define <8 x i16>@test_int_x86_avx512_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2874; CHECK-LABEL: test_int_x86_avx512_psrl_w_128: 2875; CHECK: # %bb.0: 2876; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] 2877; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2878 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2879 ret <8 x i16> %res 2880} 2881 2882define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2883; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2884; X86: # %bb.0: 2885; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2886; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2887; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2888; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2889; X86-NEXT: retl # encoding: [0xc3] 2890; 2891; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2892; X64: # %bb.0: 2893; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2894; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2895; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2896; X64-NEXT: retq # encoding: [0xc3] 2897 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2898 ret <8 x i16> %res 2899} 2900 2901 2902define <8 x i16>@test_int_x86_avx512_maskz_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 2903; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_128: 2904; X86: # %bb.0: 2905; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2906; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2907; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2908; X86-NEXT: retl # encoding: [0xc3] 2909; 2910; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_128: 2911; X64: # %bb.0: 2912; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2913; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2914; X64-NEXT: retq # encoding: [0xc3] 2915 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 2916 ret <8 x i16> %res 2917} 2918 2919declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 2920 2921define <16 x 
i16>@test_int_x86_avx512_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 2922; CHECK-LABEL: test_int_x86_avx512_psrl_w_256: 2923; CHECK: # %bb.0: 2924; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1] 2925; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2926 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 2927 ret <16 x i16> %res 2928} 2929 2930define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2931; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2932; X86: # %bb.0: 2933; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2934; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2935; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2936; X86-NEXT: retl # encoding: [0xc3] 2937; 2938; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2939; X64: # %bb.0: 2940; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2941; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2942; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2943; X64-NEXT: retq # encoding: [0xc3] 2944 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 2945 ret <16 x i16> %res 2946} 2947 2948define <16 x i16>@test_int_x86_avx512_maskz_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 2949; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_256: 2950; X86: # %bb.0: 2951; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2952; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2953; X86-NEXT: retl # encoding: [0xc3] 2954; 2955; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_256: 2956; X64: # %bb.0: 2957; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2958; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2959; X64-NEXT: retq # encoding: [0xc3] 2960 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2961 ret <16 x i16> %res 2962} 2963 2964declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2965 2966define <8 x i16>@test_int_x86_avx512_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2967; CHECK-LABEL: test_int_x86_avx512_psra_w_128: 2968; CHECK: # %bb.0: 2969; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] 2970; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2971 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2972 ret <8 x i16> %res 2973} 2974 2975define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2976; X86-LABEL: test_int_x86_avx512_mask_psra_w_128: 2977; X86: # %bb.0: 2978; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2979; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2980; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2981; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2982; X86-NEXT: retl # encoding: [0xc3] 2983; 2984; X64-LABEL: test_int_x86_avx512_mask_psra_w_128: 2985; X64: # 
%bb.0: 2986; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2987; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2988; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2989; X64-NEXT: retq # encoding: [0xc3] 2990 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2991 ret <8 x i16> %res 2992} 2993 2994define <8 x i16>@test_int_x86_avx512_maskz_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 2995; X86-LABEL: test_int_x86_avx512_maskz_psra_w_128: 2996; X86: # %bb.0: 2997; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2998; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2999; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 3000; X86-NEXT: retl # encoding: [0xc3] 3001; 3002; X64-LABEL: test_int_x86_avx512_maskz_psra_w_128: 3003; X64: # %bb.0: 3004; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3005; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 3006; X64-NEXT: retq # encoding: [0xc3] 3007 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 3008 ret <8 x i16> %res 3009} 3010 3011declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 3012 3013define <16 x i16>@test_int_x86_avx512_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 3014; CHECK-LABEL: test_int_x86_avx512_psra_w_256: 3015; CHECK: # %bb.0: 3016; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1] 3017; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3018 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 3019 ret <16 x i16> %res 3020} 3021 3022define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3023; X86-LABEL: test_int_x86_avx512_mask_psra_w_256: 3024; X86: # %bb.0: 3025; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3026; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 3027; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3028; X86-NEXT: retl # encoding: [0xc3] 3029; 3030; X64-LABEL: test_int_x86_avx512_mask_psra_w_256: 3031; X64: # %bb.0: 3032; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3033; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 3034; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3035; X64-NEXT: retq # encoding: [0xc3] 3036 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 3037 ret <16 x i16> %res 3038} 3039 3040define <16 x i16>@test_int_x86_avx512_maskz_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 3041; X86-LABEL: test_int_x86_avx512_maskz_psra_w_256: 3042; X86: # %bb.0: 3043; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3044; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 3045; X86-NEXT: retl # encoding: [0xc3] 3046; 3047; X64-LABEL: test_int_x86_avx512_maskz_psra_w_256: 3048; X64: # %bb.0: 3049; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3050; X64-NEXT: vpsraw %xmm1, %ymm0, 
%ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 3051; X64-NEXT: retq # encoding: [0xc3] 3052 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 3053 ret <16 x i16> %res 3054} 3055 3056declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3057 3058define <8 x i16>@test_int_x86_avx512_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 3059; CHECK-LABEL: test_int_x86_avx512_psll_w_128: 3060; CHECK: # %bb.0: 3061; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] 3062; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3063 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3064 ret <8 x i16> %res 3065} 3066 3067define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3068; X86-LABEL: test_int_x86_avx512_mask_psll_w_128: 3069; X86: # %bb.0: 3070; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3071; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3072; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 3073; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3074; X86-NEXT: retl # encoding: [0xc3] 3075; 3076; X64-LABEL: test_int_x86_avx512_mask_psll_w_128: 3077; X64: # %bb.0: 3078; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3079; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 3080; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3081; X64-NEXT: retq # encoding: [0xc3] 3082 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3083 ret <8 x i16> %res 3084} 3085 3086define <8 x i16>@test_int_x86_avx512_maskz_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 3087; X86-LABEL: test_int_x86_avx512_maskz_psll_w_128: 3088; X86: # %bb.0: 3089; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3090; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3091; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 3092; X86-NEXT: retl # encoding: [0xc3] 3093; 3094; X64-LABEL: test_int_x86_avx512_maskz_psll_w_128: 3095; X64: # %bb.0: 3096; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3097; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 3098; X64-NEXT: retq # encoding: [0xc3] 3099 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 3100 ret <8 x i16> %res 3101} 3102 3103declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 3104 3105define <16 x i16>@test_int_x86_avx512_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 3106; CHECK-LABEL: test_int_x86_avx512_psll_w_256: 3107; CHECK: # %bb.0: 3108; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1] 3109; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3110 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 3111 ret <16 x i16> %res 3112} 3113 3114define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3115; X86-LABEL: 
test_int_x86_avx512_mask_psll_w_256: 3116; X86: # %bb.0: 3117; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3118; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 3119; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3120; X86-NEXT: retl # encoding: [0xc3] 3121; 3122; X64-LABEL: test_int_x86_avx512_mask_psll_w_256: 3123; X64: # %bb.0: 3124; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3125; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 3126; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3127; X64-NEXT: retq # encoding: [0xc3] 3128 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 3129 ret <16 x i16> %res 3130} 3131 3132define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 3133; X86-LABEL: test_int_x86_avx512_maskz_psll_w_256: 3134; X86: # %bb.0: 3135; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3136; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 3137; X86-NEXT: retl # encoding: [0xc3] 3138; 3139; X64-LABEL: test_int_x86_avx512_maskz_psll_w_256: 3140; X64: # %bb.0: 3141; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3142; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 3143; X64-NEXT: retq # encoding: [0xc3] 3144 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 3145 ret <16 x i16> %res 3146} 3147 3148declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8) 3149 3150define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 3151; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 3152; X86: # %bb.0: 3153; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 3154; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3155; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3156; X86-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] 3157; X86-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] 3158; X86-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] 3159; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 3160; X86-NEXT: retl # encoding: [0xc3] 3161; 3162; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 3163; X64: # %bb.0: 3164; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 3165; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3166; X64-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] 3167; X64-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] 3168; X64-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] 3169; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] 3170; X64-NEXT: retq # encoding: [0xc3] 3171 %res0 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 3172 %res1 = call <8 x i16> 
@llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1) 3173 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3) 3174 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 3175 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 3176 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 3177 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 3178} 3179 3180declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) 3181 3182define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 3183; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 3184; X86: # %bb.0: 3185; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 3186; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3187; X86-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] 3188; X86-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] 3189; X86-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] 3190; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] 3191; X86-NEXT: retl # encoding: [0xc3] 3192; 3193; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 3194; X64: # %bb.0: 3195; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 3196; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3197; X64-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] 3198; X64-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] 3199; X64-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] 3200; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] 3201; X64-NEXT: retq # encoding: [0xc3] 3202 %res0 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 3203 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1) 3204 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3) 3205 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 3206 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 3207 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 3208 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 3209} 3210 3211declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) 3212 3213define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 3214; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128: 3215; X86: # %bb.0: 3216; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 3217; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3218; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3219; X86-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03] 3220; X86-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04] 3221; 
; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03]
; X64-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04]
; X64-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05]
; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
; X86-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
; X86-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03]
; X64-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04]
; X64-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05]
; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
; X86-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
; X86-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03]
; X64-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04]
; X64-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05]
; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
; X86-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
; X86-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03]
; X64-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04]
; X64-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05]
; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; CHECK-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_128(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
; CHECK-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_256(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_128(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_256(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
  ret <16 x i16> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_256(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)

define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)

define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
  ret <32 x i8> %res
}

declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)

define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
; X86-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)

define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
  ret <16 x i16> %res
}
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)

define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)

define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)

define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)


define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)

define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, ptr %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)

define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)

define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)

define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_cmp_b_256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx # encoding: [0x5b]
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_cmp_b_256:
; X64: # %bb.0:
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; X64-NEXT: vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; X86-LABEL: test_mask_cmp_b_256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp # encoding: [0x55]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx # encoding: [0x5b]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-NEXT: vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
; X64-NEXT: vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone

define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_ucmp_b_256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx # encoding: [0x5b]
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_ucmp_b_256:
; X64: # %bb.0:
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; X64-NEXT: vpinsrd $2, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x02]
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; X86-LABEL: test_mask_ucmp_b_256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp # encoding: [0x55]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx # encoding: [0x5b]
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-NEXT: vpinsrd $1, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc0,0x01]
; X64-NEXT: vpinsrd $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x22,0xc1,0x02]
; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X64-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X64-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
  ret <8 x i32> %vec7
}

declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone

define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8]
; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; X86-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
; X86-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
; X86-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; X86-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
; X86-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
; X86-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
; X64-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
; X64-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone

define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
; X86-LABEL: test_mask_ucmp_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; X86-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
; X86-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
; X86-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
; X86-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; X64-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
; X64-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
; X64-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
; X64-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone

define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8]
; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; X86-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
; X86-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
; X86-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; X64-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
; X64-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
; X64-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone

define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
; X86-LABEL: test_mask_ucmp_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; X86-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
; X86-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
; X86-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
; X86-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; X64-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
; X64-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
; X64-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
; X64-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone

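; The 128-bit word compares return an i8 mask, so the results are packed into
; <8 x i8>; here the TRUE predicate materializes the constant 255 (all eight
; lanes set).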
define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_cmp_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02]
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05]
; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1]
; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
; X86-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
; X86-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
; X64-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
; X64-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_ucmp_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02]
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05]
; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06]
; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
; X86-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
; X86-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
; X86-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
; X64-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
; X64-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
; X64-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone

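; pavg/pabs legacy intrinsics: the unmasked forms compress to plain VEX
; instructions, while the masked forms write into the passthrough operand
; under %k1 and then copy it to the result register.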
define <16 x i8>@mm_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: mm_avg_epu8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: mm_mask_avg_epu8:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm_mask_avg_epu8:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8>@mm256_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: mm256_avg_epu8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: mm256_mask_avg_epu8:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm256_mask_avg_epu8:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <8 x i16>@mm_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: mm_avg_epu16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: mm_mask_avg_epu16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm_mask_avg_epu16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16>@mm256_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: mm256_avg_epu16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: mm256_mask_avg_epu16:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: mm256_mask_avg_epu16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

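; ptestm/ptestnm: each test computes the intrinsic with both the variable mask
; and an all-ones mask and adds the two results, covering both lowerings with
; a single return value.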
; X86: # %bb.0:
; X86-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_b_128:
; X64: # %bb.0:
; X64-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}
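
; NOTE (editorial sketch, not part of the autogenerated checks): a ptestm
; intrinsic tests whether (a & b) is non-zero per element and packs the
; results into a scalar mask; the mask operand is ANDed into the result.
; A minimal equivalent in generic IR, using a hypothetical helper name:
define i16 @sketch_ptestm_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  ; element-wise AND, then test each byte against zero
  %and = and <16 x i8> %a, %b
  %cmp = icmp ne <16 x i8> %and, zeroinitializer
  ; pack the 16 i1 results into the scalar mask layout
  %bits = bitcast <16 x i1> %cmp to i16
  ; fold in the caller-supplied write mask
  %res = and i16 %bits, %mask
  ret i16 %res
}
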
declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)

define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_b_256:
; X86: # %bb.0:
; X86-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_b_256:
; X64: # %bb.0:
; X64-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
  %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
  %res2 = add i32 %res, %res1
  ret i32 %res2
}

declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)

define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_w_128:
; X86: # %bb.0:
; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_w_128:
; X64: # %bb.0:
; X64-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)

define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_w_256:
; X86: # %bb.0:
; X86-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_w_256:
; X64: # %bb.0:
; X64-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}
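
; NOTE (editorial sketch, not part of the autogenerated checks): ptestnm is
; the inverted form of ptestm; a mask bit is set when (a & b) IS zero, so the
; generic-IR equivalent uses icmp eq instead of icmp ne. Hypothetical helper:
define i16 @sketch_ptestnm_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
  %and = and <16 x i8> %a, %b
  ; inverted test: a bit is set where the AND result is zero
  %cmp = icmp eq <16 x i8> %and, zeroinitializer
  %bits = bitcast <16 x i1> %cmp to i16
  %res = and i16 %bits, %mask
  ret i16 %res
}
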
declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)

define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_b_128:
; X86: # %bb.0:
; X86-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_b_128:
; X64: # %bb.0:
; X64-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)

define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_b_256:
; X86: # %bb.0:
; X86-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_b_256:
; X64: # %bb.0:
; X64-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
  %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
  %res2 = add i32 %res, %res1
  ret i32 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8)

define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_w_128:
; X86: # %bb.0:
; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_w_128:
; X64: # %bb.0:
; X64-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16)

define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_w_256:
; X86: # %bb.0:
; X86-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_w_256:
; X64: # %bb.0:
; X64-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}
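
; NOTE (editorial sketch, not part of the autogenerated checks): the
; cvtb2mask/cvtw2mask intrinsics below extract the sign bit of every element,
; which is why the byte forms lower to vpmovmskb. The generic-IR equivalent
; is a signed compare against zero plus a bitcast. Hypothetical helper:
define i16 @sketch_cvtb2mask_128(<16 x i8> %x) {
  ; true exactly where the element's sign bit is set
  %cmp = icmp slt <16 x i8> %x, zeroinitializer
  %res = bitcast <16 x i1> %cmp to i16
  ret i16 %res
}
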
declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)

define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
  ret i16 %res
}

declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)

define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
  ret i32 %res
}

declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)

define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
  ret i8 %res
}

declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)

define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
  ret i16 %res
}

declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
; X64: #
%bb.0: 6407; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6408; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 6409; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6410; X64-NEXT: retq # encoding: [0xc3] 6411 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6412 ret <8 x i16> %res 6413} 6414 6415declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6416 6417define <16 x i16>@test_int_x86_avx512_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6418; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_256: 6419; CHECK: # %bb.0: 6420; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1] 6421; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6422 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6423 ret <16 x i16> %res 6424} 6425 6426define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6427; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 6428; X86: # %bb.0: 6429; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6430; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 6431; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6432; X86-NEXT: retl # encoding: [0xc3] 6433; 6434; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 6435; X64: # %bb.0: 6436; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6437; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 6438; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6439; X64-NEXT: retq # encoding: [0xc3] 6440 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6441 ret <16 x i16> %res 6442} 6443 6444declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6445 6446define <8 x i16>@test_int_x86_avx512_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6447; CHECK-LABEL: test_int_x86_avx512_pmulh_w_128: 6448; CHECK: # %bb.0: 6449; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] 6450; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6451 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6452 ret <8 x i16> %res 6453} 6454 6455define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6456; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 6457; X86: # %bb.0: 6458; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6459; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6460; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 6461; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6462; X86-NEXT: retl # encoding: [0xc3] 6463; 6464; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 6465; X64: # %bb.0: 6466; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6467; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 6468; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xc2] 6469; X64-NEXT: retq # encoding: [0xc3] 6470 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6471 ret <8 x i16> %res 6472} 6473 6474declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6475 6476define <16 x i16>@test_int_x86_avx512_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6477; CHECK-LABEL: test_int_x86_avx512_pmulh_w_256: 6478; CHECK: # %bb.0: 6479; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1] 6480; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6481 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6482 ret <16 x i16> %res 6483} 6484 6485define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6486; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 6487; X86: # %bb.0: 6488; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6489; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 6490; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6491; X86-NEXT: retl # encoding: [0xc3] 6492; 6493; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 6494; X64: # %bb.0: 6495; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6496; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 6497; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6498; X64-NEXT: retq # encoding: [0xc3] 6499 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6500 ret <16 x i16> %res 6501} 6502 6503declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6504 6505define <8 x i16>@test_int_x86_avx512_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6506; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_128: 6507; CHECK: # %bb.0: 6508; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] 6509; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6510 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6511 ret <8 x i16> %res 6512} 6513 6514define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6515; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 6516; X86: # %bb.0: 6517; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6518; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6519; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 6520; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6521; X86-NEXT: retl # encoding: [0xc3] 6522; 6523; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 6524; X64: # %bb.0: 6525; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6526; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 6527; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6528; X64-NEXT: retq # encoding: [0xc3] 6529 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6530 ret <8 x i16> %res 6531} 6532 6533declare <16 
x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}
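
; NOTE (editorial sketch, not part of the autogenerated checks): vpmulhrsw
; computes a rounded, scaled signed product per lane: ((a*b >> 14) + 1) >> 1,
; truncated back to 16 bits. A generic-IR sketch with a hypothetical name:
define <8 x i16> @sketch_pmulhrsw_128(<8 x i16> %a, <8 x i16> %b) {
  %sa = sext <8 x i16> %a to <8 x i32>
  %sb = sext <8 x i16> %b to <8 x i32>
  %mul = mul <8 x i32> %sa, %sb
  ; shift the 32-bit product down by 14, round, then halve
  %s14 = ashr <8 x i32> %mul, <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
  %rnd = add <8 x i32> %s14, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %s1 = ashr <8 x i32> %rnd, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = trunc <8 x i32> %s1 to <8 x i16>
  ret <8 x i16> %res
}
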
declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; CHECK-NEXT: ret{{[l|q]}}
# encoding: [0xc3] 6658 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) 6659 ret <8 x i32> %res 6660} 6661 6662define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 6663; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6664; X86: # %bb.0: 6665; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6666; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6667; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6668; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6669; X86-NEXT: retl # encoding: [0xc3] 6670; 6671; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6672; X64: # %bb.0: 6673; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6674; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6675; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6676; X64-NEXT: retq # encoding: [0xc3] 6677 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) 6678 ret <8 x i32> %res 6679} 6680 6681declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6682 6683define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6684; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128: 6685; CHECK: # %bb.0: 6686; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0] 6687; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6688 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6689 ret <8 x i16> %res 6690} 6691 6692define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6693; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6694; X86: # %bb.0: 6695; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6696; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6697; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6698; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6699; X86-NEXT: retl # encoding: [0xc3] 6700; 6701; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6702; X64: # %bb.0: 6703; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6704; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6705; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6706; X64-NEXT: retq # encoding: [0xc3] 6707 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6708 ret <8 x i16> %res 6709} 6710 6711define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 6712; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128: 6713; X86: # %bb.0: 6714; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6715; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6716; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6717; X86-NEXT: retl # encoding: [0xc3] 6718; 6719; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128: 6720; X64: # %bb.0: 6721; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] 6722; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6723; X64-NEXT: retq # encoding: [0xc3] 6724 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 6725 ret <8 x i16> %res 6726} 6727 6728declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6729 6730define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6731; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256: 6732; CHECK: # %bb.0: 6733; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0] 6734; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6735 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6736 ret <16 x i16> %res 6737} 6738 6739define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6740; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6741; X86: # %bb.0: 6742; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6743; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6744; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6745; X86-NEXT: retl # encoding: [0xc3] 6746; 6747; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6748; X64: # %bb.0: 6749; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6750; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6751; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6752; X64-NEXT: retq # encoding: [0xc3] 6753 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6754 ret <16 x i16> %res 6755} 6756 6757define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 6758; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256: 6759; X86: # %bb.0: 6760; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6761; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6762; X86-NEXT: retl # encoding: [0xc3] 6763; 6764; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256: 6765; X64: # %bb.0: 6766; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6767; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6768; X64-NEXT: retq # encoding: [0xc3] 6769 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 6770 ret <16 x i16> %res 6771} 6772 6773declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6774 6775define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6776; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128: 6777; CHECK: # %bb.0: 6778; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2] 6779; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6780 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6781 ret <8 x i16> %res 6782} 6783 6784define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6785; X86-LABEL: 
test_int_x86_avx512_mask_vpermt2var_hi_128: 6786; X86: # %bb.0: 6787; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6788; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6789; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6790; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6791; X86-NEXT: retl # encoding: [0xc3] 6792; 6793; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 6794; X64: # %bb.0: 6795; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6796; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6797; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6798; X64-NEXT: retq # encoding: [0xc3] 6799 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6800 ret <8 x i16> %res 6801} 6802 6803declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6804 6805define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6806; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6807; X86: # %bb.0: 6808; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6809; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6810; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2] 6811; X86-NEXT: retl # encoding: [0xc3] 6812; 6813; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6814; X64: # %bb.0: 6815; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6816; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2] 6817; X64-NEXT: retq # encoding: [0xc3] 6818 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6819 ret <8 x i16> %res 6820} 6821 6822declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6823 6824define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6825; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256: 6826; CHECK: # %bb.0: 6827; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2] 6828; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6829 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6830 ret <16 x i16> %res 6831} 6832 6833define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6834; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6835; X86: # %bb.0: 6836; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6837; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6838; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6839; X86-NEXT: retl # encoding: [0xc3] 6840; 6841; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6842; X64: # %bb.0: 6843; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6844; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6845; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6846; X64-NEXT: retq # encoding: [0xc3] 6847 %res = call 
<16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6848 ret <16 x i16> %res 6849} 6850 6851declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6852 6853define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6854; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6855; X86: # %bb.0: 6856; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6857; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2] 6858; X86-NEXT: retl # encoding: [0xc3] 6859; 6860; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6861; X64: # %bb.0: 6862; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6863; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2] 6864; X64-NEXT: retq # encoding: [0xc3] 6865 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6866 ret <16 x i16> %res 6867} 6868 6869declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6870 6871define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6872; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128: 6873; CHECK: # %bb.0: 6874; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2] 6875; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6876 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6877 ret <8 x i16> %res 6878} 6879 6880define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6881; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6882; X86: # %bb.0: 6883; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6884; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6885; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6886; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6887; X86-NEXT: retl # encoding: [0xc3] 6888; 6889; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6890; X64: # %bb.0: 6891; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6892; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6893; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6894; X64-NEXT: retq # encoding: [0xc3] 6895 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6896 ret <8 x i16> %res 6897} 6898 6899declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6900 6901define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6902; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256: 6903; CHECK: # %bb.0: 6904; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2] 6905; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6906 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6907 ret <16 x i16> %res 6908} 6909 6910define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> 
%x2, i16 %x3) { 6911; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6912; X86: # %bb.0: 6913; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6914; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6915; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6916; X86-NEXT: retl # encoding: [0xc3] 6917; 6918; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6919; X64: # %bb.0: 6920; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6921; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6922; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6923; X64-NEXT: retq # encoding: [0xc3] 6924 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6925 ret <16 x i16> %res 6926} 6927 6928declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) 6929 6930define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 6931; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6932; X86: # %bb.0: 6933; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 6934; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6935; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6936; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] 6937; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 6938; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] 6939; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 6940; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 6941; X86-NEXT: retl # encoding: [0xc3] 6942; 6943; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6944; X64: # %bb.0: 6945; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] 6946; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6947; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] 6948; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 6949; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] 6950; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] 6951; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 6952; X64-NEXT: retq # encoding: [0xc3] 6953 %res0 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) 6954 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4) 6955 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1) 6956 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 6957 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 6958 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 6959 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 6960} 
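
; NOTE (editorial sketch, not part of the autogenerated checks): each masked
; dbpsadbw intrinsic is the unmasked operation followed by a select on the
; bitcast mask; the {z} forms select against a zero vector instead of the
; passthrough. The unmasked intrinsic name is assumed from current LLVM and
; the helper name is hypothetical:
declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)

define <8 x i16> @sketch_mask_dbpsadbw_128(<16 x i8> %a, <16 x i8> %b, <8 x i16> %src, i8 %m) {
  ; unmasked double-block packed SAD with immediate control 2
  %sad = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %a, <16 x i8> %b, i32 2)
  ; merge-masking: keep %src lanes where the mask bit is clear
  %mask = bitcast i8 %m to <8 x i1>
  %res = select <8 x i1> %mask, <8 x i16> %sad, <8 x i16> %src
  ret <8 x i16> %res
}
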
declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02]
; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}
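
; NOTE (editorial sketch, not part of the autogenerated checks): the
; paddus/psubus intrinsics exercised below correspond to the
; target-independent saturating intrinsics llvm.uadd.sat/llvm.usub.sat, with
; the masked forms adding the usual select. Hypothetical helper name:
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @sketch_mask_adds_epu16_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %src, i8 %m) {
  ; unsigned saturating add; usub.sat would model the psubus tests
  %sum = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %mask = bitcast i8 %m to <8 x i1>
  %res = select <8 x i1> %mask, <8 x i16> %sum, <8 x i16> %src
  ret <8 x i16> %res
}
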
define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_adds_epu16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epu16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_adds_epu16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
;
X86-LABEL: test_mask_adds_epu16_rmkz_128: 7079; X86: # %bb.0: 7080; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7081; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7082; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7083; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00] 7084; X86-NEXT: retl # encoding: [0xc3] 7085; 7086; X64-LABEL: test_mask_adds_epu16_rmkz_128: 7087; X64: # %bb.0: 7088; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7089; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07] 7090; X64-NEXT: retq # encoding: [0xc3] 7091 %b = load <8 x i16>, ptr %ptr_b 7092 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7093 ret <8 x i16> %res 7094} 7095 7096declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 7097 7098define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 7099; CHECK-LABEL: test_mask_adds_epu16_rr_256: 7100; CHECK: # %bb.0: 7101; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1] 7102; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7103 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7104 ret <16 x i16> %res 7105} 7106 7107define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 7108; X86-LABEL: test_mask_adds_epu16_rrk_256: 7109; X86: # %bb.0: 7110; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7111; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 7112; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7113; X86-NEXT: retl # encoding: [0xc3] 7114; 7115; X64-LABEL: test_mask_adds_epu16_rrk_256: 7116; X64: # %bb.0: 7117; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7118; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 7119; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7120; X64-NEXT: retq # encoding: [0xc3] 7121 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7122 ret <16 x i16> %res 7123} 7124 7125define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 7126; X86-LABEL: test_mask_adds_epu16_rrkz_256: 7127; X86: # %bb.0: 7128; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7129; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 7130; X86-NEXT: retl # encoding: [0xc3] 7131; 7132; X64-LABEL: test_mask_adds_epu16_rrkz_256: 7133; X64: # %bb.0: 7134; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7135; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 7136; X64-NEXT: retq # encoding: [0xc3] 7137 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7138 ret <16 x i16> %res 7139} 7140 7141define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) { 7142; X86-LABEL: test_mask_adds_epu16_rm_256: 7143; X86: # %bb.0: 7144; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 7145; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00] 7146; X86-NEXT: retl # encoding: [0xc3] 7147; 7148; X64-LABEL: test_mask_adds_epu16_rm_256: 7149; X64: # %bb.0: 7150; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07] 7151; X64-NEXT: retq # encoding: [0xc3] 7152 %b = load <16 x i16>, ptr %ptr_b 7153 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7154 ret <16 x i16> %res 7155} 7156 7157define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) { 7158; X86-LABEL: test_mask_adds_epu16_rmk_256: 7159; X86: # %bb.0: 7160; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7161; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7162; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08] 7163; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7164; X86-NEXT: retl # encoding: [0xc3] 7165; 7166; X64-LABEL: test_mask_adds_epu16_rmk_256: 7167; X64: # %bb.0: 7168; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7169; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] 7170; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7171; X64-NEXT: retq # encoding: [0xc3] 7172 %b = load <16 x i16>, ptr %ptr_b 7173 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7174 ret <16 x i16> %res 7175} 7176 7177define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) { 7178; X86-LABEL: test_mask_adds_epu16_rmkz_256: 7179; X86: # %bb.0: 7180; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7181; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7182; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00] 7183; X86-NEXT: retl # encoding: [0xc3] 7184; 7185; X64-LABEL: test_mask_adds_epu16_rmkz_256: 7186; X64: # %bb.0: 7187; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7188; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07] 7189; X64-NEXT: retq # encoding: [0xc3] 7190 %b = load <16 x i16>, ptr %ptr_b 7191 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7192 ret <16 x i16> %res 7193} 7194 7195declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 7196 7197define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 7198; CHECK-LABEL: test_mask_subs_epu16_rr_128: 7199; CHECK: # %bb.0: 7200; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] 7201; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7202 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7203 ret <8 x i16> %res 7204} 7205 7206define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 7207; X86-LABEL: test_mask_subs_epu16_rrk_128: 7208; X86: # %bb.0: 7209; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7210; X86-NEXT: kmovd %eax, %k1 # 
define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epu16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_subs_epu16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epu16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_subs_epu16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

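; Masked unsigned saturating add of bytes (vpaddusb), 128-bit and 256-bit forms.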
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epu8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_adds_epu8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epu8_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epu8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_adds_epu8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epu8_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epu8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_adds_epu8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epu8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

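; Masked unsigned saturating subtract of bytes (vpsubusb), 128-bit and 256-bit forms.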
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epu8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_subs_epu8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epu8_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epu8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_subs_epu8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epu8_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epu8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epu8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epu8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

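; Masked signed saturating add of words (vpaddsw), 128-bit and 256-bit forms.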
define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_adds_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_adds_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

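; Signed saturating subtract tested through the plain SSE2/AVX2 psubs intrinsics:
; the scalar mask is bitcast to a vector of i1 and a select blends the saturated
; result with the pass-through value (or zero), which should still fold into a
; single masked vpsubsw/vpsubsb.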
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0

define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_test_subs_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_test_subs_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_test_subs_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_test_subs_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0

define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_test_subs_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_test_subs_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_test_subs_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_test_subs_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0

define <16 x i8> @test_test_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_test_subs_epi8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_test_subs_epi8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_test_subs_epi8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_test_subs_epi8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0

define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_test_subs_epi8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_test_subs_epi8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_test_subs_epi8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_test_subs_epi8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_test_subs_epi8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

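; Masked signed saturating subtract of words through the AVX-512 mask intrinsic
; (llvm.x86.avx512.mask.psubs.w), 128-bit and 256-bit forms.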
[0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 8280; X64-NEXT: retq # encoding: [0xc3] 8281 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8282 %2 = bitcast i32 %mask to <32 x i1> 8283 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8284 ret <32 x i8> %3 8285} 8286 8287define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) { 8288; X86-LABEL: test_test_subs_epi8_rmk_256: 8289; X86: # %bb.0: 8290; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8291; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8292; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08] 8293; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8294; X86-NEXT: retl # encoding: [0xc3] 8295; 8296; X64-LABEL: test_test_subs_epi8_rmk_256: 8297; X64: # %bb.0: 8298; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8299; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] 8300; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8301; X64-NEXT: retq # encoding: [0xc3] 8302 %b = load <32 x i8>, ptr %ptr_b 8303 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8304 %2 = bitcast i32 %mask to <32 x i1> 8305 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 8306 ret <32 x i8> %3 8307} 8308 8309define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) { 8310; X86-LABEL: test_test_subs_epi8_rmkz_256: 8311; X86: # %bb.0: 8312; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8313; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8314; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00] 8315; X86-NEXT: retl # encoding: [0xc3] 8316; 8317; X64-LABEL: test_test_subs_epi8_rmkz_256: 8318; X64: # %bb.0: 8319; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8320; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] 8321; X64-NEXT: retq # encoding: [0xc3] 8322 %b = load <32 x i8>, ptr %ptr_b 8323 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8324 %2 = bitcast i32 %mask to <32 x i1> 8325 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8326 ret <32 x i8> %3 8327} 8328 8329define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 8330; CHECK-LABEL: test_mask_subs_epi16_rr_128: 8331; CHECK: # %bb.0: 8332; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] 8333; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8334 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 8335 ret <8 x i16> %res 8336} 8337 8338define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 8339; X86-LABEL: test_mask_subs_epi16_rrk_128: 8340; X86: # %bb.0: 8341; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8342; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8343; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 8344; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8345; X86-NEXT: retl # encoding: [0xc3] 8346; 8347; X64-LABEL: 
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_subs_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_subs_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0

define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_adds_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; X86-LABEL: test_adds_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_adds_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; X86-LABEL: test_adds_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

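; Note: the test_adds_epi16/epi8_* functions in this group call the generic
; @llvm.x86.sse2.padds.* / @llvm.x86.avx2.padds.* intrinsics and express
; masking explicitly in IR: the scalar mask is bitcast to a vector of i1 and
; a select picks between the intrinsic result and the pass-through (or zero)
; vector, which the backend folds into a single masked vpaddsw/vpaddsb.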
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0

define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_adds_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; X86-LABEL: test_adds_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_adds_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_adds_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0

define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_adds_epi8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_adds_epi8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_adds_epi8_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_adds_epi8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_adds_epi8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0

define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_adds_epi8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_adds_epi8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_adds_epi8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_adds_epi8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_adds_epi8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

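; The test_mask_adds_epi8_* functions below exercise the legacy
; @llvm.x86.avx512.mask.padds.b.* intrinsics directly: there the pass-through
; vector and the integer mask are operands of the intrinsic itself rather
; than an explicit IR-level select.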
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epi8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_adds_epi8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epi8_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_adds_epi8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_adds_epi8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epi8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_adds_epi8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_adds_epi8_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_adds_epi8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_adds_epi8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_adds_epi8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epi8_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; X86-LABEL: test_mask_subs_epi8_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epi8_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_subs_epi8_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_subs_epi8_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi8_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi8_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; X86-LABEL: test_mask_subs_epi8_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_subs_epi8_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_subs_epi8_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_subs_epi8_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

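; Variable per-element word shifts (vpsravw/vpsllvw/vpsrlvw) are AVX512BW
; instructions with no VEX equivalent, so the checks below expect EVEX
; encodings even for the unmasked forms.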
declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  ret <16 x i16> %res
}

define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  ret <16 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  ret <8 x i16> %res
}

define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}
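
; VPMOVWB truncates each 16-bit element to a byte, narrowing a ymm source
; into an xmm result. Because these functions touch 256-bit ymm state, the
; compiler emits vzeroupper before every return. Unmasked, merge-masked,
; and zero-masked variants of the 256-bit truncation are covered below.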
declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  ret <16 x i8> %res
}