; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packssdw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packssdw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packssdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}
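; Sanity check of the folded constant above (an explanatory note, not a
; generated assertion): vpackssdw saturates each signed i32 to the i16 range
; [-32768, 32767] and packs the two sources 64 bits at a time within each
; 128-bit lane. In the second operand, 65535 -> 32767, -1 -> -1 (printed
; 65535), -32767 -> -32767 (printed 32769) and -65535 -> -32768 (printed
; 32768), which is exactly the vector loaded by vmovaps.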


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packsswb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x63,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packsswb:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}
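; Explanatory note: vpacksswb saturates each signed i16 to the i8 range
; [-128, 127], so 255 and 256 become 127, 65535 (i16 -1) and -1 stay -1
; (printed 255), and -255, -256 and -32678 all clamp to -128 (printed 128),
; matching the broadcast constant checked above.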


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packuswb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x67,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packuswb:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}
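; Explanatory note: vpackuswb applies *unsigned* saturation to its signed i16
; inputs, clamping to [0, 255]: 0 -> 0, 255 -> 255, 256 -> 255, and every
; negative input (65535 is i16 -1) -> 0, giving the mostly-zero broadcast
; constant above.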


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_b:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_wd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovmskb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
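; The vzeroupper in the test above is expected: the function uses a ymm
; register but returns only a scalar, so the compiler clears the upper ymm
; state before ret to avoid AVX/SSE transition penalties.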


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulh_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulhu_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psad_bw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psad_bw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) {
; X86-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpsrlw (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpsrlw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a1 = load <8 x i16>, ptr %p
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_sw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_sw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

; Make sure we don't commute this operation (see the note after this test).
define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
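; Why this must not be commuted (an explanatory sketch, not a generated
; check): vpmaddubsw interprets its first source as unsigned bytes and its
; second as signed bytes. For a byte pair where %a0 holds 0xff and %a1 holds
; 0x01, pmaddubsw(%a0, %a1) contributes 255 * 1, whereas the commuted form
; would contribute 1 * (-1). That is why the compiler materializes the load
; with vmovdqa above instead of commuting to fold it into the instruction's
; memory operand.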

define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pshuf_b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pshuf_b:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_mpsadbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone

; We shouldn't commute this operation to fold the load (see the note after
; this test).
define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
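; Explanatory note: mpsadbw is likewise not commutable because its immediate
; selects byte blocks from its two sources asymmetrically (a fixed 32-bit
; block from one source is compared against sliding windows of the other),
; so swapping the operands changes the result; the load is therefore
; materialized with vmovdqa rather than folded by commuting.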

define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packusdw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packusdw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: test_x86_avx2_pblendvb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
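; Note on the vpblendw immediate (explanatory, not a generated check): each
; mask bit selects the corresponding word within each 128-bit lane from the
; intrinsic's second argument, so 7 = 0b111 takes words 0-2 and 8-10 from
; %ymm1, matching the shuffle comment above.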


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

; Check that the arguments are swapped between the intrinsic definition
; and its lowering: the index vector is the first source operand of the
; instruction (see the semantics note after this test).
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
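; Semantics sketch of the cross-lane permute (explanatory note): each result
; element is res[i] = a0[a1[i] & 7], i.e. %a1 supplies the indices. In the
; AT&T syntax above the index register (%ymm1) appears as the second written
; operand but is the instruction's first source, hence the "swap" relative
; to the intrinsic's argument order.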


; Check that the arguments are swapped between the intrinsic definition
; and its lowering: the index vector is the first source operand of the
; instruction.
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permps:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permps:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0xf9,0x8e,0x08]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0xf9,0x8e,0x0f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0xfd,0x8e,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0xfd,0x8e,0x0f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q.256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x8e,0x08]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x8e,0x0f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x8e,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x8e,0x0f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d.256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>) nounwind


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psllv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1,  i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
1124
1125
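; The *_const tests in this group feed out-of-range and negative shift counts
; through the variable-shift intrinsics: vpsllv*/vpsrlv* zero any element whose
; count exceeds the element width, while vpsrav* fills it with the sign bit.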
define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}

define <2 x i64> @test_x86_avx2_psllv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}

define <4 x i64> @test_x86_avx2_psllv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrlv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}

define <2 x i64> @test_x86_avx2_psrlv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [4,4]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}


define <4 x i64> @test_x86_avx2_psrlv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,4]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,4,4,4]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


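; Unlike the logical shifts above, vpsravd saturates the shift count: a count
; of 32 or more yields an element of all sign bits.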
define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrav_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrav_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

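; Gather tests: each gather loads element i only when the sign bit of mask
; element i is set, clearing the mask element as it completes; the trailing i8
; operand is the address scale (2 in these tests).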
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i32> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
                            ptr %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr,
                      <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 x i32> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
                            ptr %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr,
                      <4 x i32>, <4 x double>, i8) nounwind readonly

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i64> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
                            ptr %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr,
                      <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 x i64> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                            ptr %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr,
                      <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                            ptr %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr,
                      <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x i32> %idx, <8 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                            ptr %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr,
                      <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                            ptr %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr,
                      <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x48]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x4f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                            ptr %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr,
                      <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                            ptr %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr,
                      <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                            ptr %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr,
                      <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            ptr %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr,
                      <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                            ptr %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr,
                      <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                            ptr %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr,
                      <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> %idx, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                            ptr %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr,
                      <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x48]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x4f]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                            ptr %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr,
                      <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x48]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x4f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                            ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr,
                      <4 x i64>, <4 x i32>, i8) nounwind readonly

1998; PR13298
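; vgatherdps zeroes its mask operand when it completes, so the compiler must
; copy the mask (vmovaps %ymm2, %ymm3) before the gather whenever the
; original value is still needed, as it is for the store below.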
define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <8 x float> %mask, ptr nocapture %out) {
;; gather with mask
; X86-AVX-LABEL: test_gather_mask:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX-NEXT:    vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX-NEXT:    vmovups %ymm2, (%eax) # encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_gather_mask:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX512VL-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX512VL-NEXT:    vmovups %ymm2, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_gather_mask:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX-NEXT:    vmovups %ymm2, (%rsi) # encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_gather_mask:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX512VL-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX512VL-NEXT:    vmovups %ymm2, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                           ptr %a, <8 x i32> %idx, <8 x float> %mask, i8 4)

;; store the mask out as well, so its value can be checked after the gather
  store <8 x float> %mask, ptr %out, align 4

  ret <8 x float> %res
}

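; The hardware gather only tests the sign bit of each mask element, so the
; sext of the <2 x i1> mask should fold to a single vpsllq $63 that moves
; each boolean into bit 63.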
define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i1> %mask) {
; X86-AVX-LABEL: test_mask_demanded_bits:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_mask_demanded_bits:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX512VL-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mask_demanded_bits:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_mask_demanded_bits:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX512VL-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %mask1 = sext <2 x i1> %mask to <2 x i64>
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            ptr %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2)
  ret <2 x i64> %res
}
