xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll (revision ac254fc055980219b30821c3717c6b7db0fbbc46)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
8
9;
10; 128-bit vectors
11;
12
; Pairwise subtraction on <2 x double>, written as scalar
; extractelement / fsub / insertelement (starting from poison):
;   result[0] = a[0] - a[1]
;   result[1] = b[0] - b[1]
; The assertions use the prefix shared by every run configuration and expect
; two shufflevectors over (a, b) - mask <0, 2> for the minuends and <1, 3> for
; the subtrahends - feeding a single <2 x double> fsub.
13define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
14; CHECK-LABEL: @test_v2f64(
15; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
16; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
17; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
18; CHECK-NEXT:    ret <2 x double> [[TMP3]]
19;
20  %a0 = extractelement <2 x double> %a, i32 0
21  %a1 = extractelement <2 x double> %a, i32 1
22  %b0 = extractelement <2 x double> %b, i32 0
23  %b1 = extractelement <2 x double> %b, i32 1
24  %r0 = fsub double %a0, %a1
25  %r1 = fsub double %b0, %b1
26  %r00 = insertelement <2 x double> poison, double %r0, i32 0
27  %r01 = insertelement <2 x double>  %r00, double %r1, i32 1
28  ret <2 x double> %r01
29}
30
; Pairwise subtraction on <4 x float>, written with scalar operations:
;   result = [a0 - a1, a2 - a3, b0 - b1, b2 - b3]
; (the pairs from %a fill lanes 0-1, the pairs from %b fill lanes 2-3).
; The assertions use the prefix shared by every run configuration and expect
; two shufflevectors over (a, b) - mask <0, 2, 4, 6> for the minuends and
; <1, 3, 5, 7> for the subtrahends - feeding a single <4 x float> fsub.
31define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
32; CHECK-LABEL: @test_v4f32(
33; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
34; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
35; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
36; CHECK-NEXT:    ret <4 x float> [[TMP3]]
37;
38  %a0 = extractelement <4 x float> %a, i32 0
39  %a1 = extractelement <4 x float> %a, i32 1
40  %a2 = extractelement <4 x float> %a, i32 2
41  %a3 = extractelement <4 x float> %a, i32 3
42  %b0 = extractelement <4 x float> %b, i32 0
43  %b1 = extractelement <4 x float> %b, i32 1
44  %b2 = extractelement <4 x float> %b, i32 2
45  %b3 = extractelement <4 x float> %b, i32 3
46  %r0 = fsub float %a0, %a1
47  %r1 = fsub float %a2, %a3
48  %r2 = fsub float %b0, %b1
49  %r3 = fsub float %b2, %b3
50  %r00 = insertelement <4 x float> poison, float %r0, i32 0
51  %r01 = insertelement <4 x float>  %r00, float %r1, i32 1
52  %r02 = insertelement <4 x float>  %r01, float %r2, i32 2
53  %r03 = insertelement <4 x float>  %r02, float %r3, i32 3
54  ret <4 x float> %r03
55}
56
; Integer counterpart of the <2 x double> case: pairwise subtraction on
; <2 x i64> written with scalar extractelement / sub / insertelement:
;   result[0] = a[0] - a[1]
;   result[1] = b[0] - b[1]
; The assertions use the prefix shared by every run configuration and expect
; two shufflevectors over (a, b) - masks <0, 2> and <1, 3> - feeding a single
; <2 x i64> sub.
57define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
58; CHECK-LABEL: @test_v2i64(
59; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
60; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
61; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
62; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
63;
64  %a0 = extractelement <2 x i64> %a, i32 0
65  %a1 = extractelement <2 x i64> %a, i32 1
66  %b0 = extractelement <2 x i64> %b, i32 0
67  %b1 = extractelement <2 x i64> %b, i32 1
68  %r0 = sub i64 %a0, %a1
69  %r1 = sub i64 %b0, %b1
70  %r00 = insertelement <2 x i64> poison, i64 %r0, i32 0
71  %r01 = insertelement <2 x i64>  %r00, i64 %r1, i32 1
72  ret <2 x i64> %r01
73}
74
; Pairwise integer subtraction on <4 x i32>, written with scalar operations:
;   result = [a0 - a1, a2 - a3, b0 - b1, b2 - b3]
; The assertions use the prefix shared by every run configuration and expect
; two shufflevectors over (a, b) - mask <0, 2, 4, 6> for the minuends and
; <1, 3, 5, 7> for the subtrahends - feeding a single <4 x i32> sub.
75define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
76; CHECK-LABEL: @test_v4i32(
77; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
78; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
79; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
80; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
81;
82  %a0 = extractelement <4 x i32> %a, i32 0
83  %a1 = extractelement <4 x i32> %a, i32 1
84  %a2 = extractelement <4 x i32> %a, i32 2
85  %a3 = extractelement <4 x i32> %a, i32 3
86  %b0 = extractelement <4 x i32> %b, i32 0
87  %b1 = extractelement <4 x i32> %b, i32 1
88  %b2 = extractelement <4 x i32> %b, i32 2
89  %b3 = extractelement <4 x i32> %b, i32 3
90  %r0 = sub i32 %a0, %a1
91  %r1 = sub i32 %a2, %a3
92  %r2 = sub i32 %b0, %b1
93  %r3 = sub i32 %b2, %b3
94  %r00 = insertelement <4 x i32> poison, i32 %r0, i32 0
95  %r01 = insertelement <4 x i32>  %r00, i32 %r1, i32 1
96  %r02 = insertelement <4 x i32>  %r01, i32 %r2, i32 2
97  %r03 = insertelement <4 x i32>  %r02, i32 %r3, i32 3
98  ret <4 x i32> %r03
99}
100
; Pairwise integer subtraction on <8 x i16>, written with scalar operations:
;   result[0..3] = a0 - a1, a2 - a3, a4 - a5, a6 - a7
;   result[4..7] = b0 - b1, b2 - b3, b4 - b5, b6 - b7
; The assertions use the prefix shared by every run configuration and expect
; two shufflevectors over (a, b) - the even indices 0..14 for the minuends and
; the odd indices 1..15 for the subtrahends - feeding a single <8 x i16> sub.
101define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
102; CHECK-LABEL: @test_v8i16(
103; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
104; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
105; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
106; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
107;
108  %a0 = extractelement <8 x i16> %a, i32 0
109  %a1 = extractelement <8 x i16> %a, i32 1
110  %a2 = extractelement <8 x i16> %a, i32 2
111  %a3 = extractelement <8 x i16> %a, i32 3
112  %a4 = extractelement <8 x i16> %a, i32 4
113  %a5 = extractelement <8 x i16> %a, i32 5
114  %a6 = extractelement <8 x i16> %a, i32 6
115  %a7 = extractelement <8 x i16> %a, i32 7
116  %b0 = extractelement <8 x i16> %b, i32 0
117  %b1 = extractelement <8 x i16> %b, i32 1
118  %b2 = extractelement <8 x i16> %b, i32 2
119  %b3 = extractelement <8 x i16> %b, i32 3
120  %b4 = extractelement <8 x i16> %b, i32 4
121  %b5 = extractelement <8 x i16> %b, i32 5
122  %b6 = extractelement <8 x i16> %b, i32 6
123  %b7 = extractelement <8 x i16> %b, i32 7
124  %r0 = sub i16 %a0, %a1
125  %r1 = sub i16 %a2, %a3
126  %r2 = sub i16 %a4, %a5
127  %r3 = sub i16 %a6, %a7
128  %r4 = sub i16 %b0, %b1
129  %r5 = sub i16 %b2, %b3
130  %r6 = sub i16 %b4, %b5
131  %r7 = sub i16 %b6, %b7
132  %r00 = insertelement <8 x i16> poison, i16 %r0, i32 0
133  %r01 = insertelement <8 x i16>  %r00, i16 %r1, i32 1
134  %r02 = insertelement <8 x i16>  %r01, i16 %r2, i32 2
135  %r03 = insertelement <8 x i16>  %r02, i16 %r3, i32 3
136  %r04 = insertelement <8 x i16>  %r03, i16 %r4, i32 4
137  %r05 = insertelement <8 x i16>  %r04, i16 %r5, i32 5
138  %r06 = insertelement <8 x i16>  %r05, i16 %r6, i32 6
139  %r07 = insertelement <8 x i16>  %r06, i16 %r7, i32 7
140  ret <8 x i16> %r07
141}
142
143;
144; 256-bit vectors
145;
146
; Pairwise subtraction on <4 x double>, written with scalar operations. The
; result interleaves %a and %b within each 2-element half:
;   result = [a0 - a1, b0 - b1, a2 - a3, b2 - b3]
; Expected output differs per run configuration:
;   * SSE and SLM prefixes - four shufflevectors producing <2 x double>
;     operands, two <2 x double> fsubs (one per half of the result), and a
;     final shufflevector that concatenates them into <4 x double>.
;   * AVX prefix (shared by the corei7-avx, core-avx2, knl and skx runs) -
;     two <4 x double> shufflevectors with masks <0, 4, 2, 6> and <1, 5, 3, 7>
;     feeding a single <4 x double> fsub.
147define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
148; SSE-LABEL: @test_v4f64(
149; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
150; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
151; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
152; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
153; SSE-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
154; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
155; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156; SSE-NEXT:    ret <4 x double> [[TMP7]]
157;
158; SLM-LABEL: @test_v4f64(
159; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
160; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
161; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
162; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
163; SLM-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
164; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
165; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
166; SLM-NEXT:    ret <4 x double> [[TMP7]]
167;
168; AVX-LABEL: @test_v4f64(
169; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
170; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
171; AVX-NEXT:    [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
172; AVX-NEXT:    ret <4 x double> [[TMP3]]
173;
174  %a0 = extractelement <4 x double> %a, i32 0
175  %a1 = extractelement <4 x double> %a, i32 1
176  %a2 = extractelement <4 x double> %a, i32 2
177  %a3 = extractelement <4 x double> %a, i32 3
178  %b0 = extractelement <4 x double> %b, i32 0
179  %b1 = extractelement <4 x double> %b, i32 1
180  %b2 = extractelement <4 x double> %b, i32 2
181  %b3 = extractelement <4 x double> %b, i32 3
182  %r0 = fsub double %a0, %a1
183  %r1 = fsub double %b0, %b1
184  %r2 = fsub double %a2, %a3
185  %r3 = fsub double %b2, %b3
186  %r00 = insertelement <4 x double> poison, double %r0, i32 0
187  %r01 = insertelement <4 x double>  %r00, double %r1, i32 1
188  %r02 = insertelement <4 x double>  %r01, double %r2, i32 2
189  %r03 = insertelement <4 x double>  %r02, double %r3, i32 3
190  ret <4 x double> %r03
191}
192
; Pairwise subtraction on <8 x float>, written with scalar operations. The
; result interleaves %a and %b within each 4-element half:
;   result[0..3] = a0 - a1, a2 - a3, b0 - b1, b2 - b3
;   result[4..7] = a4 - a5, a6 - a7, b4 - b5, b6 - b7
; Expected output differs per run configuration:
;   * SSE and SLM prefixes - four shufflevectors producing <4 x float>
;     operands, two <4 x float> fsubs (one per half of the result), and a
;     final shufflevector that concatenates them into <8 x float>.
;   * AVX prefix (shared by the corei7-avx, core-avx2, knl and skx runs) -
;     two <8 x float> shufflevectors feeding a single <8 x float> fsub.
193define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
194; SSE-LABEL: @test_v8f32(
195; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
196; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
197; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
198; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
199; SSE-NEXT:    [[TMP5:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
200; SSE-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP2]], [[TMP4]]
201; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
202; SSE-NEXT:    ret <8 x float> [[TMP7]]
203;
204; SLM-LABEL: @test_v8f32(
205; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
206; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
207; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
208; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
209; SLM-NEXT:    [[TMP5:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
210; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP2]], [[TMP4]]
211; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
212; SLM-NEXT:    ret <8 x float> [[TMP7]]
213;
214; AVX-LABEL: @test_v8f32(
215; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
216; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
217; AVX-NEXT:    [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
218; AVX-NEXT:    ret <8 x float> [[TMP3]]
219;
220  %a0 = extractelement <8 x float> %a, i32 0
221  %a1 = extractelement <8 x float> %a, i32 1
222  %a2 = extractelement <8 x float> %a, i32 2
223  %a3 = extractelement <8 x float> %a, i32 3
224  %a4 = extractelement <8 x float> %a, i32 4
225  %a5 = extractelement <8 x float> %a, i32 5
226  %a6 = extractelement <8 x float> %a, i32 6
227  %a7 = extractelement <8 x float> %a, i32 7
228  %b0 = extractelement <8 x float> %b, i32 0
229  %b1 = extractelement <8 x float> %b, i32 1
230  %b2 = extractelement <8 x float> %b, i32 2
231  %b3 = extractelement <8 x float> %b, i32 3
232  %b4 = extractelement <8 x float> %b, i32 4
233  %b5 = extractelement <8 x float> %b, i32 5
234  %b6 = extractelement <8 x float> %b, i32 6
235  %b7 = extractelement <8 x float> %b, i32 7
236  %r0 = fsub float %a0, %a1
237  %r1 = fsub float %a2, %a3
238  %r2 = fsub float %b0, %b1
239  %r3 = fsub float %b2, %b3
240  %r4 = fsub float %a4, %a5
241  %r5 = fsub float %a6, %a7
242  %r6 = fsub float %b4, %b5
243  %r7 = fsub float %b6, %b7
244  %r00 = insertelement <8 x float> poison, float %r0, i32 0
245  %r01 = insertelement <8 x float>  %r00, float %r1, i32 1
246  %r02 = insertelement <8 x float>  %r01, float %r2, i32 2
247  %r03 = insertelement <8 x float>  %r02, float %r3, i32 3
248  %r04 = insertelement <8 x float>  %r03, float %r4, i32 4
249  %r05 = insertelement <8 x float>  %r04, float %r5, i32 5
250  %r06 = insertelement <8 x float>  %r05, float %r6, i32 6
251  %r07 = insertelement <8 x float>  %r06, float %r7, i32 7
252  ret <8 x float> %r07
253}
254
; Integer counterpart of the <4 x double> case: pairwise subtraction on
; <4 x i64>, written with scalar operations. The result interleaves %a and %b
; within each 2-element half:
;   result = [a0 - a1, b0 - b1, a2 - a3, b2 - b3]
; Expected output differs per run configuration:
;   * SSE and SLM prefixes - four shufflevectors producing <2 x i64>
;     operands, two <2 x i64> subs (one per half of the result), and a final
;     shufflevector that concatenates them into <4 x i64>.
;   * AVX prefix (shared by the corei7-avx, core-avx2, knl and skx runs) -
;     two <4 x i64> shufflevectors with masks <0, 4, 2, 6> and <1, 5, 3, 7>
;     feeding a single <4 x i64> sub.
255define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
256; SSE-LABEL: @test_v4i64(
257; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
258; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
259; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
260; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
261; SSE-NEXT:    [[TMP5:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
262; SSE-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
263; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
264; SSE-NEXT:    ret <4 x i64> [[TMP7]]
265;
266; SLM-LABEL: @test_v4i64(
267; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
268; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
269; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
270; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
271; SLM-NEXT:    [[TMP5:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
272; SLM-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
273; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
274; SLM-NEXT:    ret <4 x i64> [[TMP7]]
275;
276; AVX-LABEL: @test_v4i64(
277; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
278; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
279; AVX-NEXT:    [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
280; AVX-NEXT:    ret <4 x i64> [[TMP3]]
281;
282  %a0 = extractelement <4 x i64> %a, i32 0
283  %a1 = extractelement <4 x i64> %a, i32 1
284  %a2 = extractelement <4 x i64> %a, i32 2
285  %a3 = extractelement <4 x i64> %a, i32 3
286  %b0 = extractelement <4 x i64> %b, i32 0
287  %b1 = extractelement <4 x i64> %b, i32 1
288  %b2 = extractelement <4 x i64> %b, i32 2
289  %b3 = extractelement <4 x i64> %b, i32 3
290  %r0 = sub i64 %a0, %a1
291  %r1 = sub i64 %b0, %b1
292  %r2 = sub i64 %a2, %a3
293  %r3 = sub i64 %b2, %b3
294  %r00 = insertelement <4 x i64> poison, i64 %r0, i32 0
295  %r01 = insertelement <4 x i64>  %r00, i64 %r1, i32 1
296  %r02 = insertelement <4 x i64>  %r01, i64 %r2, i32 2
297  %r03 = insertelement <4 x i64>  %r02, i64 %r3, i32 3
298  ret <4 x i64> %r03
299}
300
; Integer counterpart of the <8 x float> case: pairwise subtraction on
; <8 x i32>, written with scalar operations. The result interleaves %a and %b
; within each 4-element half:
;   result[0..3] = a0 - a1, a2 - a3, b0 - b1, b2 - b3
;   result[4..7] = a4 - a5, a6 - a7, b4 - b5, b6 - b7
; Expected output differs per run configuration:
;   * SSE and SLM prefixes - four shufflevectors producing <4 x i32>
;     operands, two <4 x i32> subs (one per half of the result), and a final
;     shufflevector that concatenates them into <8 x i32>.
;   * AVX prefix (shared by the corei7-avx, core-avx2, knl and skx runs) -
;     two <8 x i32> shufflevectors feeding a single <8 x i32> sub.
301define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
302; SSE-LABEL: @test_v8i32(
303; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
304; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
305; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
306; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
307; SSE-NEXT:    [[TMP5:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
308; SSE-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP4]]
309; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
310; SSE-NEXT:    ret <8 x i32> [[TMP7]]
311;
312; SLM-LABEL: @test_v8i32(
313; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
314; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
315; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
316; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
317; SLM-NEXT:    [[TMP5:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
318; SLM-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP4]]
319; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
320; SLM-NEXT:    ret <8 x i32> [[TMP7]]
321;
322; AVX-LABEL: @test_v8i32(
323; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
324; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
325; AVX-NEXT:    [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
326; AVX-NEXT:    ret <8 x i32> [[TMP3]]
327;
328  %a0 = extractelement <8 x i32> %a, i32 0
329  %a1 = extractelement <8 x i32> %a, i32 1
330  %a2 = extractelement <8 x i32> %a, i32 2
331  %a3 = extractelement <8 x i32> %a, i32 3
332  %a4 = extractelement <8 x i32> %a, i32 4
333  %a5 = extractelement <8 x i32> %a, i32 5
334  %a6 = extractelement <8 x i32> %a, i32 6
335  %a7 = extractelement <8 x i32> %a, i32 7
336  %b0 = extractelement <8 x i32> %b, i32 0
337  %b1 = extractelement <8 x i32> %b, i32 1
338  %b2 = extractelement <8 x i32> %b, i32 2
339  %b3 = extractelement <8 x i32> %b, i32 3
340  %b4 = extractelement <8 x i32> %b, i32 4
341  %b5 = extractelement <8 x i32> %b, i32 5
342  %b6 = extractelement <8 x i32> %b, i32 6
343  %b7 = extractelement <8 x i32> %b, i32 7
344  %r0 = sub i32 %a0, %a1
345  %r1 = sub i32 %a2, %a3
346  %r2 = sub i32 %b0, %b1
347  %r3 = sub i32 %b2, %b3
348  %r4 = sub i32 %a4, %a5
349  %r5 = sub i32 %a6, %a7
350  %r6 = sub i32 %b4, %b5
351  %r7 = sub i32 %b6, %b7
352  %r00 = insertelement <8 x i32> poison, i32 %r0, i32 0
353  %r01 = insertelement <8 x i32>  %r00, i32 %r1, i32 1
354  %r02 = insertelement <8 x i32>  %r01, i32 %r2, i32 2
355  %r03 = insertelement <8 x i32>  %r02, i32 %r3, i32 3
356  %r04 = insertelement <8 x i32>  %r03, i32 %r4, i32 4
357  %r05 = insertelement <8 x i32>  %r04, i32 %r5, i32 5
358  %r06 = insertelement <8 x i32>  %r05, i32 %r6, i32 6
359  %r07 = insertelement <8 x i32>  %r06, i32 %r7, i32 7
360  ret <8 x i32> %r07
361}
362
; Pairwise integer subtraction on <16 x i16>, written with scalar operations.
; The result interleaves %a and %b within each 8-element half:
;   result[0..3]   = pairs from a[0..7]
;   result[4..7]   = pairs from b[0..7]
;   result[8..11]  = pairs from a[8..15]
;   result[12..15] = pairs from b[8..15]
; where each pair is (even element) - (following odd element).
; Expected output differs per run configuration:
;   * SSE and SLM prefixes - four shufflevectors producing <8 x i16>
;     operands, two <8 x i16> subs (one per half of the result), and a final
;     shufflevector that concatenates them into <16 x i16>.
;   * AVX prefix (shared by the corei7-avx, core-avx2, knl and skx runs) -
;     two <16 x i16> shufflevectors feeding a single <16 x i16> sub.
363define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
364; SSE-LABEL: @test_v16i16(
365; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
366; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
367; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
368; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
369; SSE-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
370; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP2]], [[TMP4]]
371; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
372; SSE-NEXT:    ret <16 x i16> [[TMP7]]
373;
374; SLM-LABEL: @test_v16i16(
375; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
376; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
377; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
378; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
379; SLM-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]]
380; SLM-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP2]], [[TMP4]]
381; SLM-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
382; SLM-NEXT:    ret <16 x i16> [[TMP7]]
383;
384; AVX-LABEL: @test_v16i16(
385; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
386; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
387; AVX-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
388; AVX-NEXT:    ret <16 x i16> [[TMP3]]
389;
390  %a0  = extractelement <16 x i16> %a, i32 0
391  %a1  = extractelement <16 x i16> %a, i32 1
392  %a2  = extractelement <16 x i16> %a, i32 2
393  %a3  = extractelement <16 x i16> %a, i32 3
394  %a4  = extractelement <16 x i16> %a, i32 4
395  %a5  = extractelement <16 x i16> %a, i32 5
396  %a6  = extractelement <16 x i16> %a, i32 6
397  %a7  = extractelement <16 x i16> %a, i32 7
398  %a8  = extractelement <16 x i16> %a, i32 8
399  %a9  = extractelement <16 x i16> %a, i32 9
400  %a10 = extractelement <16 x i16> %a, i32 10
401  %a11 = extractelement <16 x i16> %a, i32 11
402  %a12 = extractelement <16 x i16> %a, i32 12
403  %a13 = extractelement <16 x i16> %a, i32 13
404  %a14 = extractelement <16 x i16> %a, i32 14
405  %a15 = extractelement <16 x i16> %a, i32 15
406  %b0  = extractelement <16 x i16> %b, i32 0
407  %b1  = extractelement <16 x i16> %b, i32 1
408  %b2  = extractelement <16 x i16> %b, i32 2
409  %b3  = extractelement <16 x i16> %b, i32 3
410  %b4  = extractelement <16 x i16> %b, i32 4
411  %b5  = extractelement <16 x i16> %b, i32 5
412  %b6  = extractelement <16 x i16> %b, i32 6
413  %b7  = extractelement <16 x i16> %b, i32 7
414  %b8  = extractelement <16 x i16> %b, i32 8
415  %b9  = extractelement <16 x i16> %b, i32 9
416  %b10 = extractelement <16 x i16> %b, i32 10
417  %b11 = extractelement <16 x i16> %b, i32 11
418  %b12 = extractelement <16 x i16> %b, i32 12
419  %b13 = extractelement <16 x i16> %b, i32 13
420  %b14 = extractelement <16 x i16> %b, i32 14
421  %b15 = extractelement <16 x i16> %b, i32 15
422  %r0  = sub i16 %a0 , %a1
423  %r1  = sub i16 %a2 , %a3
424  %r2  = sub i16 %a4 , %a5
425  %r3  = sub i16 %a6 , %a7
426  %r4  = sub i16 %b0 , %b1
427  %r5  = sub i16 %b2 , %b3
428  %r6  = sub i16 %b4 , %b5
429  %r7  = sub i16 %b6 , %b7
430  %r8  = sub i16 %a8 , %a9
431  %r9  = sub i16 %a10, %a11
432  %r10 = sub i16 %a12, %a13
433  %r11 = sub i16 %a14, %a15
434  %r12 = sub i16 %b8 , %b9
435  %r13 = sub i16 %b10, %b11
436  %r14 = sub i16 %b12, %b13
437  %r15 = sub i16 %b14, %b15
438  %rv0  = insertelement <16 x i16> poison, i16 %r0 , i32 0
439  %rv1  = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
440  %rv2  = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
441  %rv3  = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
442  %rv4  = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
443  %rv5  = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
444  %rv6  = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
445  %rv7  = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
446  %rv8  = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
447  %rv9  = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
448  %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
449  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
450  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
451  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
452  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
453  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
454  ret <16 x i16> %rv15
455}
456;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
457; AVX1: {{.*}}
458; AVX2: {{.*}}
459; AVX512: {{.*}}
460