; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX

;
; vXi8
;

; 2 consecutive i8 loads, each sext'd to i64: SLP should emit one <2 x i8>
; load plus a single vector sext on all targets.
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE-LABEL: @loadext_2i8_to_2i64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
; SSE-NEXT:    ret <2 x i64> [[TMP2]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; 4 consecutive i8 loads, each sext'd to i32: expect a single <4 x i8> load
; and one vector sext on all targets.
define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
; SSE-LABEL: @loadext_4i8_to_4i32(
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; SSE-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; 4 consecutive i8 loads, each sext'd to i64 (256-bit result): expect a
; single <4 x i8> load and one vector sext on all targets.
define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
; SSE-LABEL: @loadext_4i8_to_4i64(
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %x2 = sext i8 %i2 to i64
  %x3 = sext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; 8 consecutive i8 loads, each sext'd to i16: expect a single <8 x i8> load
; and one vector sext on all targets.
define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
; SSE-LABEL: @loadext_8i8_to_8i16(
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; SSE-NEXT:    ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT:    ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16>   %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16>   %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16>   %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16>   %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16>   %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16>   %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16>   %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

; 8 consecutive i8 loads, each sext'd to i32 (256-bit result): expect a
; single <8 x i8> load and one vector sext on all targets.
define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
; SSE-LABEL: @loadext_8i8_to_8i32(
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; SSE-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %x4 = sext i8 %i4 to i32
  %x5 = sext i8 %i5 to i32
  %x6 = sext i8 %i6 to i32
  %x7 = sext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

; 16 consecutive i8 loads, each sext'd to i16 (256-bit result): expect a
; single <16 x i8> load and one vector sext on all targets.
define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
; SSE-LABEL: @loadext_16i8_to_16i16(
; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; SSE-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
  %i0  = load i8, ptr %p0,  align 1
  %i1  = load i8, ptr %p1,  align 1
  %i2  = load i8, ptr %p2,  align 1
  %i3  = load i8, ptr %p3,  align 1
  %i4  = load i8, ptr %p4,  align 1
  %i5  = load i8, ptr %p5,  align 1
  %i6  = load i8, ptr %p6,  align 1
  %i7  = load i8, ptr %p7,  align 1
  %i8  = load i8, ptr %p8,  align 1
  %i9  = load i8, ptr %p9,  align 1
  %i10 = load i8, ptr %p10, align 1
  %i11 = load i8, ptr %p11, align 1
  %i12 = load i8, ptr %p12, align 1
  %i13 = load i8, ptr %p13, align 1
  %i14 = load i8, ptr %p14, align 1
  %i15 = load i8, ptr %p15, align 1
  %x0  = sext i8 %i0  to i16
  %x1  = sext i8 %i1  to i16
  %x2  = sext i8 %i2  to i16
  %x3  = sext i8 %i3  to i16
  %x4  = sext i8 %i4  to i16
  %x5  = sext i8 %i5  to i16
  %x6  = sext i8 %i6  to i16
  %x7  = sext i8 %i7  to i16
  %x8  = sext i8 %i8  to i16
  %x9  = sext i8 %i9  to i16
  %x10 = sext i8 %i10 to i16
  %x11 = sext i8 %i11 to i16
  %x12 = sext i8 %i12 to i16
  %x13 = sext i8 %i13 to i16
  %x14 = sext i8 %i14 to i16
  %x15 = sext i8 %i15 to i16
  %v0  = insertelement <16 x i16> undef, i16 %x0,  i32 0
  %v1  = insertelement <16 x i16>  %v0,  i16 %x1,  i32 1
  %v2  = insertelement <16 x i16>  %v1,  i16 %x2,  i32 2
  %v3  = insertelement <16 x i16>  %v2,  i16 %x3,  i32 3
  %v4  = insertelement <16 x i16>  %v3,  i16 %x4,  i32 4
  %v5  = insertelement <16 x i16>  %v4,  i16 %x5,  i32 5
  %v6  = insertelement <16 x i16>  %v5,  i16 %x6,  i32 6
  %v7  = insertelement <16 x i16>  %v6,  i16 %x7,  i32 7
  %v8  = insertelement <16 x i16>  %v7,  i16 %x8,  i32 8
  %v9  = insertelement <16 x i16>  %v8,  i16 %x9,  i32 9
  %v10 = insertelement <16 x i16>  %v9,  i16 %x10, i32 10
  %v11 = insertelement <16 x i16>  %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16>  %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16>  %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16>  %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16>  %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

; 2 consecutive i16 loads, each sext'd to i64: expect a single <2 x i16>
; load and one vector sext on all targets.
define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
; SSE-LABEL: @loadext_2i16_to_2i64(
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SSE-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; 4 consecutive i16 loads, each sext'd to i32: expect a single <4 x i16>
; load and one vector sext on all targets.
define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
; SSE-LABEL: @loadext_4i16_to_4i32(
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; SSE-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; 4 consecutive i16 loads, each sext'd to i64 (256-bit result): expect a
; single <4 x i16> load and one vector sext on all targets.
define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
; SSE-LABEL: @loadext_4i16_to_4i64(
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %x2 = sext i16 %i2 to i64
  %x3 = sext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; 8 consecutive i16 loads, each sext'd to i32 (256-bit result): expect a
; single <8 x i16> load and one vector sext on all targets.
define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
; SSE-LABEL: @loadext_8i16_to_8i32(
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
; SSE-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %i4 = load i16, ptr %p4, align 1
  %i5 = load i16, ptr %p5, align 1
  %i6 = load i16, ptr %p6, align 1
  %i7 = load i16, ptr %p7, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %x4 = sext i16 %i4 to i32
  %x5 = sext i16 %i5 to i32
  %x6 = sext i16 %i6 to i32
  %x7 = sext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

;
; vXi32
;

; 2 consecutive i32 loads, each sext'd to i64: expect a single <2 x i32>
; load and one vector sext on all targets.
define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
; SSE-LABEL: @loadext_2i32_to_2i64(
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; SSE-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; 4 consecutive i32 loads, each sext'd to i64 (256-bit result): expect a
; single <4 x i32> load and one vector sext on all targets.
define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
; SSE-LABEL: @loadext_4i32_to_4i64(
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %i2 = load i32, ptr %p2, align 1
  %i3 = load i32, ptr %p3, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %x2 = sext i32 %i2 to i64
  %x3 = sext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}
