xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/zext.ll (revision 41afef9066eec8daf517ac357a628cdf30c95e39)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
8
9;
10; vXi8
11;
12
; Two i8 values loaded from %p0 and %p0+1 are each zero-extended to i64 and
; inserted into lanes 0 and 1 of the returned <2 x i64>.
; Every check prefix expects the scalar code to become one <2 x i8> load
; (align 1) feeding one vector zext. The SSE2 block uses different capture
; names (TMP1/V1) than the SLM and AVX blocks (TMP2/TMP3); since these are
; regex captures, the matched instruction shapes are the same.
13define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
14; SSE2-LABEL: @loadext_2i8_to_2i64(
15; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
16; SSE2-NEXT:    [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
17; SSE2-NEXT:    ret <2 x i64> [[V1]]
18;
19; SLM-LABEL: @loadext_2i8_to_2i64(
20; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
21; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
22; SLM-NEXT:    ret <2 x i64> [[TMP3]]
23;
24; AVX-LABEL: @loadext_2i8_to_2i64(
25; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
26; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
27; AVX-NEXT:    ret <2 x i64> [[TMP3]]
28;
  ; Address of the second byte; the first byte is read through %p0 directly.
29  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  ; Scalar loads of the two adjacent bytes.
30  %i0 = load i8, ptr %p0, align 1
31  %i1 = load i8, ptr %p1, align 1
  ; Widen each byte to i64.
32  %x0 = zext i8 %i0 to i64
33  %x1 = zext i8 %i1 to i64
  ; Build the result lane by lane, starting from an undef vector.
34  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
35  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
36  ret <2 x i64> %v1
37}
38
; Four i8 values at byte offsets 0..3 from %p0 are each zero-extended to i32
; and inserted into lanes 0..3 of the returned <4 x i32>.
; All three check prefixes expect the same result: a single <4 x i8> load
; (align 1) followed by a single zext to <4 x i32>.
39define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
40; SSE2-LABEL: @loadext_4i8_to_4i32(
41; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
42; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
43; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
44;
45; SLM-LABEL: @loadext_4i8_to_4i32(
46; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
47; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
48; SLM-NEXT:    ret <4 x i32> [[TMP3]]
49;
50; AVX-LABEL: @loadext_4i8_to_4i32(
51; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
52; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
53; AVX-NEXT:    ret <4 x i32> [[TMP3]]
54;
  ; Addresses of bytes 1..3; byte 0 is read through %p0 directly.
55  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
56  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
57  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  ; Scalar loads of the four consecutive bytes.
58  %i0 = load i8, ptr %p0, align 1
59  %i1 = load i8, ptr %p1, align 1
60  %i2 = load i8, ptr %p2, align 1
61  %i3 = load i8, ptr %p3, align 1
  ; Widen each byte to i32.
62  %x0 = zext i8 %i0 to i32
63  %x1 = zext i8 %i1 to i32
64  %x2 = zext i8 %i2 to i32
65  %x3 = zext i8 %i3 to i32
  ; Build the result lane by lane, starting from an undef vector.
66  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
67  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
68  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
69  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
70  ret <4 x i32> %v3
71}
72
; Four i8 values at byte offsets 0..3 from %p0 are each zero-extended to i64
; and inserted into lanes 0..3 of the returned <4 x i64> (a 256-bit result).
; All three check prefixes expect the same result: a single <4 x i8> load
; (align 1) followed by a single zext to <4 x i64>.
73define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
74; SSE2-LABEL: @loadext_4i8_to_4i64(
75; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
76; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
77; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
78;
79; SLM-LABEL: @loadext_4i8_to_4i64(
80; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
81; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
82; SLM-NEXT:    ret <4 x i64> [[TMP3]]
83;
84; AVX-LABEL: @loadext_4i8_to_4i64(
85; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
86; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
87; AVX-NEXT:    ret <4 x i64> [[TMP3]]
88;
  ; Addresses of bytes 1..3; byte 0 is read through %p0 directly.
89  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
90  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
91  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  ; Scalar loads of the four consecutive bytes.
92  %i0 = load i8, ptr %p0, align 1
93  %i1 = load i8, ptr %p1, align 1
94  %i2 = load i8, ptr %p2, align 1
95  %i3 = load i8, ptr %p3, align 1
  ; Widen each byte to i64.
96  %x0 = zext i8 %i0 to i64
97  %x1 = zext i8 %i1 to i64
98  %x2 = zext i8 %i2 to i64
99  %x3 = zext i8 %i3 to i64
  ; Build the result lane by lane, starting from an undef vector.
100  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
101  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
102  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
103  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
104  ret <4 x i64> %v3
105}
106
; Eight i8 values at byte offsets 0..7 from %p0 are each zero-extended to i16
; and inserted into lanes 0..7 of the returned <8 x i16>.
; All three check prefixes expect the same result: a single <8 x i8> load
; (align 1) followed by a single zext to <8 x i16>.
107define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
108; SSE2-LABEL: @loadext_8i8_to_8i16(
109; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
110; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
111; SSE2-NEXT:    ret <8 x i16> [[TMP3]]
112;
113; SLM-LABEL: @loadext_8i8_to_8i16(
114; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
115; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
116; SLM-NEXT:    ret <8 x i16> [[TMP3]]
117;
118; AVX-LABEL: @loadext_8i8_to_8i16(
119; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
120; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
121; AVX-NEXT:    ret <8 x i16> [[TMP3]]
122;
  ; Addresses of bytes 1..7; byte 0 is read through %p0 directly.
123  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
124  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
125  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
126  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
127  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
128  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
129  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  ; Scalar loads of the eight consecutive bytes.
130  %i0 = load i8, ptr %p0, align 1
131  %i1 = load i8, ptr %p1, align 1
132  %i2 = load i8, ptr %p2, align 1
133  %i3 = load i8, ptr %p3, align 1
134  %i4 = load i8, ptr %p4, align 1
135  %i5 = load i8, ptr %p5, align 1
136  %i6 = load i8, ptr %p6, align 1
137  %i7 = load i8, ptr %p7, align 1
  ; Widen each byte to i16.
138  %x0 = zext i8 %i0 to i16
139  %x1 = zext i8 %i1 to i16
140  %x2 = zext i8 %i2 to i16
141  %x3 = zext i8 %i3 to i16
142  %x4 = zext i8 %i4 to i16
143  %x5 = zext i8 %i5 to i16
144  %x6 = zext i8 %i6 to i16
145  %x7 = zext i8 %i7 to i16
  ; Build the result lane by lane, starting from an undef vector.
146  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
147  %v1 = insertelement <8 x i16>   %v0, i16 %x1, i32 1
148  %v2 = insertelement <8 x i16>   %v1, i16 %x2, i32 2
149  %v3 = insertelement <8 x i16>   %v2, i16 %x3, i32 3
150  %v4 = insertelement <8 x i16>   %v3, i16 %x4, i32 4
151  %v5 = insertelement <8 x i16>   %v4, i16 %x5, i32 5
152  %v6 = insertelement <8 x i16>   %v5, i16 %x6, i32 6
153  %v7 = insertelement <8 x i16>   %v6, i16 %x7, i32 7
154  ret <8 x i16> %v7
155}
156
; Eight i8 values at byte offsets 0..7 from %p0 are each zero-extended to i32
; and inserted into lanes 0..7 of the returned <8 x i32> (a 256-bit result).
; All three check prefixes expect the same result: a single <8 x i8> load
; (align 1) followed by a single zext to <8 x i32>.
157define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
158; SSE2-LABEL: @loadext_8i8_to_8i32(
159; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
160; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
161; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
162;
163; SLM-LABEL: @loadext_8i8_to_8i32(
164; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
165; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
166; SLM-NEXT:    ret <8 x i32> [[TMP3]]
167;
168; AVX-LABEL: @loadext_8i8_to_8i32(
169; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
170; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
171; AVX-NEXT:    ret <8 x i32> [[TMP3]]
172;
  ; Addresses of bytes 1..7; byte 0 is read through %p0 directly.
173  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
174  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
175  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
176  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
177  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
178  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
179  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  ; Scalar loads of the eight consecutive bytes.
180  %i0 = load i8, ptr %p0, align 1
181  %i1 = load i8, ptr %p1, align 1
182  %i2 = load i8, ptr %p2, align 1
183  %i3 = load i8, ptr %p3, align 1
184  %i4 = load i8, ptr %p4, align 1
185  %i5 = load i8, ptr %p5, align 1
186  %i6 = load i8, ptr %p6, align 1
187  %i7 = load i8, ptr %p7, align 1
  ; Widen each byte to i32.
188  %x0 = zext i8 %i0 to i32
189  %x1 = zext i8 %i1 to i32
190  %x2 = zext i8 %i2 to i32
191  %x3 = zext i8 %i3 to i32
192  %x4 = zext i8 %i4 to i32
193  %x5 = zext i8 %i5 to i32
194  %x6 = zext i8 %i6 to i32
195  %x7 = zext i8 %i7 to i32
  ; Build the result lane by lane, starting from an undef vector.
196  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
197  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
198  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
199  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
200  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
201  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
202  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
203  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
204  ret <8 x i32> %v7
205}
206
; Sixteen i8 values at byte offsets 0..15 from %p0 are each zero-extended to
; i16 and inserted into lanes 0..15 of the returned <16 x i16> (a 256-bit
; result).
; All three check prefixes expect the same result: a single <16 x i8> load
; (align 1) followed by a single zext to <16 x i16>.
207define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
208; SSE2-LABEL: @loadext_16i8_to_16i16(
209; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
210; SSE2-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
211; SSE2-NEXT:    ret <16 x i16> [[TMP3]]
212;
213; SLM-LABEL: @loadext_16i8_to_16i16(
214; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
215; SLM-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
216; SLM-NEXT:    ret <16 x i16> [[TMP3]]
217;
218; AVX-LABEL: @loadext_16i8_to_16i16(
219; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
220; AVX-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
221; AVX-NEXT:    ret <16 x i16> [[TMP3]]
222;
  ; Addresses of bytes 1..15; byte 0 is read through %p0 directly.
223  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
224  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
225  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
226  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
227  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
228  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
229  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
230  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
231  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
232  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
233  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
234  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
235  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
236  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
237  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
  ; Scalar loads of the sixteen consecutive bytes.
238  %i0  = load i8, ptr %p0,  align 1
239  %i1  = load i8, ptr %p1,  align 1
240  %i2  = load i8, ptr %p2,  align 1
241  %i3  = load i8, ptr %p3,  align 1
242  %i4  = load i8, ptr %p4,  align 1
243  %i5  = load i8, ptr %p5,  align 1
244  %i6  = load i8, ptr %p6,  align 1
245  %i7  = load i8, ptr %p7,  align 1
246  %i8  = load i8, ptr %p8,  align 1
247  %i9  = load i8, ptr %p9,  align 1
248  %i10 = load i8, ptr %p10, align 1
249  %i11 = load i8, ptr %p11, align 1
250  %i12 = load i8, ptr %p12, align 1
251  %i13 = load i8, ptr %p13, align 1
252  %i14 = load i8, ptr %p14, align 1
253  %i15 = load i8, ptr %p15, align 1
  ; Widen each byte to i16.
254  %x0  = zext i8 %i0  to i16
255  %x1  = zext i8 %i1  to i16
256  %x2  = zext i8 %i2  to i16
257  %x3  = zext i8 %i3  to i16
258  %x4  = zext i8 %i4  to i16
259  %x5  = zext i8 %i5  to i16
260  %x6  = zext i8 %i6  to i16
261  %x7  = zext i8 %i7  to i16
262  %x8  = zext i8 %i8  to i16
263  %x9  = zext i8 %i9  to i16
264  %x10 = zext i8 %i10 to i16
265  %x11 = zext i8 %i11 to i16
266  %x12 = zext i8 %i12 to i16
267  %x13 = zext i8 %i13 to i16
268  %x14 = zext i8 %i14 to i16
269  %x15 = zext i8 %i15 to i16
  ; Build the result lane by lane, starting from an undef vector.
270  %v0  = insertelement <16 x i16> undef, i16 %x0,  i32 0
271  %v1  = insertelement <16 x i16>  %v0,  i16 %x1,  i32 1
272  %v2  = insertelement <16 x i16>  %v1,  i16 %x2,  i32 2
273  %v3  = insertelement <16 x i16>  %v2,  i16 %x3,  i32 3
274  %v4  = insertelement <16 x i16>  %v3,  i16 %x4,  i32 4
275  %v5  = insertelement <16 x i16>  %v4,  i16 %x5,  i32 5
276  %v6  = insertelement <16 x i16>  %v5,  i16 %x6,  i32 6
277  %v7  = insertelement <16 x i16>  %v6,  i16 %x7,  i32 7
278  %v8  = insertelement <16 x i16>  %v7,  i16 %x8,  i32 8
279  %v9  = insertelement <16 x i16>  %v8,  i16 %x9,  i32 9
280  %v10 = insertelement <16 x i16>  %v9,  i16 %x10, i32 10
281  %v11 = insertelement <16 x i16>  %v10, i16 %x11, i32 11
282  %v12 = insertelement <16 x i16>  %v11, i16 %x12, i32 12
283  %v13 = insertelement <16 x i16>  %v12, i16 %x13, i32 13
284  %v14 = insertelement <16 x i16>  %v13, i16 %x14, i32 14
285  %v15 = insertelement <16 x i16>  %v14, i16 %x15, i32 15
286  ret <16 x i16> %v15
287}
288
289;
290; vXi16
291;
292
; Two i16 values at element indices 0 and 1 from %p0 are each zero-extended to
; i64 and inserted into lanes 0 and 1 of the returned <2 x i64>.
; The scalar loads are marked align 1, i.e. below the natural i16 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <2 x i16> load followed by a single
; zext to <2 x i64>.
293define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
294; SSE2-LABEL: @loadext_2i16_to_2i64(
295; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
296; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
297; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
298;
299; SLM-LABEL: @loadext_2i16_to_2i64(
300; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
301; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
302; SLM-NEXT:    ret <2 x i64> [[TMP3]]
303;
304; AVX-LABEL: @loadext_2i16_to_2i64(
305; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
306; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
307; AVX-NEXT:    ret <2 x i64> [[TMP3]]
308;
  ; Address of the second i16 element; the first is read through %p0 directly.
309  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  ; Scalar loads of the two adjacent i16 elements.
310  %i0 = load i16, ptr %p0, align 1
311  %i1 = load i16, ptr %p1, align 1
  ; Widen each element to i64.
312  %x0 = zext i16 %i0 to i64
313  %x1 = zext i16 %i1 to i64
  ; Build the result lane by lane, starting from an undef vector.
314  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
315  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
316  ret <2 x i64> %v1
317}
318
; Four i16 values at element indices 0..3 from %p0 are each zero-extended to
; i32 and inserted into lanes 0..3 of the returned <4 x i32>.
; The scalar loads are marked align 1, i.e. below the natural i16 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <4 x i16> load followed by a single
; zext to <4 x i32>.
319define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
320; SSE2-LABEL: @loadext_4i16_to_4i32(
321; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
322; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
323; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
324;
325; SLM-LABEL: @loadext_4i16_to_4i32(
326; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
327; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
328; SLM-NEXT:    ret <4 x i32> [[TMP3]]
329;
330; AVX-LABEL: @loadext_4i16_to_4i32(
331; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
332; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
333; AVX-NEXT:    ret <4 x i32> [[TMP3]]
334;
  ; Addresses of i16 elements 1..3; element 0 is read through %p0 directly.
335  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
336  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
337  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  ; Scalar loads of the four consecutive i16 elements.
338  %i0 = load i16, ptr %p0, align 1
339  %i1 = load i16, ptr %p1, align 1
340  %i2 = load i16, ptr %p2, align 1
341  %i3 = load i16, ptr %p3, align 1
  ; Widen each element to i32.
342  %x0 = zext i16 %i0 to i32
343  %x1 = zext i16 %i1 to i32
344  %x2 = zext i16 %i2 to i32
345  %x3 = zext i16 %i3 to i32
  ; Build the result lane by lane, starting from an undef vector.
346  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
347  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
348  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
349  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
350  ret <4 x i32> %v3
351}
352
; Four i16 values at element indices 0..3 from %p0 are each zero-extended to
; i64 and inserted into lanes 0..3 of the returned <4 x i64> (a 256-bit
; result).
; The scalar loads are marked align 1, i.e. below the natural i16 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <4 x i16> load followed by a single
; zext to <4 x i64>.
353define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
354; SSE2-LABEL: @loadext_4i16_to_4i64(
355; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
356; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
357; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
358;
359; SLM-LABEL: @loadext_4i16_to_4i64(
360; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
361; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
362; SLM-NEXT:    ret <4 x i64> [[TMP3]]
363;
364; AVX-LABEL: @loadext_4i16_to_4i64(
365; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
366; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
367; AVX-NEXT:    ret <4 x i64> [[TMP3]]
368;
  ; Addresses of i16 elements 1..3; element 0 is read through %p0 directly.
369  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
370  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
371  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  ; Scalar loads of the four consecutive i16 elements.
372  %i0 = load i16, ptr %p0, align 1
373  %i1 = load i16, ptr %p1, align 1
374  %i2 = load i16, ptr %p2, align 1
375  %i3 = load i16, ptr %p3, align 1
  ; Widen each element to i64.
376  %x0 = zext i16 %i0 to i64
377  %x1 = zext i16 %i1 to i64
378  %x2 = zext i16 %i2 to i64
379  %x3 = zext i16 %i3 to i64
  ; Build the result lane by lane, starting from an undef vector.
380  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
381  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
382  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
383  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
384  ret <4 x i64> %v3
385}
386
; Eight i16 values at element indices 0..7 from %p0 are each zero-extended to
; i32 and inserted into lanes 0..7 of the returned <8 x i32> (a 256-bit
; result).
; The scalar loads are marked align 1, i.e. below the natural i16 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <8 x i16> load followed by a single
; zext to <8 x i32>.
387define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
388; SSE2-LABEL: @loadext_8i16_to_8i32(
389; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
390; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
391; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
392;
393; SLM-LABEL: @loadext_8i16_to_8i32(
394; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
395; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
396; SLM-NEXT:    ret <8 x i32> [[TMP3]]
397;
398; AVX-LABEL: @loadext_8i16_to_8i32(
399; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
400; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
401; AVX-NEXT:    ret <8 x i32> [[TMP3]]
402;
  ; Addresses of i16 elements 1..7; element 0 is read through %p0 directly.
403  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
404  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
405  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
406  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
407  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
408  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
409  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
  ; Scalar loads of the eight consecutive i16 elements.
410  %i0 = load i16, ptr %p0, align 1
411  %i1 = load i16, ptr %p1, align 1
412  %i2 = load i16, ptr %p2, align 1
413  %i3 = load i16, ptr %p3, align 1
414  %i4 = load i16, ptr %p4, align 1
415  %i5 = load i16, ptr %p5, align 1
416  %i6 = load i16, ptr %p6, align 1
417  %i7 = load i16, ptr %p7, align 1
  ; Widen each element to i32.
418  %x0 = zext i16 %i0 to i32
419  %x1 = zext i16 %i1 to i32
420  %x2 = zext i16 %i2 to i32
421  %x3 = zext i16 %i3 to i32
422  %x4 = zext i16 %i4 to i32
423  %x5 = zext i16 %i5 to i32
424  %x6 = zext i16 %i6 to i32
425  %x7 = zext i16 %i7 to i32
  ; Build the result lane by lane, starting from an undef vector.
426  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
427  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
428  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
429  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
430  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
431  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
432  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
433  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
434  ret <8 x i32> %v7
435}
436
437;
438; vXi32
439;
440
; Two i32 values at element indices 0 and 1 from %p0 are each zero-extended to
; i64 and inserted into lanes 0 and 1 of the returned <2 x i64>.
; The scalar loads are marked align 1, i.e. below the natural i32 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <2 x i32> load followed by a single
; zext to <2 x i64>.
441define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
442; SSE2-LABEL: @loadext_2i32_to_2i64(
443; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
444; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
445; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
446;
447; SLM-LABEL: @loadext_2i32_to_2i64(
448; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
449; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
450; SLM-NEXT:    ret <2 x i64> [[TMP3]]
451;
452; AVX-LABEL: @loadext_2i32_to_2i64(
453; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
454; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
455; AVX-NEXT:    ret <2 x i64> [[TMP3]]
456;
  ; Address of the second i32 element; the first is read through %p0 directly.
457  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  ; Scalar loads of the two adjacent i32 elements.
458  %i0 = load i32, ptr %p0, align 1
459  %i1 = load i32, ptr %p1, align 1
  ; Widen each element to i64.
460  %x0 = zext i32 %i0 to i64
461  %x1 = zext i32 %i1 to i64
  ; Build the result lane by lane, starting from an undef vector.
462  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
463  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
464  ret <2 x i64> %v1
465}
466
; Four i32 values at element indices 0..3 from %p0 are each zero-extended to
; i64 and inserted into lanes 0..3 of the returned <4 x i64> (a 256-bit
; result).
; The scalar loads are marked align 1, i.e. below the natural i32 alignment;
; the expected vector load keeps align 1.
; All three check prefixes expect a single <4 x i32> load followed by a single
; zext to <4 x i64>.
467define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
468; SSE2-LABEL: @loadext_4i32_to_4i64(
469; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
470; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
471; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
472;
473; SLM-LABEL: @loadext_4i32_to_4i64(
474; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
475; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
476; SLM-NEXT:    ret <4 x i64> [[TMP3]]
477;
478; AVX-LABEL: @loadext_4i32_to_4i64(
479; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
480; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
481; AVX-NEXT:    ret <4 x i64> [[TMP3]]
482;
  ; Addresses of i32 elements 1..3; element 0 is read through %p0 directly.
483  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
484  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
485  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
  ; Scalar loads of the four consecutive i32 elements.
486  %i0 = load i32, ptr %p0, align 1
487  %i1 = load i32, ptr %p1, align 1
488  %i2 = load i32, ptr %p2, align 1
489  %i3 = load i32, ptr %p3, align 1
  ; Widen each element to i64.
490  %x0 = zext i32 %i0 to i64
491  %x1 = zext i32 %i1 to i64
492  %x2 = zext i32 %i2 to i64
493  %x3 = zext i32 %i3 to i64
  ; Build the result lane by lane, starting from an undef vector.
494  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
495  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
496  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
497  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
498  ret <4 x i64> %v3
499}
500