; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -o - %s | FileCheck %s
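; Tests lowering of zext (and uitofp) applied to deinterleaving shuffles, i.e.
; even/odd and strided lane extracts of wider vectors.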

define <2 x i64> @v2i64_02(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_02:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 2>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

define <2 x i64> @v2i64_13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_13:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 3>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

define <2 x i64> @v2i64_04812(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 4>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

define <2 x i64> @v2i64_15913(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 5>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

define <2 x i64> @v2i64_261014(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 2, i32 6>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

define <2 x i64> @v2i64_37(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 3, i32 7>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

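; Every-fourth-lane extracts of <16 x i16>, zero-extended to <4 x i64>.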
define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

define <4 x i64> @v2i64_i16_15913(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000000000ffff
; CHECK-NEXT:    ushr v0.2d, v0.2d, #16
; CHECK-NEXT:    ushr v1.2d, v1.2d, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

define <4 x i64> @v2i64_i16_371115(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.2d, v0.2d, #48
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}


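; Even/odd and every-fourth-lane i16 extracts, zero-extended to <4 x i32>.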
define <4 x i32> @v4i32_0246(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

define <4 x i32> @v4i32_1357(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

define <4 x i32> @v4i32_04812(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

define <4 x i32> @v4i32_15913(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

define <4 x i32> @v4i32_261014(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

define <4 x i32> @v4i32_371115(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}


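; Even/odd and every-fourth-lane i8 extracts, zero-extended to <8 x i16>.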
define <8 x i16> @v8i16_0246(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

define <8 x i16> @v8i16_1357(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

define <8 x i16> @v8i16_04812(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

define <8 x i16> @v8i16_15913(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

define <8 x i16> @v8i16_261014(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

define <8 x i16> @v8i16_371115(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}


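; The same i8 extracts, zero-extended further to <8 x i32>.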
define <8 x i32> @v8i32_0246(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

define <8 x i32> @v8i32_1357(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

define <8 x i32> @v8i32_04812(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

define <8 x i32> @v8i32_15913(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
; CHECK-NEXT:    ushr v0.4s, v0.4s, #8
; CHECK-NEXT:    ushr v1.4s, v1.4s, #8
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

define <8 x i32> @v8i32_261014(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ushr v1.4s, v1.4s, #16
; CHECK-NEXT:    bic v0.4s, #255, lsl #8
; CHECK-NEXT:    bic v1.4s, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

define <8 x i32> @v8i32_371115(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #24
; CHECK-NEXT:    ushr v1.4s, v1.4s, #24
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}


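; Sums of the four deinterleaved, zero-extended lane groups of a <32 x i16>,
; with an in-register variant and a load-based variant that lowers to ld4.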
define <8 x i64> @zext_add(<32 x i16> %l) {
; CHECK-LABEL: zext_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v4.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v5.4s, v0.4s, v1.4s
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
; CHECK-NEXT:    uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    and v3.16b, v5.16b, v4.16b
; CHECK-NEXT:    and v6.16b, v0.16b, v4.16b
; CHECK-NEXT:    and v7.16b, v1.16b, v4.16b
; CHECK-NEXT:    and v4.16b, v2.16b, v4.16b
; CHECK-NEXT:    usra v3.4s, v5.4s, #16
; CHECK-NEXT:    usra v6.4s, v0.4s, #16
; CHECK-NEXT:    usra v7.4s, v1.4s, #16
; CHECK-NEXT:    usra v4.4s, v2.4s, #16
; CHECK-NEXT:    uaddl v0.2d, v3.2s, v6.2s
; CHECK-NEXT:    uaddl2 v1.2d, v3.4s, v6.4s
; CHECK-NEXT:    uaddl2 v3.2d, v7.4s, v4.4s
; CHECK-NEXT:    uaddl v2.2d, v7.2s, v4.2s
; CHECK-NEXT:    ret
    %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
    %z1 = zext <8 x i16> %s1 to <8 x i64>
    %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
    %z2 = zext <8 x i16> %s2 to <8 x i64>
    %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
    %z3 = zext <8 x i16> %s3 to <8 x i64>
    %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
    %z4 = zext <8 x i16> %s4 to <8 x i64>
    %a = add <8 x i64> %z1, %z2
    %b = add <8 x i64> %z3, %z4
    %c = add <8 x i64> %a, %b
    ret <8 x i64> %c
}

define <8 x i64> @zext_load_add(ptr %p) {
; CHECK-LABEL: zext_load_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
; CHECK-NEXT:    uaddl v4.4s, v0.4h, v1.4h
; CHECK-NEXT:    uaddl v5.4s, v2.4h, v3.4h
; CHECK-NEXT:    uaddl2 v6.4s, v0.8h, v1.8h
; CHECK-NEXT:    uaddl2 v2.4s, v2.8h, v3.8h
; CHECK-NEXT:    uaddl v0.2d, v4.2s, v5.2s
; CHECK-NEXT:    uaddl2 v1.2d, v4.4s, v5.4s
; CHECK-NEXT:    uaddl2 v3.2d, v6.4s, v2.4s
; CHECK-NEXT:    uaddl v2.2d, v6.2s, v2.2s
; CHECK-NEXT:    ret
    %l = load <32 x i16>, ptr %p
    %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
    %z1 = zext <8 x i16> %s1 to <8 x i64>
    %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
    %z2 = zext <8 x i16> %s2 to <8 x i64>
    %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
    %z3 = zext <8 x i16> %s3 to <8 x i64>
    %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
    %z4 = zext <8 x i16> %s4 to <8 x i64>
    %a = add <8 x i64> %z1, %z2
    %b = add <8 x i64> %z3, %z4
    %c = add <8 x i64> %a, %b
    ret <8 x i64> %c
}

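; As above, but converting each lane group to double with uitofp and combining
; the results with fadd.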
define <8 x double> @uitofp_fadd(<32 x i16> %l) {
; CHECK-LABEL: uitofp_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v4.2d, #0x0000000000ffff
; CHECK-NEXT:    ushr v5.2d, v0.2d, #16
; CHECK-NEXT:    ushr v6.2d, v1.2d, #16
; CHECK-NEXT:    ushr v7.2d, v2.2d, #16
; CHECK-NEXT:    ushr v17.2d, v3.2d, #16
; CHECK-NEXT:    ushr v20.2d, v0.2d, #32
; CHECK-NEXT:    ushr v22.2d, v1.2d, #32
; CHECK-NEXT:    ushr v23.2d, v2.2d, #32
; CHECK-NEXT:    ushr v24.2d, v3.2d, #32
; CHECK-NEXT:    and v16.16b, v0.16b, v4.16b
; CHECK-NEXT:    and v18.16b, v1.16b, v4.16b
; CHECK-NEXT:    and v19.16b, v2.16b, v4.16b
; CHECK-NEXT:    and v21.16b, v3.16b, v4.16b
; CHECK-NEXT:    and v5.16b, v5.16b, v4.16b
; CHECK-NEXT:    and v6.16b, v6.16b, v4.16b
; CHECK-NEXT:    and v7.16b, v7.16b, v4.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v4.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v4.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v4.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v4.16b
; CHECK-NEXT:    and v4.16b, v24.16b, v4.16b
; CHECK-NEXT:    ushr v0.2d, v0.2d, #48
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ushr v2.2d, v2.2d, #48
; CHECK-NEXT:    ushr v3.2d, v3.2d, #48
; CHECK-NEXT:    ucvtf v16.2d, v16.2d
; CHECK-NEXT:    ucvtf v18.2d, v18.2d
; CHECK-NEXT:    ucvtf v19.2d, v19.2d
; CHECK-NEXT:    ucvtf v21.2d, v21.2d
; CHECK-NEXT:    ucvtf v5.2d, v5.2d
; CHECK-NEXT:    ucvtf v6.2d, v6.2d
; CHECK-NEXT:    ucvtf v7.2d, v7.2d
; CHECK-NEXT:    ucvtf v17.2d, v17.2d
; CHECK-NEXT:    ucvtf v20.2d, v20.2d
; CHECK-NEXT:    ucvtf v22.2d, v22.2d
; CHECK-NEXT:    ucvtf v23.2d, v23.2d
; CHECK-NEXT:    ucvtf v4.2d, v4.2d
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-NEXT:    fadd v5.2d, v16.2d, v5.2d
; CHECK-NEXT:    fadd v17.2d, v21.2d, v17.2d
; CHECK-NEXT:    fadd v7.2d, v19.2d, v7.2d
; CHECK-NEXT:    fadd v6.2d, v18.2d, v6.2d
; CHECK-NEXT:    fadd v0.2d, v20.2d, v0.2d
; CHECK-NEXT:    fadd v1.2d, v22.2d, v1.2d
; CHECK-NEXT:    fadd v3.2d, v4.2d, v3.2d
; CHECK-NEXT:    fadd v2.2d, v23.2d, v2.2d
; CHECK-NEXT:    fadd v0.2d, v5.2d, v0.2d
; CHECK-NEXT:    fadd v1.2d, v6.2d, v1.2d
; CHECK-NEXT:    fadd v2.2d, v7.2d, v2.2d
; CHECK-NEXT:    fadd v3.2d, v17.2d, v3.2d
; CHECK-NEXT:    ret
    %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
    %z1 = uitofp <8 x i16> %s1 to <8 x double>
    %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
    %z2 = uitofp <8 x i16> %s2 to <8 x double>
    %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
    %z3 = uitofp <8 x i16> %s3 to <8 x double>
    %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
    %z4 = uitofp <8 x i16> %s4 to <8 x double>
    %a = fadd <8 x double> %z1, %z2
    %b = fadd <8 x double> %z3, %z4
    %c = fadd <8 x double> %a, %b
    ret <8 x double> %c
}

define <8 x double> @uitofp_load_fadd(ptr %p) {
; CHECK-LABEL: uitofp_load_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    movi v0.2d, #0x0000000000ffff
; CHECK-NEXT:    ldp q3, q4, [x0, #32]
; CHECK-NEXT:    ushr v5.2d, v1.2d, #16
; CHECK-NEXT:    ushr v6.2d, v2.2d, #16
; CHECK-NEXT:    ushr v20.2d, v1.2d, #32
; CHECK-NEXT:    ushr v7.2d, v3.2d, #16
; CHECK-NEXT:    ushr v17.2d, v4.2d, #16
; CHECK-NEXT:    ushr v22.2d, v2.2d, #32
; CHECK-NEXT:    ushr v23.2d, v3.2d, #32
; CHECK-NEXT:    ushr v24.2d, v4.2d, #32
; CHECK-NEXT:    and v16.16b, v1.16b, v0.16b
; CHECK-NEXT:    and v18.16b, v2.16b, v0.16b
; CHECK-NEXT:    and v19.16b, v3.16b, v0.16b
; CHECK-NEXT:    and v21.16b, v4.16b, v0.16b
; CHECK-NEXT:    and v5.16b, v5.16b, v0.16b
; CHECK-NEXT:    and v6.16b, v6.16b, v0.16b
; CHECK-NEXT:    and v7.16b, v7.16b, v0.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v0.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v0.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v0.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v0.16b
; CHECK-NEXT:    and v0.16b, v24.16b, v0.16b
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ushr v2.2d, v2.2d, #48
; CHECK-NEXT:    ushr v3.2d, v3.2d, #48
; CHECK-NEXT:    ushr v4.2d, v4.2d, #48
; CHECK-NEXT:    ucvtf v16.2d, v16.2d
; CHECK-NEXT:    ucvtf v18.2d, v18.2d
; CHECK-NEXT:    ucvtf v19.2d, v19.2d
; CHECK-NEXT:    ucvtf v21.2d, v21.2d
; CHECK-NEXT:    ucvtf v5.2d, v5.2d
; CHECK-NEXT:    ucvtf v6.2d, v6.2d
; CHECK-NEXT:    ucvtf v7.2d, v7.2d
; CHECK-NEXT:    ucvtf v17.2d, v17.2d
; CHECK-NEXT:    ucvtf v20.2d, v20.2d
; CHECK-NEXT:    ucvtf v22.2d, v22.2d
; CHECK-NEXT:    ucvtf v23.2d, v23.2d
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-NEXT:    ucvtf v4.2d, v4.2d
; CHECK-NEXT:    fadd v6.2d, v18.2d, v6.2d
; CHECK-NEXT:    fadd v5.2d, v16.2d, v5.2d
; CHECK-NEXT:    fadd v17.2d, v21.2d, v17.2d
; CHECK-NEXT:    fadd v7.2d, v19.2d, v7.2d
; CHECK-NEXT:    fadd v1.2d, v20.2d, v1.2d
; CHECK-NEXT:    fadd v3.2d, v23.2d, v3.2d
; CHECK-NEXT:    fadd v2.2d, v22.2d, v2.2d
; CHECK-NEXT:    fadd v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fadd v0.2d, v5.2d, v1.2d
; CHECK-NEXT:    fadd v1.2d, v6.2d, v2.2d
; CHECK-NEXT:    fadd v2.2d, v7.2d, v3.2d
; CHECK-NEXT:    fadd v3.2d, v17.2d, v4.2d
; CHECK-NEXT:    ret
    %l = load <32 x i16>, ptr %p
    %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
    %z1 = uitofp <8 x i16> %s1 to <8 x double>
    %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
    %z2 = uitofp <8 x i16> %s2 to <8 x double>
    %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
    %z3 = uitofp <8 x i16> %s3 to <8 x double>
    %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
    %z4 = uitofp <8 x i16> %s4 to <8 x double>
    %a = fadd <8 x double> %z1, %z2
    %b = fadd <8 x double> %z3, %z4
    %c = fadd <8 x double> %a, %b
    ret <8 x double> %c
}
