xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; Test efficient codegen of vector extends from a legal type up to 128-bit,
6; 256-bit and wider vector types.
7
8; CHECK-GI:       warning: Instruction selection used fallback path for zext_v32i1
9; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for zext_v64i1
10
11;-----
12; Vectors of i16.
13;-----
14
; zext <8 x i8> -> <8 x i16>. Both RUN configurations share the CHECK prefix
; here and expect a single ushll.8h with a zero shift amount.
15define <8 x i16> @func1(<8 x i8> %v0) nounwind {
16; CHECK-LABEL: func1:
17; CHECK:       // %bb.0:
18; CHECK-NEXT:    ushll.8h v0, v0, #0
19; CHECK-NEXT:    ret
20  %r = zext <8 x i8> %v0 to <8 x i16>
21  ret <8 x i16> %r
22}
23
; sext <8 x i8> -> <8 x i16>. Both RUN configurations share the CHECK prefix
; here and expect a single sshll.8h with a zero shift amount.
24define <8 x i16> @func2(<8 x i8> %v0) nounwind {
25; CHECK-LABEL: func2:
26; CHECK:       // %bb.0:
27; CHECK-NEXT:    sshll.8h v0, v0, #0
28; CHECK-NEXT:    ret
29  %r = sext <8 x i8> %v0 to <8 x i16>
30  ret <8 x i16> %r
31}
32
; zext <16 x i8> -> <16 x i16>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) writes v1 with ushll2 first and then widens
; v0 in place. GlobalISel (CHECK-GI) builds the ushll result in v2 and needs an
; extra mov.16b to move it into v0.
33define <16 x i16> @func3(<16 x i8> %v0) nounwind {
34; CHECK-SD-LABEL: func3:
35; CHECK-SD:       // %bb.0:
36; CHECK-SD-NEXT:    ushll2.8h v1, v0, #0
37; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
38; CHECK-SD-NEXT:    ret
39;
40; CHECK-GI-LABEL: func3:
41; CHECK-GI:       // %bb.0:
42; CHECK-GI-NEXT:    ushll.8h v2, v0, #0
43; CHECK-GI-NEXT:    ushll2.8h v1, v0, #0
44; CHECK-GI-NEXT:    mov.16b v0, v2
45; CHECK-GI-NEXT:    ret
46  %r = zext <16 x i8> %v0 to <16 x i16>
47  ret <16 x i16> %r
48}
49
; sext <16 x i8> -> <16 x i16>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) writes v1 with sshll2 first and then widens
; v0 in place. GlobalISel (CHECK-GI) builds the sshll result in v2 and needs an
; extra mov.16b to move it into v0.
50define <16 x i16> @func4(<16 x i8> %v0) nounwind {
51; CHECK-SD-LABEL: func4:
52; CHECK-SD:       // %bb.0:
53; CHECK-SD-NEXT:    sshll2.8h v1, v0, #0
54; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
55; CHECK-SD-NEXT:    ret
56;
57; CHECK-GI-LABEL: func4:
58; CHECK-GI:       // %bb.0:
59; CHECK-GI-NEXT:    sshll.8h v2, v0, #0
60; CHECK-GI-NEXT:    sshll2.8h v1, v0, #0
61; CHECK-GI-NEXT:    mov.16b v0, v2
62; CHECK-GI-NEXT:    ret
63  %r = sext <16 x i8> %v0 to <16 x i16>
64  ret <16 x i16> %r
65}
66
67;-----
68; Vectors of i32.
69;-----
70
; zext <4 x i16> -> <4 x i32>. Both RUN configurations share the CHECK prefix
; here and expect a single ushll.4s with a zero shift amount.
71define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
72; CHECK-LABEL: afunc1:
73; CHECK:       // %bb.0:
74; CHECK-NEXT:    ushll.4s v0, v0, #0
75; CHECK-NEXT:    ret
76  %r = zext <4 x i16> %v0 to <4 x i32>
77  ret <4 x i32> %r
78}
79
; sext <4 x i16> -> <4 x i32>. Both RUN configurations share the CHECK prefix
; here and expect a single sshll.4s with a zero shift amount.
80define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
81; CHECK-LABEL: afunc2:
82; CHECK:       // %bb.0:
83; CHECK-NEXT:    sshll.4s v0, v0, #0
84; CHECK-NEXT:    ret
85  %r = sext <4 x i16> %v0 to <4 x i32>
86  ret <4 x i32> %r
87}
88
; zext <8 x i16> -> <8 x i32>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) emits ushll2.4s into v1 and then ushll.4s in
; place on v0. GlobalISel (CHECK-GI) builds the ushll result in v2 and copies
; it into v0 with an extra mov.16b.
89define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
90; CHECK-SD-LABEL: afunc3:
91; CHECK-SD:       // %bb.0:
92; CHECK-SD-NEXT:    ushll2.4s v1, v0, #0
93; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
94; CHECK-SD-NEXT:    ret
95;
96; CHECK-GI-LABEL: afunc3:
97; CHECK-GI:       // %bb.0:
98; CHECK-GI-NEXT:    ushll.4s v2, v0, #0
99; CHECK-GI-NEXT:    ushll2.4s v1, v0, #0
100; CHECK-GI-NEXT:    mov.16b v0, v2
101; CHECK-GI-NEXT:    ret
102  %r = zext <8 x i16> %v0 to <8 x i32>
103  ret <8 x i32> %r
104}
105
; sext <8 x i16> -> <8 x i32>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) emits sshll2.4s into v1 and then sshll.4s in
; place on v0. GlobalISel (CHECK-GI) builds the sshll result in v2 and copies
; it into v0 with an extra mov.16b.
106define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
107; CHECK-SD-LABEL: afunc4:
108; CHECK-SD:       // %bb.0:
109; CHECK-SD-NEXT:    sshll2.4s v1, v0, #0
110; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
111; CHECK-SD-NEXT:    ret
112;
113; CHECK-GI-LABEL: afunc4:
114; CHECK-GI:       // %bb.0:
115; CHECK-GI-NEXT:    sshll.4s v2, v0, #0
116; CHECK-GI-NEXT:    sshll2.4s v1, v0, #0
117; CHECK-GI-NEXT:    mov.16b v0, v2
118; CHECK-GI-NEXT:    ret
119  %r = sext <8 x i16> %v0 to <8 x i32>
120  ret <8 x i32> %r
121}
122
; zext <8 x i8> -> <8 x i32>, done in two widening steps: ushll.8h takes the
; bytes to 16-bit lanes, then ushll.4s / ushll2.4s produce the two result
; registers. The selectors differ only in which register holds the
; intermediate (v0 for SelectionDAG, v1 for GlobalISel) and in final order.
123define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
124; CHECK-SD-LABEL: bfunc1:
125; CHECK-SD:       // %bb.0:
126; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
127; CHECK-SD-NEXT:    ushll2.4s v1, v0, #0
128; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
129; CHECK-SD-NEXT:    ret
130;
131; CHECK-GI-LABEL: bfunc1:
132; CHECK-GI:       // %bb.0:
133; CHECK-GI-NEXT:    ushll.8h v1, v0, #0
134; CHECK-GI-NEXT:    ushll.4s v0, v1, #0
135; CHECK-GI-NEXT:    ushll2.4s v1, v1, #0
136; CHECK-GI-NEXT:    ret
137  %r = zext <8 x i8> %v0 to <8 x i32>
138  ret <8 x i32> %r
139}
140
; sext <8 x i8> -> <8 x i32>, done in two widening steps: sshll.8h takes the
; bytes to 16-bit lanes, then sshll.4s / sshll2.4s produce the two result
; registers. The selectors differ only in which register holds the
; intermediate (v0 for SelectionDAG, v1 for GlobalISel) and in final order.
141define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
142; CHECK-SD-LABEL: bfunc2:
143; CHECK-SD:       // %bb.0:
144; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
145; CHECK-SD-NEXT:    sshll2.4s v1, v0, #0
146; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
147; CHECK-SD-NEXT:    ret
148;
149; CHECK-GI-LABEL: bfunc2:
150; CHECK-GI:       // %bb.0:
151; CHECK-GI-NEXT:    sshll.8h v1, v0, #0
152; CHECK-GI-NEXT:    sshll.4s v0, v1, #0
153; CHECK-GI-NEXT:    sshll2.4s v1, v1, #0
154; CHECK-GI-NEXT:    ret
155  %r = sext <8 x i8> %v0 to <8 x i32>
156  ret <8 x i32> %r
157}
158
159;-----
160; Vectors of i64.
161;-----
162
; zext <4 x i32> -> <4 x i64>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) emits ushll2.2d into v1 and then ushll.2d in
; place on v0. GlobalISel (CHECK-GI) builds the ushll result in v2 and copies
; it into v0 with an extra mov.16b.
163define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
164; CHECK-SD-LABEL: zfunc1:
165; CHECK-SD:       // %bb.0:
166; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
167; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
168; CHECK-SD-NEXT:    ret
169;
170; CHECK-GI-LABEL: zfunc1:
171; CHECK-GI:       // %bb.0:
172; CHECK-GI-NEXT:    ushll.2d v2, v0, #0
173; CHECK-GI-NEXT:    ushll2.2d v1, v0, #0
174; CHECK-GI-NEXT:    mov.16b v0, v2
175; CHECK-GI-NEXT:    ret
176  %r = zext <4 x i32> %v0 to <4 x i64>
177  ret <4 x i64> %r
178}
179
; sext <4 x i32> -> <4 x i64>; the expected output leaves the result in v0
; and v1. SelectionDAG (CHECK-SD) emits sshll2.2d into v1 and then sshll.2d in
; place on v0. GlobalISel (CHECK-GI) builds the sshll result in v2 and copies
; it into v0 with an extra mov.16b.
180define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
181; CHECK-SD-LABEL: zfunc2:
182; CHECK-SD:       // %bb.0:
183; CHECK-SD-NEXT:    sshll2.2d v1, v0, #0
184; CHECK-SD-NEXT:    sshll.2d v0, v0, #0
185; CHECK-SD-NEXT:    ret
186;
187; CHECK-GI-LABEL: zfunc2:
188; CHECK-GI:       // %bb.0:
189; CHECK-GI-NEXT:    sshll.2d v2, v0, #0
190; CHECK-GI-NEXT:    sshll2.2d v1, v0, #0
191; CHECK-GI-NEXT:    mov.16b v0, v2
192; CHECK-GI-NEXT:    ret
193  %r = sext <4 x i32> %v0 to <4 x i64>
194  ret <4 x i64> %r
195}
196
; zext <4 x i16> -> <4 x i64>, done in two widening steps: ushll.4s takes the
; input to 32-bit lanes, then ushll.2d / ushll2.2d produce the two result
; registers. The selectors differ only in which register holds the
; intermediate (v0 for SelectionDAG, v1 for GlobalISel) and in final order.
197define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
198; CHECK-SD-LABEL: bfunc3:
199; CHECK-SD:       // %bb.0:
200; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
201; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
202; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
203; CHECK-SD-NEXT:    ret
204;
205; CHECK-GI-LABEL: bfunc3:
206; CHECK-GI:       // %bb.0:
207; CHECK-GI-NEXT:    ushll.4s v1, v0, #0
208; CHECK-GI-NEXT:    ushll.2d v0, v1, #0
209; CHECK-GI-NEXT:    ushll2.2d v1, v1, #0
210; CHECK-GI-NEXT:    ret
211  %r = zext <4 x i16> %v0 to <4 x i64>
212  ret <4 x i64> %r
213}
214
; sext <4 x i16> -> <4 x i64>, done in two widening steps: sshll.4s takes the
; input to 32-bit lanes, then sshll.2d / sshll2.2d produce the two result
; registers. The selectors differ only in which register holds the
; intermediate (v0 for SelectionDAG, v1 for GlobalISel) and in final order.
215define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
216; CHECK-SD-LABEL: cfunc4:
217; CHECK-SD:       // %bb.0:
218; CHECK-SD-NEXT:    sshll.4s v0, v0, #0
219; CHECK-SD-NEXT:    sshll2.2d v1, v0, #0
220; CHECK-SD-NEXT:    sshll.2d v0, v0, #0
221; CHECK-SD-NEXT:    ret
222;
223; CHECK-GI-LABEL: cfunc4:
224; CHECK-GI:       // %bb.0:
225; CHECK-GI-NEXT:    sshll.4s v1, v0, #0
226; CHECK-GI-NEXT:    sshll.2d v0, v1, #0
227; CHECK-GI-NEXT:    sshll2.2d v1, v1, #0
228; CHECK-GI-NEXT:    ret
229  %r = sext <4 x i16> %v0 to <4 x i64>
230  ret <4 x i64> %r
231}
232
; zext <4 x i8> -> <4 x i64>. The .4h/.4s operands in the expected output
; suggest the <4 x i8> argument is held in 16-bit lanes (NOTE(review): confirm
; against the calling convention). SelectionDAG clears the upper byte of each
; 16-bit lane with bic.4h before widening; GlobalISel widens first and then
; masks every 64-bit lane with 0xff using and.16b.
233define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
234; CHECK-SD-LABEL: zext_v4i8_to_v4i64:
235; CHECK-SD:       // %bb.0:
236; CHECK-SD-NEXT:    bic.4h v0, #255, lsl #8
237; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
238; CHECK-SD-NEXT:    ushll2.2d v1, v0, #0
239; CHECK-SD-NEXT:    ushll.2d v0, v0, #0
240; CHECK-SD-NEXT:    ret
241;
242; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
243; CHECK-GI:       // %bb.0:
244; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
245; CHECK-GI-NEXT:    movi.2d v1, #0x000000000000ff
246; CHECK-GI-NEXT:    ushll.2d v2, v0, #0
247; CHECK-GI-NEXT:    ushll2.2d v3, v0, #0
248; CHECK-GI-NEXT:    and.16b v0, v2, v1
249; CHECK-GI-NEXT:    and.16b v1, v3, v1
250; CHECK-GI-NEXT:    ret
251  %r = zext <4 x i8> %v0 to <4 x i64>
252  ret <4 x i64> %r
253}
254
; sext <4 x i8> -> <4 x i64>. Both selectors widen with unsigned ushll steps
; and then sign-extend from bit 7 inside each 64-bit lane with a shl #56 /
; sshr #56 pair. The two expected sequences differ only in register
; assignment and instruction order.
255define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
256; CHECK-SD-LABEL: sext_v4i8_to_v4i64:
257; CHECK-SD:       // %bb.0:
258; CHECK-SD-NEXT:    ushll.4s v0, v0, #0
259; CHECK-SD-NEXT:    ushll.2d v1, v0, #0
260; CHECK-SD-NEXT:    ushll2.2d v0, v0, #0
261; CHECK-SD-NEXT:    shl.2d v0, v0, #56
262; CHECK-SD-NEXT:    shl.2d v2, v1, #56
263; CHECK-SD-NEXT:    sshr.2d v1, v0, #56
264; CHECK-SD-NEXT:    sshr.2d v0, v2, #56
265; CHECK-SD-NEXT:    ret
266;
267; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
268; CHECK-GI:       // %bb.0:
269; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
270; CHECK-GI-NEXT:    ushll.2d v1, v0, #0
271; CHECK-GI-NEXT:    ushll2.2d v0, v0, #0
272; CHECK-GI-NEXT:    shl.2d v1, v1, #56
273; CHECK-GI-NEXT:    shl.2d v2, v0, #56
274; CHECK-GI-NEXT:    sshr.2d v0, v1, #56
275; CHECK-GI-NEXT:    sshr.2d v1, v2, #56
276; CHECK-GI-NEXT:    ret
277  %r = sext <4 x i8> %v0 to <4 x i64>
278  ret <4 x i64> %r
279}
280
; zext <8 x i8> -> <8 x i64>, done in three widening steps (ushll.8h, then
; ushll/ushll2.4s, then ushll/ushll2.2d), leaving the result in v0-v3. The
; selectors use the same seven instructions but differ in temporary register
; choice and in the order of the final 2d extends.
281define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
282; CHECK-SD-LABEL: zext_v8i8_to_v8i64:
283; CHECK-SD:       // %bb.0:
284; CHECK-SD-NEXT:    ushll.8h v0, v0, #0
285; CHECK-SD-NEXT:    ushll.4s v1, v0, #0
286; CHECK-SD-NEXT:    ushll2.4s v2, v0, #0
287; CHECK-SD-NEXT:    ushll.2d v0, v1, #0
288; CHECK-SD-NEXT:    ushll2.2d v3, v2, #0
289; CHECK-SD-NEXT:    ushll2.2d v1, v1, #0
290; CHECK-SD-NEXT:    ushll.2d v2, v2, #0
291; CHECK-SD-NEXT:    ret
292;
293; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
294; CHECK-GI:       // %bb.0:
295; CHECK-GI-NEXT:    ushll.8h v0, v0, #0
296; CHECK-GI-NEXT:    ushll.4s v1, v0, #0
297; CHECK-GI-NEXT:    ushll2.4s v3, v0, #0
298; CHECK-GI-NEXT:    ushll.2d v0, v1, #0
299; CHECK-GI-NEXT:    ushll2.2d v1, v1, #0
300; CHECK-GI-NEXT:    ushll.2d v2, v3, #0
301; CHECK-GI-NEXT:    ushll2.2d v3, v3, #0
302; CHECK-GI-NEXT:    ret
303  %r = zext <8 x i8> %v0 to <8 x i64>
304  ret <8 x i64> %r
305}
306
; sext <8 x i8> -> <8 x i64>, done in three widening steps (sshll.8h, then
; sshll/sshll2.4s, then sshll/sshll2.2d), leaving the result in v0-v3. The
; selectors use the same seven instructions but differ in temporary register
; choice and in the order of the final 2d extends.
307define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
308; CHECK-SD-LABEL: sext_v8i8_to_v8i64:
309; CHECK-SD:       // %bb.0:
310; CHECK-SD-NEXT:    sshll.8h v0, v0, #0
311; CHECK-SD-NEXT:    sshll.4s v1, v0, #0
312; CHECK-SD-NEXT:    sshll2.4s v2, v0, #0
313; CHECK-SD-NEXT:    sshll.2d v0, v1, #0
314; CHECK-SD-NEXT:    sshll2.2d v3, v2, #0
315; CHECK-SD-NEXT:    sshll2.2d v1, v1, #0
316; CHECK-SD-NEXT:    sshll.2d v2, v2, #0
317; CHECK-SD-NEXT:    ret
318;
319; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
320; CHECK-GI:       // %bb.0:
321; CHECK-GI-NEXT:    sshll.8h v0, v0, #0
322; CHECK-GI-NEXT:    sshll.4s v1, v0, #0
323; CHECK-GI-NEXT:    sshll2.4s v3, v0, #0
324; CHECK-GI-NEXT:    sshll.2d v0, v1, #0
325; CHECK-GI-NEXT:    sshll2.2d v1, v1, #0
326; CHECK-GI-NEXT:    sshll.2d v2, v3, #0
327; CHECK-GI-NEXT:    sshll2.2d v3, v3, #0
328; CHECK-GI-NEXT:    ret
329  %r = sext <8 x i8> %v0 to <8 x i64>
330  ret <8 x i64> %r
331}
332
333; Extends of vectors of i1.
334
; zext <32 x i1> -> <32 x i8>. A single CHECK block covers both RUN lines; the
; CHECK-GI warning near the top of the file records that GlobalISel uses the
; fallback path for this function. The expected code inserts one element per
; byte lane: v0 is filled from w0-w7 and then [sp]..[sp, #56], v1 from
; [sp, #64]..[sp, #184]. Each vector is then masked with movi.16b #1 so only
; bit 0 of every byte survives.
335define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
336; CHECK-LABEL: zext_v32i1:
337; CHECK:       // %bb.0:
338; CHECK-NEXT:    ldr w8, [sp, #64]
339; CHECK-NEXT:    fmov s0, w0
340; CHECK-NEXT:    ldr w9, [sp, #72]
341; CHECK-NEXT:    movi.16b v2, #1
342; CHECK-NEXT:    fmov s1, w8
343; CHECK-NEXT:    ldr w8, [sp, #80]
344; CHECK-NEXT:    mov.b v0[1], w1
345; CHECK-NEXT:    mov.b v1[1], w9
346; CHECK-NEXT:    ldr w9, [sp]
347; CHECK-NEXT:    mov.b v0[2], w2
348; CHECK-NEXT:    mov.b v1[2], w8
349; CHECK-NEXT:    ldr w8, [sp, #88]
350; CHECK-NEXT:    mov.b v0[3], w3
351; CHECK-NEXT:    mov.b v1[3], w8
352; CHECK-NEXT:    ldr w8, [sp, #96]
353; CHECK-NEXT:    mov.b v0[4], w4
354; CHECK-NEXT:    mov.b v1[4], w8
355; CHECK-NEXT:    ldr w8, [sp, #104]
356; CHECK-NEXT:    mov.b v0[5], w5
357; CHECK-NEXT:    mov.b v1[5], w8
358; CHECK-NEXT:    ldr w8, [sp, #112]
359; CHECK-NEXT:    mov.b v0[6], w6
360; CHECK-NEXT:    mov.b v1[6], w8
361; CHECK-NEXT:    ldr w8, [sp, #120]
362; CHECK-NEXT:    mov.b v0[7], w7
363; CHECK-NEXT:    mov.b v1[7], w8
364; CHECK-NEXT:    ldr w8, [sp, #128]
365; CHECK-NEXT:    mov.b v0[8], w9
366; CHECK-NEXT:    ldr w9, [sp, #8]
367; CHECK-NEXT:    mov.b v1[8], w8
368; CHECK-NEXT:    ldr w8, [sp, #136]
369; CHECK-NEXT:    mov.b v0[9], w9
370; CHECK-NEXT:    ldr w9, [sp, #16]
371; CHECK-NEXT:    mov.b v1[9], w8
372; CHECK-NEXT:    ldr w8, [sp, #144]
373; CHECK-NEXT:    mov.b v0[10], w9
374; CHECK-NEXT:    ldr w9, [sp, #24]
375; CHECK-NEXT:    mov.b v1[10], w8
376; CHECK-NEXT:    ldr w8, [sp, #152]
377; CHECK-NEXT:    mov.b v0[11], w9
378; CHECK-NEXT:    ldr w9, [sp, #32]
379; CHECK-NEXT:    mov.b v1[11], w8
380; CHECK-NEXT:    ldr w8, [sp, #160]
381; CHECK-NEXT:    mov.b v0[12], w9
382; CHECK-NEXT:    ldr w9, [sp, #40]
383; CHECK-NEXT:    mov.b v1[12], w8
384; CHECK-NEXT:    ldr w8, [sp, #168]
385; CHECK-NEXT:    mov.b v0[13], w9
386; CHECK-NEXT:    ldr w9, [sp, #48]
387; CHECK-NEXT:    mov.b v1[13], w8
388; CHECK-NEXT:    ldr w8, [sp, #176]
389; CHECK-NEXT:    mov.b v0[14], w9
390; CHECK-NEXT:    ldr w9, [sp, #56]
391; CHECK-NEXT:    mov.b v1[14], w8
392; CHECK-NEXT:    ldr w8, [sp, #184]
393; CHECK-NEXT:    mov.b v0[15], w9
394; CHECK-NEXT:    mov.b v1[15], w8
395; CHECK-NEXT:    and.16b v0, v0, v2
396; CHECK-NEXT:    and.16b v1, v1, v2
397; CHECK-NEXT:    ret
398  %res = zext <32 x i1> %arg to <32 x i8>
399  ret <32 x i8> %res
400}
401
; sext <32 x i1> -> <32 x i8>. Both selectors assemble the two result vectors
; one byte lane at a time from w0-w7 and stack slots, then shift each byte
; left by 7 to move bit 0 into the sign position. They differ in the final
; step: SelectionDAG (CHECK-SD) uses cmlt.16b against #0, while GlobalISel
; (CHECK-GI) uses an arithmetic shift right, sshr.16b #7.
402define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
403; CHECK-SD-LABEL: sext_v32i1:
404; CHECK-SD:       // %bb.0:
405; CHECK-SD-NEXT:    ldr w8, [sp, #64]
406; CHECK-SD-NEXT:    fmov s1, w0
407; CHECK-SD-NEXT:    ldr w9, [sp, #72]
408; CHECK-SD-NEXT:    fmov s0, w8
409; CHECK-SD-NEXT:    ldr w8, [sp, #80]
410; CHECK-SD-NEXT:    mov.b v1[1], w1
411; CHECK-SD-NEXT:    mov.b v0[1], w9
412; CHECK-SD-NEXT:    ldr w9, [sp]
413; CHECK-SD-NEXT:    mov.b v1[2], w2
414; CHECK-SD-NEXT:    mov.b v0[2], w8
415; CHECK-SD-NEXT:    ldr w8, [sp, #88]
416; CHECK-SD-NEXT:    mov.b v1[3], w3
417; CHECK-SD-NEXT:    mov.b v0[3], w8
418; CHECK-SD-NEXT:    ldr w8, [sp, #96]
419; CHECK-SD-NEXT:    mov.b v1[4], w4
420; CHECK-SD-NEXT:    mov.b v0[4], w8
421; CHECK-SD-NEXT:    ldr w8, [sp, #104]
422; CHECK-SD-NEXT:    mov.b v1[5], w5
423; CHECK-SD-NEXT:    mov.b v0[5], w8
424; CHECK-SD-NEXT:    ldr w8, [sp, #112]
425; CHECK-SD-NEXT:    mov.b v1[6], w6
426; CHECK-SD-NEXT:    mov.b v0[6], w8
427; CHECK-SD-NEXT:    ldr w8, [sp, #120]
428; CHECK-SD-NEXT:    mov.b v1[7], w7
429; CHECK-SD-NEXT:    mov.b v0[7], w8
430; CHECK-SD-NEXT:    ldr w8, [sp, #128]
431; CHECK-SD-NEXT:    mov.b v1[8], w9
432; CHECK-SD-NEXT:    ldr w9, [sp, #8]
433; CHECK-SD-NEXT:    mov.b v0[8], w8
434; CHECK-SD-NEXT:    ldr w8, [sp, #136]
435; CHECK-SD-NEXT:    mov.b v1[9], w9
436; CHECK-SD-NEXT:    ldr w9, [sp, #16]
437; CHECK-SD-NEXT:    mov.b v0[9], w8
438; CHECK-SD-NEXT:    ldr w8, [sp, #144]
439; CHECK-SD-NEXT:    mov.b v1[10], w9
440; CHECK-SD-NEXT:    ldr w9, [sp, #24]
441; CHECK-SD-NEXT:    mov.b v0[10], w8
442; CHECK-SD-NEXT:    ldr w8, [sp, #152]
443; CHECK-SD-NEXT:    mov.b v1[11], w9
444; CHECK-SD-NEXT:    ldr w9, [sp, #32]
445; CHECK-SD-NEXT:    mov.b v0[11], w8
446; CHECK-SD-NEXT:    ldr w8, [sp, #160]
447; CHECK-SD-NEXT:    mov.b v1[12], w9
448; CHECK-SD-NEXT:    ldr w9, [sp, #40]
449; CHECK-SD-NEXT:    mov.b v0[12], w8
450; CHECK-SD-NEXT:    ldr w8, [sp, #168]
451; CHECK-SD-NEXT:    mov.b v1[13], w9
452; CHECK-SD-NEXT:    ldr w9, [sp, #48]
453; CHECK-SD-NEXT:    mov.b v0[13], w8
454; CHECK-SD-NEXT:    ldr w8, [sp, #176]
455; CHECK-SD-NEXT:    mov.b v1[14], w9
456; CHECK-SD-NEXT:    ldr w9, [sp, #56]
457; CHECK-SD-NEXT:    mov.b v0[14], w8
458; CHECK-SD-NEXT:    ldr w8, [sp, #184]
459; CHECK-SD-NEXT:    mov.b v1[15], w9
460; CHECK-SD-NEXT:    mov.b v0[15], w8
461; CHECK-SD-NEXT:    shl.16b v1, v1, #7
462; CHECK-SD-NEXT:    shl.16b v2, v0, #7
463; CHECK-SD-NEXT:    cmlt.16b v0, v1, #0
464; CHECK-SD-NEXT:    cmlt.16b v1, v2, #0
465; CHECK-SD-NEXT:    ret
466;
467; CHECK-GI-LABEL: sext_v32i1:
468; CHECK-GI:       // %bb.0:
469; CHECK-GI-NEXT:    ldr w8, [sp, #64]
470; CHECK-GI-NEXT:    fmov s0, w0
471; CHECK-GI-NEXT:    ldr w9, [sp, #72]
472; CHECK-GI-NEXT:    fmov s1, w8
473; CHECK-GI-NEXT:    ldr w8, [sp, #80]
474; CHECK-GI-NEXT:    mov.b v0[1], w1
475; CHECK-GI-NEXT:    mov.b v1[1], w9
476; CHECK-GI-NEXT:    ldr w9, [sp, #128]
477; CHECK-GI-NEXT:    mov.b v0[2], w2
478; CHECK-GI-NEXT:    mov.b v1[2], w8
479; CHECK-GI-NEXT:    ldr w8, [sp, #88]
480; CHECK-GI-NEXT:    mov.b v0[3], w3
481; CHECK-GI-NEXT:    mov.b v1[3], w8
482; CHECK-GI-NEXT:    ldr w8, [sp, #96]
483; CHECK-GI-NEXT:    mov.b v0[4], w4
484; CHECK-GI-NEXT:    mov.b v1[4], w8
485; CHECK-GI-NEXT:    ldr w8, [sp, #104]
486; CHECK-GI-NEXT:    mov.b v0[5], w5
487; CHECK-GI-NEXT:    mov.b v1[5], w8
488; CHECK-GI-NEXT:    ldr w8, [sp, #112]
489; CHECK-GI-NEXT:    mov.b v0[6], w6
490; CHECK-GI-NEXT:    mov.b v1[6], w8
491; CHECK-GI-NEXT:    ldr w8, [sp, #120]
492; CHECK-GI-NEXT:    mov.b v0[7], w7
493; CHECK-GI-NEXT:    mov.b v1[7], w8
494; CHECK-GI-NEXT:    ldr w8, [sp]
495; CHECK-GI-NEXT:    mov.b v0[8], w8
496; CHECK-GI-NEXT:    ldr w8, [sp, #8]
497; CHECK-GI-NEXT:    mov.b v1[8], w9
498; CHECK-GI-NEXT:    ldr w9, [sp, #136]
499; CHECK-GI-NEXT:    mov.b v0[9], w8
500; CHECK-GI-NEXT:    ldr w8, [sp, #16]
501; CHECK-GI-NEXT:    mov.b v1[9], w9
502; CHECK-GI-NEXT:    ldr w9, [sp, #144]
503; CHECK-GI-NEXT:    mov.b v0[10], w8
504; CHECK-GI-NEXT:    ldr w8, [sp, #24]
505; CHECK-GI-NEXT:    mov.b v1[10], w9
506; CHECK-GI-NEXT:    ldr w9, [sp, #152]
507; CHECK-GI-NEXT:    mov.b v0[11], w8
508; CHECK-GI-NEXT:    ldr w8, [sp, #32]
509; CHECK-GI-NEXT:    mov.b v1[11], w9
510; CHECK-GI-NEXT:    ldr w9, [sp, #160]
511; CHECK-GI-NEXT:    mov.b v0[12], w8
512; CHECK-GI-NEXT:    ldr w8, [sp, #40]
513; CHECK-GI-NEXT:    mov.b v1[12], w9
514; CHECK-GI-NEXT:    ldr w9, [sp, #168]
515; CHECK-GI-NEXT:    mov.b v0[13], w8
516; CHECK-GI-NEXT:    ldr w8, [sp, #48]
517; CHECK-GI-NEXT:    mov.b v1[13], w9
518; CHECK-GI-NEXT:    ldr w9, [sp, #176]
519; CHECK-GI-NEXT:    mov.b v0[14], w8
520; CHECK-GI-NEXT:    ldr w8, [sp, #56]
521; CHECK-GI-NEXT:    mov.b v1[14], w9
522; CHECK-GI-NEXT:    ldr w9, [sp, #184]
523; CHECK-GI-NEXT:    mov.b v0[15], w8
524; CHECK-GI-NEXT:    mov.b v1[15], w9
525; CHECK-GI-NEXT:    shl.16b v0, v0, #7
526; CHECK-GI-NEXT:    shl.16b v1, v1, #7
527; CHECK-GI-NEXT:    sshr.16b v0, v0, #7
528; CHECK-GI-NEXT:    sshr.16b v1, v1, #7
529; CHECK-GI-NEXT:    ret
530  %res = sext <32 x i1> %arg to <32 x i8>
531  ret <32 x i8> %res
532}
533
; zext <64 x i1> -> <64 x i8>. A single CHECK block covers both RUN lines; the
; CHECK-GI warning near the top of the file records that GlobalISel uses the
; fallback path for this function. The expected output spills and reloads x29
; and emits .cfi directives (this function is not marked nounwind). After the
; 16-byte pre-decrement the stack-passed elements are read from [sp, #16]
; upward. The four result vectors v0-v3 are filled one byte lane at a time
; and each is masked with movi.16b #1.
534define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
535; CHECK-LABEL: zext_v64i1:
536; CHECK:       // %bb.0:
537; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
538; CHECK-NEXT:    .cfi_def_cfa_offset 16
539; CHECK-NEXT:    .cfi_offset w29, -16
540; CHECK-NEXT:    ldr w8, [sp, #336]
541; CHECK-NEXT:    ldr w9, [sp, #208]
542; CHECK-NEXT:    fmov s0, w0
543; CHECK-NEXT:    ldr w10, [sp, #80]
544; CHECK-NEXT:    ldr w11, [sp, #216]
545; CHECK-NEXT:    movi.16b v4, #1
546; CHECK-NEXT:    fmov s3, w8
547; CHECK-NEXT:    fmov s2, w9
548; CHECK-NEXT:    ldr w8, [sp, #344]
549; CHECK-NEXT:    fmov s1, w10
550; CHECK-NEXT:    ldr w12, [sp, #88]
551; CHECK-NEXT:    mov.b v0[1], w1
552; CHECK-NEXT:    ldr w9, [sp, #224]
553; CHECK-NEXT:    ldr w10, [sp, #96]
554; CHECK-NEXT:    mov.b v3[1], w8
555; CHECK-NEXT:    mov.b v2[1], w11
556; CHECK-NEXT:    ldr w8, [sp, #352]
557; CHECK-NEXT:    mov.b v1[1], w12
558; CHECK-NEXT:    ldr w11, [sp, #144]
559; CHECK-NEXT:    mov.b v0[2], w2
560; CHECK-NEXT:    mov.b v3[2], w8
561; CHECK-NEXT:    mov.b v2[2], w9
562; CHECK-NEXT:    ldr w8, [sp, #360]
563; CHECK-NEXT:    mov.b v1[2], w10
564; CHECK-NEXT:    ldr w9, [sp, #232]
565; CHECK-NEXT:    ldr w10, [sp, #104]
566; CHECK-NEXT:    mov.b v0[3], w3
567; CHECK-NEXT:    mov.b v3[3], w8
568; CHECK-NEXT:    mov.b v2[3], w9
569; CHECK-NEXT:    ldr w8, [sp, #368]
570; CHECK-NEXT:    mov.b v1[3], w10
571; CHECK-NEXT:    ldr w9, [sp, #240]
572; CHECK-NEXT:    ldr w10, [sp, #112]
573; CHECK-NEXT:    mov.b v0[4], w4
574; CHECK-NEXT:    mov.b v3[4], w8
575; CHECK-NEXT:    mov.b v2[4], w9
576; CHECK-NEXT:    ldr w8, [sp, #376]
577; CHECK-NEXT:    mov.b v1[4], w10
578; CHECK-NEXT:    ldr w9, [sp, #248]
579; CHECK-NEXT:    ldr w10, [sp, #120]
580; CHECK-NEXT:    mov.b v0[5], w5
581; CHECK-NEXT:    mov.b v3[5], w8
582; CHECK-NEXT:    mov.b v2[5], w9
583; CHECK-NEXT:    ldr w8, [sp, #384]
584; CHECK-NEXT:    mov.b v1[5], w10
585; CHECK-NEXT:    ldr w9, [sp, #256]
586; CHECK-NEXT:    ldr w10, [sp, #128]
587; CHECK-NEXT:    mov.b v0[6], w6
588; CHECK-NEXT:    mov.b v3[6], w8
589; CHECK-NEXT:    mov.b v2[6], w9
590; CHECK-NEXT:    ldr w8, [sp, #392]
591; CHECK-NEXT:    mov.b v1[6], w10
592; CHECK-NEXT:    ldr w9, [sp, #264]
593; CHECK-NEXT:    ldr w10, [sp, #136]
594; CHECK-NEXT:    mov.b v0[7], w7
595; CHECK-NEXT:    mov.b v3[7], w8
596; CHECK-NEXT:    mov.b v2[7], w9
597; CHECK-NEXT:    ldr w8, [sp, #16]
598; CHECK-NEXT:    mov.b v1[7], w10
599; CHECK-NEXT:    ldr w9, [sp, #400]
600; CHECK-NEXT:    ldr w10, [sp, #272]
601; CHECK-NEXT:    mov.b v0[8], w8
602; CHECK-NEXT:    ldr w8, [sp, #24]
603; CHECK-NEXT:    mov.b v3[8], w9
604; CHECK-NEXT:    mov.b v2[8], w10
605; CHECK-NEXT:    ldr w9, [sp, #408]
606; CHECK-NEXT:    mov.b v1[8], w11
607; CHECK-NEXT:    ldr w10, [sp, #280]
608; CHECK-NEXT:    ldr w11, [sp, #152]
609; CHECK-NEXT:    mov.b v0[9], w8
610; CHECK-NEXT:    ldr w8, [sp, #32]
611; CHECK-NEXT:    mov.b v3[9], w9
612; CHECK-NEXT:    mov.b v2[9], w10
613; CHECK-NEXT:    ldr w9, [sp, #416]
614; CHECK-NEXT:    mov.b v1[9], w11
615; CHECK-NEXT:    ldr w10, [sp, #288]
616; CHECK-NEXT:    ldr w11, [sp, #160]
617; CHECK-NEXT:    mov.b v0[10], w8
618; CHECK-NEXT:    ldr w8, [sp, #40]
619; CHECK-NEXT:    mov.b v3[10], w9
620; CHECK-NEXT:    mov.b v2[10], w10
621; CHECK-NEXT:    ldr w9, [sp, #424]
622; CHECK-NEXT:    mov.b v1[10], w11
623; CHECK-NEXT:    ldr w10, [sp, #296]
624; CHECK-NEXT:    ldr w11, [sp, #168]
625; CHECK-NEXT:    mov.b v0[11], w8
626; CHECK-NEXT:    ldr w8, [sp, #48]
627; CHECK-NEXT:    mov.b v3[11], w9
628; CHECK-NEXT:    mov.b v2[11], w10
629; CHECK-NEXT:    ldr w9, [sp, #432]
630; CHECK-NEXT:    mov.b v1[11], w11
631; CHECK-NEXT:    ldr w10, [sp, #304]
632; CHECK-NEXT:    ldr w11, [sp, #176]
633; CHECK-NEXT:    mov.b v0[12], w8
634; CHECK-NEXT:    ldr w8, [sp, #56]
635; CHECK-NEXT:    mov.b v3[12], w9
636; CHECK-NEXT:    mov.b v2[12], w10
637; CHECK-NEXT:    ldr w9, [sp, #440]
638; CHECK-NEXT:    mov.b v1[12], w11
639; CHECK-NEXT:    ldr w10, [sp, #312]
640; CHECK-NEXT:    ldr w11, [sp, #184]
641; CHECK-NEXT:    mov.b v0[13], w8
642; CHECK-NEXT:    ldr w8, [sp, #64]
643; CHECK-NEXT:    mov.b v3[13], w9
644; CHECK-NEXT:    mov.b v2[13], w10
645; CHECK-NEXT:    ldr w9, [sp, #448]
646; CHECK-NEXT:    mov.b v1[13], w11
647; CHECK-NEXT:    ldr w10, [sp, #320]
648; CHECK-NEXT:    ldr w11, [sp, #192]
649; CHECK-NEXT:    mov.b v0[14], w8
650; CHECK-NEXT:    ldr w8, [sp, #72]
651; CHECK-NEXT:    mov.b v3[14], w9
652; CHECK-NEXT:    mov.b v2[14], w10
653; CHECK-NEXT:    ldr w9, [sp, #456]
654; CHECK-NEXT:    mov.b v1[14], w11
655; CHECK-NEXT:    ldr w10, [sp, #328]
656; CHECK-NEXT:    ldr w11, [sp, #200]
657; CHECK-NEXT:    mov.b v0[15], w8
658; CHECK-NEXT:    mov.b v3[15], w9
659; CHECK-NEXT:    mov.b v2[15], w10
660; CHECK-NEXT:    mov.b v1[15], w11
661; CHECK-NEXT:    and.16b v0, v0, v4
662; CHECK-NEXT:    and.16b v2, v2, v4
663; CHECK-NEXT:    and.16b v3, v3, v4
664; CHECK-NEXT:    and.16b v1, v1, v4
665; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
666; CHECK-NEXT:    ret
667  %res = zext <64 x i1> %arg to <64 x i8>
668  ret <64 x i8> %res
669}
670
; sext <64 x i1> -> <64 x i8>. Both expected sequences spill and reload x29,
; emit .cfi directives (this function is not marked nounwind), and fill four
; vectors one byte lane at a time from w0-w7 and stack slots starting at
; [sp, #16]. Every byte is shifted left by 7 to move bit 0 into the sign
; position. SelectionDAG (CHECK-SD) then uses cmlt.16b against #0, shuffling
; registers so the results end up in v0-v3; GlobalISel (CHECK-GI) uses
; sshr.16b #7 in place.
671define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
672; CHECK-SD-LABEL: sext_v64i1:
673; CHECK-SD:       // %bb.0:
674; CHECK-SD-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
675; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
676; CHECK-SD-NEXT:    .cfi_offset w29, -16
677; CHECK-SD-NEXT:    ldr w8, [sp, #336]
678; CHECK-SD-NEXT:    ldr w9, [sp, #208]
679; CHECK-SD-NEXT:    fmov s2, w0
680; CHECK-SD-NEXT:    ldr w10, [sp, #80]
681; CHECK-SD-NEXT:    ldr w11, [sp, #216]
682; CHECK-SD-NEXT:    ldr w12, [sp, #88]
683; CHECK-SD-NEXT:    fmov s0, w8
684; CHECK-SD-NEXT:    fmov s1, w9
685; CHECK-SD-NEXT:    ldr w8, [sp, #344]
686; CHECK-SD-NEXT:    fmov s3, w10
687; CHECK-SD-NEXT:    mov.b v2[1], w1
688; CHECK-SD-NEXT:    ldr w9, [sp, #224]
689; CHECK-SD-NEXT:    ldr w10, [sp, #96]
690; CHECK-SD-NEXT:    mov.b v0[1], w8
691; CHECK-SD-NEXT:    mov.b v1[1], w11
692; CHECK-SD-NEXT:    ldr w8, [sp, #352]
693; CHECK-SD-NEXT:    mov.b v3[1], w12
694; CHECK-SD-NEXT:    ldr w11, [sp, #144]
695; CHECK-SD-NEXT:    mov.b v2[2], w2
696; CHECK-SD-NEXT:    mov.b v0[2], w8
697; CHECK-SD-NEXT:    mov.b v1[2], w9
698; CHECK-SD-NEXT:    ldr w8, [sp, #360]
699; CHECK-SD-NEXT:    mov.b v3[2], w10
700; CHECK-SD-NEXT:    ldr w9, [sp, #232]
701; CHECK-SD-NEXT:    ldr w10, [sp, #104]
702; CHECK-SD-NEXT:    mov.b v2[3], w3
703; CHECK-SD-NEXT:    mov.b v0[3], w8
704; CHECK-SD-NEXT:    mov.b v1[3], w9
705; CHECK-SD-NEXT:    ldr w8, [sp, #368]
706; CHECK-SD-NEXT:    mov.b v3[3], w10
707; CHECK-SD-NEXT:    ldr w9, [sp, #240]
708; CHECK-SD-NEXT:    ldr w10, [sp, #112]
709; CHECK-SD-NEXT:    mov.b v2[4], w4
710; CHECK-SD-NEXT:    mov.b v0[4], w8
711; CHECK-SD-NEXT:    mov.b v1[4], w9
712; CHECK-SD-NEXT:    ldr w8, [sp, #376]
713; CHECK-SD-NEXT:    mov.b v3[4], w10
714; CHECK-SD-NEXT:    ldr w9, [sp, #248]
715; CHECK-SD-NEXT:    ldr w10, [sp, #120]
716; CHECK-SD-NEXT:    mov.b v2[5], w5
717; CHECK-SD-NEXT:    mov.b v0[5], w8
718; CHECK-SD-NEXT:    mov.b v1[5], w9
719; CHECK-SD-NEXT:    ldr w8, [sp, #384]
720; CHECK-SD-NEXT:    mov.b v3[5], w10
721; CHECK-SD-NEXT:    ldr w9, [sp, #256]
722; CHECK-SD-NEXT:    ldr w10, [sp, #128]
723; CHECK-SD-NEXT:    mov.b v2[6], w6
724; CHECK-SD-NEXT:    mov.b v0[6], w8
725; CHECK-SD-NEXT:    mov.b v1[6], w9
726; CHECK-SD-NEXT:    ldr w8, [sp, #392]
727; CHECK-SD-NEXT:    mov.b v3[6], w10
728; CHECK-SD-NEXT:    ldr w9, [sp, #264]
729; CHECK-SD-NEXT:    ldr w10, [sp, #136]
730; CHECK-SD-NEXT:    mov.b v2[7], w7
731; CHECK-SD-NEXT:    mov.b v0[7], w8
732; CHECK-SD-NEXT:    mov.b v1[7], w9
733; CHECK-SD-NEXT:    ldr w8, [sp, #16]
734; CHECK-SD-NEXT:    mov.b v3[7], w10
735; CHECK-SD-NEXT:    ldr w9, [sp, #400]
736; CHECK-SD-NEXT:    ldr w10, [sp, #272]
737; CHECK-SD-NEXT:    mov.b v2[8], w8
738; CHECK-SD-NEXT:    ldr w8, [sp, #24]
739; CHECK-SD-NEXT:    mov.b v0[8], w9
740; CHECK-SD-NEXT:    mov.b v1[8], w10
741; CHECK-SD-NEXT:    ldr w9, [sp, #408]
742; CHECK-SD-NEXT:    mov.b v3[8], w11
743; CHECK-SD-NEXT:    ldr w10, [sp, #280]
744; CHECK-SD-NEXT:    ldr w11, [sp, #152]
745; CHECK-SD-NEXT:    mov.b v2[9], w8
746; CHECK-SD-NEXT:    ldr w8, [sp, #32]
747; CHECK-SD-NEXT:    mov.b v0[9], w9
748; CHECK-SD-NEXT:    mov.b v1[9], w10
749; CHECK-SD-NEXT:    ldr w9, [sp, #416]
750; CHECK-SD-NEXT:    mov.b v3[9], w11
751; CHECK-SD-NEXT:    ldr w10, [sp, #288]
752; CHECK-SD-NEXT:    ldr w11, [sp, #160]
753; CHECK-SD-NEXT:    mov.b v2[10], w8
754; CHECK-SD-NEXT:    ldr w8, [sp, #40]
755; CHECK-SD-NEXT:    mov.b v0[10], w9
756; CHECK-SD-NEXT:    mov.b v1[10], w10
757; CHECK-SD-NEXT:    ldr w9, [sp, #424]
758; CHECK-SD-NEXT:    mov.b v3[10], w11
759; CHECK-SD-NEXT:    ldr w10, [sp, #296]
760; CHECK-SD-NEXT:    ldr w11, [sp, #168]
761; CHECK-SD-NEXT:    mov.b v2[11], w8
762; CHECK-SD-NEXT:    ldr w8, [sp, #48]
763; CHECK-SD-NEXT:    mov.b v0[11], w9
764; CHECK-SD-NEXT:    mov.b v1[11], w10
765; CHECK-SD-NEXT:    ldr w9, [sp, #432]
766; CHECK-SD-NEXT:    mov.b v3[11], w11
767; CHECK-SD-NEXT:    ldr w10, [sp, #304]
768; CHECK-SD-NEXT:    ldr w11, [sp, #176]
769; CHECK-SD-NEXT:    mov.b v2[12], w8
770; CHECK-SD-NEXT:    ldr w8, [sp, #56]
771; CHECK-SD-NEXT:    mov.b v0[12], w9
772; CHECK-SD-NEXT:    mov.b v1[12], w10
773; CHECK-SD-NEXT:    ldr w9, [sp, #440]
774; CHECK-SD-NEXT:    mov.b v3[12], w11
775; CHECK-SD-NEXT:    ldr w10, [sp, #312]
776; CHECK-SD-NEXT:    ldr w11, [sp, #184]
777; CHECK-SD-NEXT:    mov.b v2[13], w8
778; CHECK-SD-NEXT:    ldr w8, [sp, #64]
779; CHECK-SD-NEXT:    mov.b v0[13], w9
780; CHECK-SD-NEXT:    mov.b v1[13], w10
781; CHECK-SD-NEXT:    ldr w9, [sp, #448]
782; CHECK-SD-NEXT:    mov.b v3[13], w11
783; CHECK-SD-NEXT:    ldr w10, [sp, #320]
784; CHECK-SD-NEXT:    ldr w11, [sp, #192]
785; CHECK-SD-NEXT:    mov.b v2[14], w8
786; CHECK-SD-NEXT:    ldr w8, [sp, #72]
787; CHECK-SD-NEXT:    mov.b v0[14], w9
788; CHECK-SD-NEXT:    mov.b v1[14], w10
789; CHECK-SD-NEXT:    ldr w9, [sp, #456]
790; CHECK-SD-NEXT:    mov.b v3[14], w11
791; CHECK-SD-NEXT:    ldr w10, [sp, #328]
792; CHECK-SD-NEXT:    ldr w11, [sp, #200]
793; CHECK-SD-NEXT:    mov.b v2[15], w8
794; CHECK-SD-NEXT:    mov.b v0[15], w9
795; CHECK-SD-NEXT:    mov.b v1[15], w10
796; CHECK-SD-NEXT:    mov.b v3[15], w11
797; CHECK-SD-NEXT:    shl.16b v2, v2, #7
798; CHECK-SD-NEXT:    shl.16b v4, v1, #7
799; CHECK-SD-NEXT:    shl.16b v5, v0, #7
800; CHECK-SD-NEXT:    shl.16b v3, v3, #7
801; CHECK-SD-NEXT:    cmlt.16b v0, v2, #0
802; CHECK-SD-NEXT:    cmlt.16b v2, v4, #0
803; CHECK-SD-NEXT:    cmlt.16b v1, v3, #0
804; CHECK-SD-NEXT:    cmlt.16b v3, v5, #0
805; CHECK-SD-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
806; CHECK-SD-NEXT:    ret
807;
808; CHECK-GI-LABEL: sext_v64i1:
809; CHECK-GI:       // %bb.0:
810; CHECK-GI-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
811; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
812; CHECK-GI-NEXT:    .cfi_offset w29, -16
813; CHECK-GI-NEXT:    ldr w13, [sp, #80]
814; CHECK-GI-NEXT:    ldr w11, [sp, #208]
815; CHECK-GI-NEXT:    fmov s0, w0
816; CHECK-GI-NEXT:    ldr w9, [sp, #336]
817; CHECK-GI-NEXT:    ldr w8, [sp, #88]
818; CHECK-GI-NEXT:    ldr w10, [sp, #216]
819; CHECK-GI-NEXT:    fmov s1, w13
820; CHECK-GI-NEXT:    fmov s2, w11
821; CHECK-GI-NEXT:    ldr w12, [sp, #344]
822; CHECK-GI-NEXT:    fmov s3, w9
823; CHECK-GI-NEXT:    mov.b v0[1], w1
824; CHECK-GI-NEXT:    ldr w9, [sp, #224]
825; CHECK-GI-NEXT:    ldr w11, [sp, #400]
826; CHECK-GI-NEXT:    mov.b v1[1], w8
827; CHECK-GI-NEXT:    mov.b v2[1], w10
828; CHECK-GI-NEXT:    ldr w8, [sp, #96]
829; CHECK-GI-NEXT:    mov.b v3[1], w12
830; CHECK-GI-NEXT:    ldr w10, [sp, #352]
831; CHECK-GI-NEXT:    mov.b v0[2], w2
832; CHECK-GI-NEXT:    mov.b v1[2], w8
833; CHECK-GI-NEXT:    mov.b v2[2], w9
834; CHECK-GI-NEXT:    ldr w8, [sp, #104]
835; CHECK-GI-NEXT:    mov.b v3[2], w10
836; CHECK-GI-NEXT:    ldr w9, [sp, #232]
837; CHECK-GI-NEXT:    ldr w10, [sp, #360]
838; CHECK-GI-NEXT:    mov.b v0[3], w3
839; CHECK-GI-NEXT:    mov.b v1[3], w8
840; CHECK-GI-NEXT:    mov.b v2[3], w9
841; CHECK-GI-NEXT:    ldr w8, [sp, #112]
842; CHECK-GI-NEXT:    mov.b v3[3], w10
843; CHECK-GI-NEXT:    ldr w9, [sp, #240]
844; CHECK-GI-NEXT:    ldr w10, [sp, #368]
845; CHECK-GI-NEXT:    mov.b v0[4], w4
846; CHECK-GI-NEXT:    mov.b v1[4], w8
847; CHECK-GI-NEXT:    mov.b v2[4], w9
848; CHECK-GI-NEXT:    ldr w8, [sp, #120]
849; CHECK-GI-NEXT:    mov.b v3[4], w10
850; CHECK-GI-NEXT:    ldr w9, [sp, #248]
851; CHECK-GI-NEXT:    ldr w10, [sp, #376]
852; CHECK-GI-NEXT:    mov.b v0[5], w5
853; CHECK-GI-NEXT:    mov.b v1[5], w8
854; CHECK-GI-NEXT:    mov.b v2[5], w9
855; CHECK-GI-NEXT:    ldr w8, [sp, #128]
856; CHECK-GI-NEXT:    mov.b v3[5], w10
857; CHECK-GI-NEXT:    ldr w9, [sp, #256]
858; CHECK-GI-NEXT:    ldr w10, [sp, #384]
859; CHECK-GI-NEXT:    mov.b v0[6], w6
860; CHECK-GI-NEXT:    mov.b v1[6], w8
861; CHECK-GI-NEXT:    mov.b v2[6], w9
862; CHECK-GI-NEXT:    ldr w8, [sp, #136]
863; CHECK-GI-NEXT:    mov.b v3[6], w10
864; CHECK-GI-NEXT:    ldr w9, [sp, #264]
865; CHECK-GI-NEXT:    ldr w10, [sp, #392]
866; CHECK-GI-NEXT:    mov.b v0[7], w7
867; CHECK-GI-NEXT:    mov.b v1[7], w8
868; CHECK-GI-NEXT:    mov.b v2[7], w9
869; CHECK-GI-NEXT:    ldr w8, [sp, #16]
870; CHECK-GI-NEXT:    mov.b v3[7], w10
871; CHECK-GI-NEXT:    ldr w9, [sp, #144]
872; CHECK-GI-NEXT:    ldr w10, [sp, #272]
873; CHECK-GI-NEXT:    mov.b v0[8], w8
874; CHECK-GI-NEXT:    ldr w8, [sp, #24]
875; CHECK-GI-NEXT:    mov.b v1[8], w9
876; CHECK-GI-NEXT:    mov.b v2[8], w10
877; CHECK-GI-NEXT:    ldr w9, [sp, #152]
878; CHECK-GI-NEXT:    mov.b v3[8], w11
879; CHECK-GI-NEXT:    ldr w10, [sp, #280]
880; CHECK-GI-NEXT:    ldr w11, [sp, #408]
881; CHECK-GI-NEXT:    mov.b v0[9], w8
882; CHECK-GI-NEXT:    ldr w8, [sp, #32]
883; CHECK-GI-NEXT:    mov.b v1[9], w9
884; CHECK-GI-NEXT:    mov.b v2[9], w10
885; CHECK-GI-NEXT:    ldr w9, [sp, #160]
886; CHECK-GI-NEXT:    mov.b v3[9], w11
887; CHECK-GI-NEXT:    ldr w10, [sp, #288]
888; CHECK-GI-NEXT:    ldr w11, [sp, #416]
889; CHECK-GI-NEXT:    mov.b v0[10], w8
890; CHECK-GI-NEXT:    ldr w8, [sp, #40]
891; CHECK-GI-NEXT:    mov.b v1[10], w9
892; CHECK-GI-NEXT:    mov.b v2[10], w10
893; CHECK-GI-NEXT:    ldr w9, [sp, #168]
894; CHECK-GI-NEXT:    mov.b v3[10], w11
895; CHECK-GI-NEXT:    ldr w10, [sp, #296]
896; CHECK-GI-NEXT:    ldr w11, [sp, #424]
897; CHECK-GI-NEXT:    mov.b v0[11], w8
898; CHECK-GI-NEXT:    ldr w8, [sp, #48]
899; CHECK-GI-NEXT:    mov.b v1[11], w9
900; CHECK-GI-NEXT:    mov.b v2[11], w10
901; CHECK-GI-NEXT:    ldr w9, [sp, #176]
902; CHECK-GI-NEXT:    mov.b v3[11], w11
903; CHECK-GI-NEXT:    ldr w10, [sp, #304]
904; CHECK-GI-NEXT:    ldr w11, [sp, #432]
905; CHECK-GI-NEXT:    mov.b v0[12], w8
906; CHECK-GI-NEXT:    ldr w8, [sp, #56]
907; CHECK-GI-NEXT:    mov.b v1[12], w9
908; CHECK-GI-NEXT:    mov.b v2[12], w10
909; CHECK-GI-NEXT:    ldr w9, [sp, #184]
910; CHECK-GI-NEXT:    mov.b v3[12], w11
911; CHECK-GI-NEXT:    ldr w10, [sp, #312]
912; CHECK-GI-NEXT:    ldr w11, [sp, #440]
913; CHECK-GI-NEXT:    mov.b v0[13], w8
914; CHECK-GI-NEXT:    ldr w8, [sp, #64]
915; CHECK-GI-NEXT:    mov.b v1[13], w9
916; CHECK-GI-NEXT:    mov.b v2[13], w10
917; CHECK-GI-NEXT:    ldr w9, [sp, #192]
918; CHECK-GI-NEXT:    mov.b v3[13], w11
919; CHECK-GI-NEXT:    ldr w10, [sp, #320]
920; CHECK-GI-NEXT:    ldr w11, [sp, #448]
921; CHECK-GI-NEXT:    mov.b v0[14], w8
922; CHECK-GI-NEXT:    ldr w8, [sp, #72]
923; CHECK-GI-NEXT:    mov.b v1[14], w9
924; CHECK-GI-NEXT:    mov.b v2[14], w10
925; CHECK-GI-NEXT:    ldr w9, [sp, #200]
926; CHECK-GI-NEXT:    mov.b v3[14], w11
927; CHECK-GI-NEXT:    ldr w10, [sp, #328]
928; CHECK-GI-NEXT:    ldr w11, [sp, #456]
929; CHECK-GI-NEXT:    mov.b v0[15], w8
930; CHECK-GI-NEXT:    mov.b v1[15], w9
931; CHECK-GI-NEXT:    mov.b v2[15], w10
932; CHECK-GI-NEXT:    mov.b v3[15], w11
933; CHECK-GI-NEXT:    shl.16b v0, v0, #7
934; CHECK-GI-NEXT:    shl.16b v1, v1, #7
935; CHECK-GI-NEXT:    shl.16b v2, v2, #7
936; CHECK-GI-NEXT:    shl.16b v3, v3, #7
937; CHECK-GI-NEXT:    sshr.16b v0, v0, #7
938; CHECK-GI-NEXT:    sshr.16b v1, v1, #7
939; CHECK-GI-NEXT:    sshr.16b v2, v2, #7
940; CHECK-GI-NEXT:    sshr.16b v3, v3, #7
941; CHECK-GI-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
942; CHECK-GI-NEXT:    ret
943  %res = sext <64 x i1> %arg to <64 x i8>
944  ret <64 x i8> %res
945}
946
947; x0 and x1 are the real return registers; SDAG also touches v0 (see its "kill" annotation below) for unknown reasons.
; sext <1 x i64> -> <1 x i128>. Both selectors move the 64-bit element out of
; d0 with fmov and form the upper 64 bits as an arithmetic shift right by 63.
; SelectionDAG (CHECK-SD) reuses x0 as the shift source and carries a register
; "kill" annotation; GlobalISel (CHECK-GI) issues a second fmov into x8.
948define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
949; CHECK-SD-LABEL: sext_v1x64:
950; CHECK-SD:       // %bb.0:
951; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
952; CHECK-SD-NEXT:    fmov x0, d0
953; CHECK-SD-NEXT:    asr x1, x0, #63
954; CHECK-SD-NEXT:    ret
955;
956; CHECK-GI-LABEL: sext_v1x64:
957; CHECK-GI:       // %bb.0:
958; CHECK-GI-NEXT:    fmov x8, d0
959; CHECK-GI-NEXT:    fmov x0, d0
960; CHECK-GI-NEXT:    asr x1, x8, #63
961; CHECK-GI-NEXT:    ret
962  %res = sext <1 x i64> %arg to <1 x i128>
963  ret <1 x i128> %res
964}
965