xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6target triple = "aarch64-unknown-linux-gnu"
7
8declare void @def(ptr)
9
; alloc_v4i8: de-interleaving (stride-2) load from a 4-byte stack slot.
; The IR passes a [4 x i8] alloca to @def, reloads it as <4 x i8>, keeps
; elements 0 and 2 via shufflevector, and stores the <2 x i8> result to %st_ptr.
; CHECK expects the load to become a predicated ld2b (ptrue p0.b, vl2) from the
; alloca address kept in x20 across the call, followed by a mov/zip1 and a
; st1b through x19 (the saved %st_ptr).
; NONEON-NOSVE expects a purely scalar ldrb/strh/ldrh/strb sequence that goes
; through stack temporaries.
; The assertion lines below are autogenerated; regenerate them rather than
; editing by hand.
10define void @alloc_v4i8(ptr %st_ptr) nounwind {
11; CHECK-LABEL: alloc_v4i8:
12; CHECK:       // %bb.0:
13; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
14; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
15; CHECK-NEXT:    mov x19, x0
16; CHECK-NEXT:    add x0, sp, #12
17; CHECK-NEXT:    add x20, sp, #12
18; CHECK-NEXT:    bl def
19; CHECK-NEXT:    ptrue p0.b, vl2
20; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
21; CHECK-NEXT:    ptrue p0.s, vl2
22; CHECK-NEXT:    mov z2.b, z0.b[1]
23; CHECK-NEXT:    zip1 z0.s, z0.s, z2.s
24; CHECK-NEXT:    st1b { z0.s }, p0, [x19]
25; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
26; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
27; CHECK-NEXT:    ret
28;
29; NONEON-NOSVE-LABEL: alloc_v4i8:
30; NONEON-NOSVE:       // %bb.0:
31; NONEON-NOSVE-NEXT:    sub sp, sp, #48
32; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
33; NONEON-NOSVE-NEXT:    mov x19, x0
34; NONEON-NOSVE-NEXT:    add x0, sp, #28
35; NONEON-NOSVE-NEXT:    bl def
36; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
37; NONEON-NOSVE-NEXT:    strh w8, [sp, #12]
38; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
39; NONEON-NOSVE-NEXT:    strh w8, [sp, #8]
40; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
41; NONEON-NOSVE-NEXT:    str d0, [sp, #16]
42; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
43; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #16]
44; NONEON-NOSVE-NEXT:    strb w8, [x19, #1]
45; NONEON-NOSVE-NEXT:    strb w9, [x19]
46; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
47; NONEON-NOSVE-NEXT:    add sp, sp, #48
48; NONEON-NOSVE-NEXT:    ret
49  %alloc = alloca [4 x i8]
50  call void @def(ptr %alloc)
51  %load = load <4 x i8>, ptr %alloc
; Mask <0, 2> keeps the even-indexed bytes of the loaded vector.
52  %strided.vec = shufflevector <4 x i8> %load, <4 x i8> poison, <2 x i32> <i32 0, i32 2>
53  store <2 x i8> %strided.vec, ptr %st_ptr
54  ret void
55}
56
; alloc_v6i8: stride-2 extraction of the odd-indexed bytes from a 6-byte stack
; slot. The IR passes a [6 x i8] alloca to @def, reloads it as <6 x i8>, keeps
; elements 1, 3 and 5 via shufflevector, and stores the <3 x i8> result to
; %st_ptr.
; CHECK expects no ld2b here: the slot is loaded with a plain ldr d0, lanes
; 1/3/5 are picked with indexed mov plus zip1, and the 3-byte result is written
; as a halfword (strh at offset 0) plus a byte (strb at offset 2).
; NONEON-NOSVE expects the same 2-byte + 1-byte store split, built from scalar
; byte loads and stores through stack temporaries.
; The assertion lines below are autogenerated; regenerate them rather than
; editing by hand.
57define void @alloc_v6i8(ptr %st_ptr) nounwind {
58; CHECK-LABEL: alloc_v6i8:
59; CHECK:       // %bb.0:
60; CHECK-NEXT:    sub sp, sp, #32
61; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
62; CHECK-NEXT:    mov x19, x0
63; CHECK-NEXT:    add x0, sp, #8
64; CHECK-NEXT:    bl def
65; CHECK-NEXT:    ldr d0, [sp, #8]
66; CHECK-NEXT:    ptrue p0.h, vl4
67; CHECK-NEXT:    add x8, sp, #4
68; CHECK-NEXT:    ptrue p1.s, vl2
69; CHECK-NEXT:    mov z1.b, z0.b[3]
70; CHECK-NEXT:    mov z2.b, z0.b[1]
71; CHECK-NEXT:    mov z0.b, z0.b[5]
72; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
73; CHECK-NEXT:    zip1 z1.s, z1.s, z0.s
74; CHECK-NEXT:    st1b { z1.h }, p0, [x8]
75; CHECK-NEXT:    ld1h { z1.s }, p1/z, [x8]
76; CHECK-NEXT:    fmov w8, s0
77; CHECK-NEXT:    strb w8, [x19, #2]
78; CHECK-NEXT:    fmov w8, s1
79; CHECK-NEXT:    strh w8, [x19]
80; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
81; CHECK-NEXT:    add sp, sp, #32
82; CHECK-NEXT:    ret
83;
84; NONEON-NOSVE-LABEL: alloc_v6i8:
85; NONEON-NOSVE:       // %bb.0:
86; NONEON-NOSVE-NEXT:    sub sp, sp, #48
87; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
88; NONEON-NOSVE-NEXT:    mov x19, x0
89; NONEON-NOSVE-NEXT:    add x0, sp, #24
90; NONEON-NOSVE-NEXT:    bl def
91; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
92; NONEON-NOSVE-NEXT:    str x8, [sp]
93; NONEON-NOSVE-NEXT:    ldr d0, [sp]
94; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
95; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
96; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
97; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
98; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
99; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
100; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #20]
101; NONEON-NOSVE-NEXT:    strb w8, [x19, #2]
102; NONEON-NOSVE-NEXT:    strh w9, [x19]
103; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
104; NONEON-NOSVE-NEXT:    add sp, sp, #48
105; NONEON-NOSVE-NEXT:    ret
106  %alloc = alloca [6 x i8]
107  call void @def(ptr %alloc)
108  %load = load <6 x i8>, ptr %alloc
; Mask <1, 3, 5> keeps the odd-indexed bytes, giving a non-power-of-two
; <3 x i8> result.
109  %strided.vec = shufflevector <6 x i8> %load, <6 x i8> poison, <3 x i32> <i32 1, i32 3, i32 5>
110  store <3 x i8> %strided.vec, ptr %st_ptr
111  ret void
112}
113
; alloc_v32i8: stride-2 extraction of nine even-indexed bytes (0, 2, ..., 16)
; from a 32-byte stack slot. The IR passes a [32 x i8] alloca to @def, reloads
; it as <32 x i8>, shuffles out the nine bytes, and stores the <9 x i8> result
; to %st_ptr.
; CHECK expects the first eight result bytes to come from a tbl over the low
; 16 bytes of the slot, using an index vector loaded from the constant pool
; (.LCPI2_0), and to be stored as one 64-bit str. The ninth byte (source
; element 16) is read from [sp, #16] and stored with strb at offset 8.
; NONEON-NOSVE expects the same 8-byte + 1-byte store split, with the shuffle
; done byte by byte through stack temporaries.
; The assertion lines below are autogenerated; regenerate them rather than
; editing by hand.
114define void @alloc_v32i8(ptr %st_ptr) nounwind {
115; CHECK-LABEL: alloc_v32i8:
116; CHECK:       // %bb.0:
117; CHECK-NEXT:    sub sp, sp, #48
118; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
119; CHECK-NEXT:    mov x19, x0
120; CHECK-NEXT:    mov x0, sp
121; CHECK-NEXT:    bl def
122; CHECK-NEXT:    adrp x8, .LCPI2_0
123; CHECK-NEXT:    ldr q0, [sp]
124; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
125; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
126; CHECK-NEXT:    ldr q1, [sp, #16]
127; CHECK-NEXT:    fmov w8, s1
128; CHECK-NEXT:    strb w8, [x19, #8]
129; CHECK-NEXT:    fmov x8, d0
130; CHECK-NEXT:    str x8, [x19]
131; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
132; CHECK-NEXT:    add sp, sp, #48
133; CHECK-NEXT:    ret
134;
135; NONEON-NOSVE-LABEL: alloc_v32i8:
136; NONEON-NOSVE:       // %bb.0:
137; NONEON-NOSVE-NEXT:    sub sp, sp, #112
138; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #96] // 16-byte Folded Spill
139; NONEON-NOSVE-NEXT:    mov x19, x0
140; NONEON-NOSVE-NEXT:    add x0, sp, #64
141; NONEON-NOSVE-NEXT:    bl def
142; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
143; NONEON-NOSVE-NEXT:    str q0, [sp]
144; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
145; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
146; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
147; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
148; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
149; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
150; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
151; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
152; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
153; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
154; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
155; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
156; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
157; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
158; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
159; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
160; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
161; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #32]
162; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
163; NONEON-NOSVE-NEXT:    strb w8, [x19, #8]
164; NONEON-NOSVE-NEXT:    str q0, [sp, #48]
165; NONEON-NOSVE-NEXT:    ldr x8, [sp, #48]
166; NONEON-NOSVE-NEXT:    str x8, [x19]
167; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #96] // 16-byte Folded Reload
168; NONEON-NOSVE-NEXT:    add sp, sp, #112
169; NONEON-NOSVE-NEXT:    ret
170  %alloc = alloca [32 x i8]
171  call void @def(ptr %alloc)
172  %load = load <32 x i8>, ptr %alloc
; The mask keeps every second byte from index 0 through 16: nine elements, so
; the result type <9 x i8> is not a power of two.
173  %strided.vec = shufflevector <32 x i8> %load, <32 x i8> poison, <9 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16>
174  store <9 x i8> %strided.vec, ptr %st_ptr
175  ret void
176}
177
177
178
; alloc_v8f64: de-interleaving (stride-2) load of doubles from a 64-byte stack
; slot. The IR passes an [8 x double] alloca to @def, reloads it as
; <8 x double>, keeps elements 0, 2, 4 and 6 via shufflevector, and stores the
; <4 x double> result to %st_ptr.
; CHECK expects two predicated ld2d instructions (ptrue p0.d, vl2): one at the
; alloca base kept in x20, and one at base + 4 elements (x8 = 4, lsl #3). The
; first register of each pair (z0, z2) is then written out with a single
; stp q0, q2.
; NONEON-NOSVE expects the even elements to be gathered with scalar d-register
; loads and stores through stack temporaries before the final stp of two
; q registers.
; The assertion lines below are autogenerated; regenerate them rather than
; editing by hand.
179define void @alloc_v8f64(ptr %st_ptr) nounwind {
180; CHECK-LABEL: alloc_v8f64:
181; CHECK:       // %bb.0:
182; CHECK-NEXT:    sub sp, sp, #96
183; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
184; CHECK-NEXT:    mov x19, x0
185; CHECK-NEXT:    mov x0, sp
186; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
187; CHECK-NEXT:    mov x20, sp
188; CHECK-NEXT:    bl def
189; CHECK-NEXT:    ptrue p0.d, vl2
190; CHECK-NEXT:    mov x8, #4 // =0x4
191; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x20]
192; CHECK-NEXT:    ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
193; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
194; CHECK-NEXT:    stp q0, q2, [x19]
195; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
196; CHECK-NEXT:    add sp, sp, #96
197; CHECK-NEXT:    ret
198;
199; NONEON-NOSVE-LABEL: alloc_v8f64:
200; NONEON-NOSVE:       // %bb.0:
201; NONEON-NOSVE-NEXT:    sub sp, sp, #176
202; NONEON-NOSVE-NEXT:    stp x30, x19, [sp, #160] // 16-byte Folded Spill
203; NONEON-NOSVE-NEXT:    mov x19, x0
204; NONEON-NOSVE-NEXT:    add x0, sp, #96
205; NONEON-NOSVE-NEXT:    bl def
206; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
207; NONEON-NOSVE-NEXT:    ldp q2, q3, [sp, #128]
208; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #48]
209; NONEON-NOSVE-NEXT:    ldr d1, [sp, #64]
210; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
211; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
212; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
213; NONEON-NOSVE-NEXT:    ldr d1, [sp, #16]
214; NONEON-NOSVE-NEXT:    ldr d0, [sp]
215; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
216; NONEON-NOSVE-NEXT:    ldr q1, [sp, #80]
217; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
218; NONEON-NOSVE-NEXT:    stp q1, q0, [x19]
219; NONEON-NOSVE-NEXT:    ldp x30, x19, [sp, #160] // 16-byte Folded Reload
220; NONEON-NOSVE-NEXT:    add sp, sp, #176
221; NONEON-NOSVE-NEXT:    ret
222  %alloc = alloca [8 x double]
223  call void @def(ptr %alloc)
224  %load = load <8 x double>, ptr %alloc
; Mask <0, 2, 4, 6> keeps the even-indexed doubles.
225  %strided.vec = shufflevector <8 x double> %load, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
226  store <4 x double> %strided.vec, ptr %st_ptr
227  ret void
228}
229