xref: /llvm-project/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll (revision 6e1ea7e5a7b6e581bf9a030b98a7f63ee2833278)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
3
4; This file tests the following combinations related to streaming-compatible functions:
5; [ ] N  ->  SC    (Normal -> Streaming-compatible)
6; [ ] SC  ->  N    (Streaming-compatible -> Normal)
7; [ ] SC  ->  S    (Streaming-compatible -> Streaming)
8; [ ] SC  ->  SC   (Streaming-compatible -> Streaming-compatible)
9;
10; The following combination is tested in sme-streaming-interface.ll
11; [ ] S  ->  SC    (Streaming -> Streaming-compatible)
12
; Callees shared by the tests below, one per streaming interface: no streaming
; attribute, "aarch64_pstate_sm_enabled" and "aarch64_pstate_sm_compatible".
13declare void @normal_callee();
14declare void @streaming_callee() "aarch64_pstate_sm_enabled";
15declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
16
17; [x] N   ->  SC   (Normal -> Streaming-compatible)
18; [ ] SC  ->  N    (Streaming-compatible -> Normal)
19; [ ] SC  ->  S    (Streaming-compatible -> Streaming)
20; [ ] SC  ->  SC   (Streaming-compatible -> Streaming-compatible)
;
; A caller without any streaming attribute calls a streaming-compatible callee.
; The expected assembly is a plain `bl` with only x30 spilled and reloaded:
; there is no `smstart`/`smstop` and no call to __arm_sme_state.
21define void @normal_caller_streaming_compatible_callee() nounwind {
22; CHECK-LABEL: normal_caller_streaming_compatible_callee:
23; CHECK:       // %bb.0:
24; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
25; CHECK-NEXT:    bl streaming_compatible_callee
26; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
27; CHECK-NEXT:    ret
28  call void @streaming_compatible_callee();
29  ret void;
30}
31
32; [ ] N   ->  SC   (Normal -> Streaming-compatible)
33; [x] SC  ->  N    (Streaming-compatible -> Normal)
34; [ ] SC  ->  S    (Streaming-compatible -> Streaming)
35; [ ] SC  ->  SC   (Streaming-compatible -> Streaming-compatible)
;
; A streaming-compatible caller calls a callee without streaming attributes.
; Expected assembly: d8-d15, x30, the `cntd` value in x9 and x19 are spilled,
; __arm_sme_state is called and bit 0 of its x0 result is kept in x19
; (presumably the "currently in streaming mode" bit; see the SME ABI).
; `smstop sm` runs before the call and `smstart sm` after it, each skipped by
; `tbz` when that bit is clear.
36define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" nounwind {
37; CHECK-LABEL: streaming_compatible_caller_normal_callee:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
40; CHECK-NEXT:    cntd x9
41; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
42; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
43; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
44; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
45; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
46; CHECK-NEXT:    bl __arm_sme_state
47; CHECK-NEXT:    and x19, x0, #0x1
48; CHECK-NEXT:    tbz w19, #0, .LBB1_2
49; CHECK-NEXT:  // %bb.1:
50; CHECK-NEXT:    smstop sm
51; CHECK-NEXT:  .LBB1_2:
52; CHECK-NEXT:    bl normal_callee
53; CHECK-NEXT:    tbz w19, #0, .LBB1_4
54; CHECK-NEXT:  // %bb.3:
55; CHECK-NEXT:    smstart sm
56; CHECK-NEXT:  .LBB1_4:
57; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
58; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
59; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
60; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
61; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
62; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
63; CHECK-NEXT:    ret
64
65  call void @normal_callee();
66  ret void;
67}
68
69; Streaming Compatible Caller, Streaming Callee
70
71; [ ] N   ->  SC   (Normal -> Streaming-compatible)
72; [ ] SC  ->  N    (Streaming-compatible -> Normal)
73; [x] SC  ->  S    (Streaming-compatible -> Streaming)
74; [ ] SC  ->  SC   (Streaming-compatible -> Streaming-compatible)
;
; Mirror image of the SC -> N case: the callee carries
; "aarch64_pstate_sm_enabled", so the expected assembly executes `smstart sm`
; before the call and `smstop sm` after it. Both are skipped by `tbnz` when
; bit 0 of the __arm_sme_state result (held in x19) is already set.
75define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" nounwind {
76; CHECK-LABEL: streaming_compatible_caller_streaming_callee:
77; CHECK:       // %bb.0:
78; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
79; CHECK-NEXT:    cntd x9
80; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
81; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
82; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
83; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
84; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
85; CHECK-NEXT:    bl __arm_sme_state
86; CHECK-NEXT:    and x19, x0, #0x1
87; CHECK-NEXT:    tbnz w19, #0, .LBB2_2
88; CHECK-NEXT:  // %bb.1:
89; CHECK-NEXT:    smstart sm
90; CHECK-NEXT:  .LBB2_2:
91; CHECK-NEXT:    bl streaming_callee
92; CHECK-NEXT:    tbnz w19, #0, .LBB2_4
93; CHECK-NEXT:  // %bb.3:
94; CHECK-NEXT:    smstop sm
95; CHECK-NEXT:  .LBB2_4:
96; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
97; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
98; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
99; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
100; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
101; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
102; CHECK-NEXT:    ret
103
104  call void @streaming_callee();
105  ret void;
106}
107
108; [ ] N  ->  SC    (Normal -> Streaming-compatible)
109; [ ] SC  ->  N    (Streaming-compatible -> Normal)
110; [ ] SC  ->  S    (Streaming-compatible -> Streaming)
111; [x] SC  ->  SC   (Streaming-compatible -> Streaming-compatible)
;
; Caller and callee are both streaming-compatible. The expected assembly is
; the same plain `bl` as in the N -> SC case: only x30 is spilled, and there
; is no __arm_sme_state call and no `smstart`/`smstop`.
112define void @streaming_compatible_caller_and_callee() "aarch64_pstate_sm_compatible" nounwind {
113; CHECK-LABEL: streaming_compatible_caller_and_callee:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
116; CHECK-NEXT:    bl streaming_compatible_callee
117; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
118; CHECK-NEXT:    ret
119
120  call void @streaming_compatible_callee();
121  ret void;
122}
123
124
125;
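126; Special cases: vector and predicate values that are live across the call, blocks with no normal successor, tail calls, and argument passing.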
127;
128
; A <2 x double> argument is passed to a callee without streaming attributes
; and is also used after the call, so it is live across the mode switch.
; Expected assembly: the argument is first stored as z0 into a slot allocated
; with `addvl` (around the __arm_sme_state call), then kept as q0 at [sp] and
; reloaded after the conditional `smstop sm`, right before the call. The call
; result is spilled before the conditional `smstart sm`. The final fadd is
; emitted as a predicated SVE `fadd` under `ptrue p0.d, vl2`, with the original
; argument reloaded into z1 from the `addvl` slot.
129define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
130; CHECK-LABEL: streaming_compatible_with_neon_vectors:
131; CHECK:       // %bb.0:
132; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
133; CHECK-NEXT:    cntd x9
134; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
135; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
136; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
137; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
138; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
139; CHECK-NEXT:    sub sp, sp, #16
140; CHECK-NEXT:    addvl sp, sp, #-1
141; CHECK-NEXT:    add x8, sp, #16
142; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
143; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
144; CHECK-NEXT:    bl __arm_sme_state
145; CHECK-NEXT:    add x8, sp, #16
146; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
147; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
148; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
149; CHECK-NEXT:    and x19, x0, #0x1
150; CHECK-NEXT:    tbz w19, #0, .LBB4_2
151; CHECK-NEXT:  // %bb.1:
152; CHECK-NEXT:    smstop sm
153; CHECK-NEXT:  .LBB4_2:
154; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
155; CHECK-NEXT:    bl normal_callee_vec_arg
156; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
157; CHECK-NEXT:    tbz w19, #0, .LBB4_4
158; CHECK-NEXT:  // %bb.3:
159; CHECK-NEXT:    smstart sm
160; CHECK-NEXT:  .LBB4_4:
161; CHECK-NEXT:    add x8, sp, #16
162; CHECK-NEXT:    ptrue p0.d, vl2
163; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
164; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
165; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
166; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
167; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
168; CHECK-NEXT:    addvl sp, sp, #1
169; CHECK-NEXT:    add sp, sp, #16
170; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
171; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
172; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
173; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
174; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
175; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
176; CHECK-NEXT:    ret
177  %res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
178  %fadd = fadd <2 x double> %res, %arg
179  ret <2 x double> %fadd
180}
; Callee without streaming attributes that takes and returns a <2 x double>.
181declare <2 x double> @normal_callee_vec_arg(<2 x double>)
182
; A <vscale x 2 x double> argument is passed to a callee without streaming
; attributes and is also used after the call.
; Expected assembly: z8-z23 and p4-p15 are saved in an 18-VL `addvl` area and
; two further VL-sized slots are allocated. The argument is spilled to slot #1
; before the __arm_sme_state call and reloaded after the conditional
; `smstop sm`. The call result is spilled to slot #0 before the conditional
; `smstart sm`, and both are reloaded afterwards for the unpredicated `fadd`.
183define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale x 2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
184; CHECK-LABEL: streaming_compatible_with_scalable_vectors:
185; CHECK:       // %bb.0:
186; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
187; CHECK-NEXT:    cntd x9
188; CHECK-NEXT:    stp x9, x19, [sp, #16] // 16-byte Folded Spill
189; CHECK-NEXT:    addvl sp, sp, #-18
190; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
191; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
192; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
193; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
194; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
195; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
196; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
197; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
198; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
199; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
200; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
201; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
202; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
203; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
204; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
205; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
206; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
207; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
208; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
209; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
210; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
211; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
212; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
213; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
214; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
215; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
216; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
217; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
218; CHECK-NEXT:    addvl sp, sp, #-2
219; CHECK-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
220; CHECK-NEXT:    bl __arm_sme_state
221; CHECK-NEXT:    and x19, x0, #0x1
222; CHECK-NEXT:    tbz w19, #0, .LBB5_2
223; CHECK-NEXT:  // %bb.1:
224; CHECK-NEXT:    smstop sm
225; CHECK-NEXT:  .LBB5_2:
226; CHECK-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
227; CHECK-NEXT:    bl normal_callee_scalable_vec_arg
228; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
229; CHECK-NEXT:    tbz w19, #0, .LBB5_4
230; CHECK-NEXT:  // %bb.3:
231; CHECK-NEXT:    smstart sm
232; CHECK-NEXT:  .LBB5_4:
233; CHECK-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
234; CHECK-NEXT:    ldr z1, [sp] // 16-byte Folded Reload
235; CHECK-NEXT:    fadd z0.d, z1.d, z0.d
236; CHECK-NEXT:    addvl sp, sp, #2
237; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
238; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
239; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
240; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
241; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
242; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
243; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
244; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
245; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
246; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
247; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
248; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
249; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
250; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
251; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
252; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
253; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
254; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
255; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
256; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
257; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
258; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
259; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
260; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
261; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
262; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
263; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
264; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
265; CHECK-NEXT:    addvl sp, sp, #18
266; CHECK-NEXT:    ldr x19, [sp, #24] // 8-byte Folded Reload
267; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
268; CHECK-NEXT:    ret
269  %res = call <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double> %arg)
270  %fadd = fadd <vscale x 2 x double> %res, %arg
271  ret <vscale x 2 x double> %fadd
272}
273
; Callee without streaming attributes that takes and returns a scalable
; <vscale x 2 x double> vector.
274declare <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double>)
275
; Predicate counterpart of the scalable-vector test: a <vscale x 2 x i1>
; argument is passed to a callee without streaming attributes and is also used
; after the call.
; Expected assembly: after the same z8-z23 / p4-p15 saves, one extra VL-sized
; area is allocated. p0 is spilled to predicate slot #7 before the
; __arm_sme_state call and reloaded after the conditional `smstop sm`. The call
; result is spilled to slot #6 before the conditional `smstart sm`, and both
; are reloaded afterwards for the predicate `and`.
276define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x 2 x i1> %arg) "aarch64_pstate_sm_compatible" nounwind {
277; CHECK-LABEL: streaming_compatible_with_predicate_vectors:
278; CHECK:       // %bb.0:
279; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
280; CHECK-NEXT:    cntd x9
281; CHECK-NEXT:    stp x9, x19, [sp, #16] // 16-byte Folded Spill
282; CHECK-NEXT:    addvl sp, sp, #-18
283; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
284; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
285; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
286; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
287; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
288; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
289; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
290; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
291; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
292; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
293; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
294; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
295; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
296; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
297; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
298; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
299; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
300; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
301; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
302; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
303; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
304; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
305; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
306; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
307; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
308; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
309; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
310; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
311; CHECK-NEXT:    addvl sp, sp, #-1
312; CHECK-NEXT:    str p0, [sp, #7, mul vl] // 2-byte Folded Spill
313; CHECK-NEXT:    bl __arm_sme_state
314; CHECK-NEXT:    and x19, x0, #0x1
315; CHECK-NEXT:    tbz w19, #0, .LBB6_2
316; CHECK-NEXT:  // %bb.1:
317; CHECK-NEXT:    smstop sm
318; CHECK-NEXT:  .LBB6_2:
319; CHECK-NEXT:    ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
320; CHECK-NEXT:    bl normal_callee_predicate_vec_arg
321; CHECK-NEXT:    str p0, [sp, #6, mul vl] // 2-byte Folded Spill
322; CHECK-NEXT:    tbz w19, #0, .LBB6_4
323; CHECK-NEXT:  // %bb.3:
324; CHECK-NEXT:    smstart sm
325; CHECK-NEXT:  .LBB6_4:
326; CHECK-NEXT:    ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
327; CHECK-NEXT:    ldr p1, [sp, #6, mul vl] // 2-byte Folded Reload
328; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
329; CHECK-NEXT:    addvl sp, sp, #1
330; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
331; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
332; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
333; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
334; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
335; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
336; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
337; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
338; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
339; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
340; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
341; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
342; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
343; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
344; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
345; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
346; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
347; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
348; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
349; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
350; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
351; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
352; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
353; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
354; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
355; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
356; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
357; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
358; CHECK-NEXT:    addvl sp, sp, #18
359; CHECK-NEXT:    ldr x19, [sp, #24] // 8-byte Folded Reload
360; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
361; CHECK-NEXT:    ret
362  %res = call <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1> %arg)
363  %and = and <vscale x 2 x i1> %res, %arg
364  ret <vscale x 2 x i1> %and
365}
366
; Callee without streaming attributes that takes and returns a scalable
; <vscale x 2 x i1> predicate.
367declare <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1>)
368
; The call to the streaming callee is followed directly by `unreachable`, so
; the block containing the call has no return. The expected assembly still
; contains the full conditional `smstart sm` / call / conditional `smstop sm`
; sequence and then ends at the .LBB7_4 label: no register reloads and no
; `ret` are expected after it.
369define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatible" nounwind {
370; CHECK-LABEL: conditional_smstart_unreachable_block:
371; CHECK:       // %bb.0:
372; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
373; CHECK-NEXT:    cntd x9
374; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
375; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
376; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
377; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
378; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
379; CHECK-NEXT:    bl __arm_sme_state
380; CHECK-NEXT:    and x19, x0, #0x1
381; CHECK-NEXT:    tbnz w19, #0, .LBB7_2
382; CHECK-NEXT:  // %bb.1:
383; CHECK-NEXT:    smstart sm
384; CHECK-NEXT:  .LBB7_2:
385; CHECK-NEXT:    bl streaming_callee
386; CHECK-NEXT:    tbnz w19, #0, .LBB7_4
387; CHECK-NEXT:  // %bb.3:
388; CHECK-NEXT:    smstop sm
389; CHECK-NEXT:  .LBB7_4:
390  call void @streaming_callee()
391  unreachable
392}
393
; The streaming callee is only called on the %if.then path, selected by %p.
; Expected assembly: `tbz w0, #0` branches straight to the `ret` at .LBB8_6
; when %p is false. The register spills, the __arm_sme_state query, the
; conditional `smstart sm` / `smstop sm` around the call and the reloads are
; all emitted on the %if.then path only.
394define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_compatible" nounwind {
395; CHECK-LABEL: conditional_smstart_no_successor_block:
396; CHECK:       // %bb.0:
397; CHECK-NEXT:    tbz w0, #0, .LBB8_6
398; CHECK-NEXT:  // %bb.1: // %if.then
399; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
400; CHECK-NEXT:    cntd x9
401; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
402; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
403; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
404; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
405; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
406; CHECK-NEXT:    bl __arm_sme_state
407; CHECK-NEXT:    and x19, x0, #0x1
408; CHECK-NEXT:    tbnz w19, #0, .LBB8_3
409; CHECK-NEXT:  // %bb.2: // %if.then
410; CHECK-NEXT:    smstart sm
411; CHECK-NEXT:  .LBB8_3: // %if.then
412; CHECK-NEXT:    bl streaming_callee
413; CHECK-NEXT:    tbnz w19, #0, .LBB8_5
414; CHECK-NEXT:  // %bb.4: // %if.then
415; CHECK-NEXT:    smstop sm
416; CHECK-NEXT:  .LBB8_5: // %if.then
417; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
418; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
419; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
420; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
421; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
422; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
423; CHECK-NEXT:  .LBB8_6: // %exit
424; CHECK-NEXT:    ret
425  br i1 %p, label %if.then, label %exit
426
427if.then:
428  call void @streaming_callee()
429  br label %exit
430
431exit:
432  ret void
433}
434
; The IR marks the call to the callee without streaming attributes as
; `tail call`. The expected assembly nevertheless uses an ordinary `bl`
; wrapped in the conditional `smstop sm` / `smstart sm` pair, followed by the
; register reloads and `ret`, i.e. the call is not emitted as a tail call.
435define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
436; CHECK-LABEL: disable_tailcallopt:
437; CHECK:       // %bb.0:
438; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
439; CHECK-NEXT:    cntd x9
440; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
441; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
442; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
443; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
444; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
445; CHECK-NEXT:    bl __arm_sme_state
446; CHECK-NEXT:    and x19, x0, #0x1
447; CHECK-NEXT:    tbz w19, #0, .LBB9_2
448; CHECK-NEXT:  // %bb.1:
449; CHECK-NEXT:    smstop sm
450; CHECK-NEXT:  .LBB9_2:
451; CHECK-NEXT:    bl normal_callee
452; CHECK-NEXT:    tbz w19, #0, .LBB9_4
453; CHECK-NEXT:  // %bb.3:
454; CHECK-NEXT:    smstart sm
455; CHECK-NEXT:  .LBB9_4:
456; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
457; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
458; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
459; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
460; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
461; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
462; CHECK-NEXT:    ret
463
464  tail call void @normal_callee();
465  ret void;
466}
467
; Forwards pointer, integer, float and double arguments unchanged to @bar,
; which has no streaming attributes. Unlike the other tests, this function is
; not `nounwind`, so the expected assembly also pins the CFI directives.
; Expected assembly: before the __arm_sme_state call, x1 and x0 are copied to
; x8 and x9, and s0/s1 and d2/d3 are spilled to the stack. After the
; conditional `smstop sm` they are moved back or reloaded for `bl bar`.
; A `.cfi_offset vg, -24` directive follows the state query, and
; `.cfi_restore vg` follows the conditional `smstart sm`.
468define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) "aarch64_pstate_sm_compatible" {
469; CHECK-LABEL: call_to_non_streaming_pass_args:
470; CHECK:       // %bb.0: // %entry
471; CHECK-NEXT:    sub sp, sp, #128
472; CHECK-NEXT:    .cfi_def_cfa_offset 128
473; CHECK-NEXT:    cntd x9
474; CHECK-NEXT:    stp d15, d14, [sp, #32] // 16-byte Folded Spill
475; CHECK-NEXT:    stp d13, d12, [sp, #48] // 16-byte Folded Spill
476; CHECK-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
477; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
478; CHECK-NEXT:    stp x30, x9, [sp, #96] // 16-byte Folded Spill
479; CHECK-NEXT:    str x19, [sp, #112] // 8-byte Folded Spill
480; CHECK-NEXT:    .cfi_offset w19, -16
481; CHECK-NEXT:    .cfi_offset w30, -32
482; CHECK-NEXT:    .cfi_offset b8, -40
483; CHECK-NEXT:    .cfi_offset b9, -48
484; CHECK-NEXT:    .cfi_offset b10, -56
485; CHECK-NEXT:    .cfi_offset b11, -64
486; CHECK-NEXT:    .cfi_offset b12, -72
487; CHECK-NEXT:    .cfi_offset b13, -80
488; CHECK-NEXT:    .cfi_offset b14, -88
489; CHECK-NEXT:    .cfi_offset b15, -96
490; CHECK-NEXT:    stp d2, d3, [sp, #16] // 16-byte Folded Spill
491; CHECK-NEXT:    mov x8, x1
492; CHECK-NEXT:    mov x9, x0
493; CHECK-NEXT:    stp s0, s1, [sp, #8] // 8-byte Folded Spill
494; CHECK-NEXT:    bl __arm_sme_state
495; CHECK-NEXT:    and x19, x0, #0x1
496; CHECK-NEXT:    .cfi_offset vg, -24
497; CHECK-NEXT:    tbz w19, #0, .LBB10_2
498; CHECK-NEXT:  // %bb.1: // %entry
499; CHECK-NEXT:    smstop sm
500; CHECK-NEXT:  .LBB10_2: // %entry
501; CHECK-NEXT:    ldp s0, s1, [sp, #8] // 8-byte Folded Reload
502; CHECK-NEXT:    mov x0, x9
503; CHECK-NEXT:    ldp d2, d3, [sp, #16] // 16-byte Folded Reload
504; CHECK-NEXT:    mov x1, x8
505; CHECK-NEXT:    bl bar
506; CHECK-NEXT:    tbz w19, #0, .LBB10_4
507; CHECK-NEXT:  // %bb.3: // %entry
508; CHECK-NEXT:    smstart sm
509; CHECK-NEXT:  .LBB10_4: // %entry
510; CHECK-NEXT:    .cfi_restore vg
511; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
512; CHECK-NEXT:    ldr x19, [sp, #112] // 8-byte Folded Reload
513; CHECK-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
514; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
515; CHECK-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
516; CHECK-NEXT:    ldp d15, d14, [sp, #32] // 16-byte Folded Reload
517; CHECK-NEXT:    add sp, sp, #128
518; CHECK-NEXT:    .cfi_def_cfa_offset 0
519; CHECK-NEXT:    .cfi_restore w19
520; CHECK-NEXT:    .cfi_restore w30
521; CHECK-NEXT:    .cfi_restore b8
522; CHECK-NEXT:    .cfi_restore b9
523; CHECK-NEXT:    .cfi_restore b10
524; CHECK-NEXT:    .cfi_restore b11
525; CHECK-NEXT:    .cfi_restore b12
526; CHECK-NEXT:    .cfi_restore b13
527; CHECK-NEXT:    .cfi_restore b14
528; CHECK-NEXT:    .cfi_restore b15
529; CHECK-NEXT:    ret
530entry:
531  call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
532  ret void
533}
534
; Callee without streaming attributes that takes a pointer, two i64, two i32,
; two float and two double parameters.
535declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)
536