; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s

declare void @private_za_callee()
declare float @llvm.cos.f32(float)

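; All functions below carry "aarch64_inout_za", i.e. they share ZA with their
; caller, while the callees they invoke are private-ZA. Per the SME lazy-save
; ABI, each such call is wrapped as follows: the caller allocates a ZA save
; buffer of SVL.B x SVL.B bytes on the stack ("rdsvl x, #1" yields SVL.B and
; the msub computes sp - SVL.B * SVL.B), fills a 16-byte TPIDR2 block (bytes
; 0-7: buffer address, bytes 8-9: number of save slices, bytes 10-15: zero)
; and points TPIDR2_EL0 at the block before the call. After the call it
; executes "smstart za" and re-reads TPIDR2_EL0: if the register has been
; cleared, a callee committed the lazy save, so __arm_tpidr2_restore is called
; with the block address in x0 to reload ZA; otherwise ZA was never clobbered
; and the restore is skipped. TPIDR2_EL0 is then reset to zero.
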
; Test lazy-save mechanism for a single callee.
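; A single private-ZA call: the TPIDR2 block lives at [x29, #-16], the save
; buffer is carved out of the stack below it, and the call is followed by the
; conditional branch around __arm_tpidr2_restore.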
define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_1_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sub x9, x29, #16
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x9
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB0_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @private_za_callee()
  ret void
}

; Test lazy-save mechanism for multiple callees.
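; With two consecutive private-ZA calls the buffer and TPIDR2 block are set up
; only once; before the second call just the slice count is stored again and
; TPIDR2_EL0 is re-armed, and each call is followed by its own conditional
; __arm_tpidr2_restore sequence.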
define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT:    str x21, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x20, #1
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    msub x8, x20, x20, x8
; CHECK-NEXT:    mov sp, x8
; CHECK-NEXT:    sub x21, x29, #16
; CHECK-NEXT:    stur x8, [x29, #-16]
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    sturh w20, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x21
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    sturh w20, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x21
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x21, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @private_za_callee()
  call void @private_za_callee()
  ret void
}

; Test a call of an intrinsic that gets expanded to a library call.
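; The llvm.cos.f32 intrinsic is lowered to a call to the libm routine cosf;
; that library call is a private-ZA callee like any other, so it gets the
; same lazy-save wrapper.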
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stur x9, [x29, #-16]
; CHECK-NEXT:    sub x9, x29, #16
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x9
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB2_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB2_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call float @llvm.cos.f32(float %a)
  ret float %res
}

; Test a streaming-compatible function making a call to a normal
; (non-streaming) private-ZA callee, combined with the lazy save.
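; In a streaming-compatible function PSTATE.SM is only known at run time, so
; __arm_sme_state is queried (bit 0 of x0 holds the current SM state) and the
; call is bracketed by a conditional "smstop sm" / "smstart sm" pair, on top
; of the usual TPIDR2_EL0 setup and conditional __arm_tpidr2_restore.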
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    add x29, sp, #64
; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stur x9, [x29, #-80]
; CHECK-NEXT:    sub x9, x29, #80
; CHECK-NEXT:    sturh wzr, [x29, #-70]
; CHECK-NEXT:    stur wzr, [x29, #-68]
; CHECK-NEXT:    sturh w8, [x29, #-72]
; CHECK-NEXT:    msr TPIDR2_EL0, x9
; CHECK-NEXT:    bl __arm_sme_state
; CHECK-NEXT:    and x20, x0, #0x1
; CHECK-NEXT:    tbz w20, #0, .LBB3_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB3_2:
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    tbz w20, #0, .LBB3_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB3_4:
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #80
; CHECK-NEXT:    cbnz x8, .LBB3_6
; CHECK-NEXT:  // %bb.5:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB3_6:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    sub sp, x29, #64
; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @private_za_callee()
  ret void
}