xref: /llvm-project/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll (revision ca7dc895cea44c80263c969302fa2f202751aa9c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
3
; Baseline case: an "aarch64_inout_za" function that makes no calls and simply
; returns the constant 42. The assertions below expect the body to be exactly
; the constant move followed by the return, i.e. no stack allocation and no
; TPIDR2_EL0 accesses are emitted.
4define i32 @no_tpidr2_save_required() "aarch64_inout_za" {
5; CHECK-LABEL: no_tpidr2_save_required:
6; CHECK:       // %bb.0: // %entry
7; CHECK-NEXT:    mov w0, #42 // =0x2a
8; CHECK-NEXT:    ret
9entry:
10  ret i32 42
11}
12
; Multi-basic-block case: an "aarch64_inout_za" function in which only one of
; the two paths makes a call. %use_b adds 4.0 to %b; %use_c calls llvm.cos.f32,
; which the expected output lowers to `bl cosf`.
;
; Expected code shape, as pinned by the assertions below:
;   * Entry block (executed on both paths): sp is lowered by the square of the
;     value read with `rdsvl x8, #1`, and a 16-byte block at x29-16 is
;     initialised with the new sp at offset 0 and zeros at offsets 10 and 12.
;   * %use_c only: the low halfword of `rdsvl x8, #1` is stored at offset 8 of
;     that block, TPIDR2_EL0 is set to the block's address, and cosf is called.
;     After the call, `smstart za` is issued and TPIDR2_EL0 is read back; if it
;     reads as zero, __arm_tpidr2_restore is called with x0 = block address.
;     TPIDR2_EL0 is then cleared.
;   * %use_b never touches TPIDR2_EL0.
;
; NOTE(review): "stpidr2" in the function name looks like a typo for "tpidr2";
; renaming it would also require updating the label assertion, so it is left
; as-is here.
13define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
14; CHECK-LABEL: multi_bb_stpidr2_save_required:
15; CHECK:       // %bb.0:
16; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
17; CHECK-NEXT:    mov x29, sp
18; CHECK-NEXT:    sub sp, sp, #16
19; CHECK-NEXT:    .cfi_def_cfa w29, 16
20; CHECK-NEXT:    .cfi_offset w30, -8
21; CHECK-NEXT:    .cfi_offset w29, -16
22; CHECK-NEXT:    rdsvl x8, #1
23; CHECK-NEXT:    mov x9, sp
24; CHECK-NEXT:    msub x8, x8, x8, x9
25; CHECK-NEXT:    mov sp, x8
26; CHECK-NEXT:    stur x8, [x29, #-16]
27; CHECK-NEXT:    sturh wzr, [x29, #-6]
28; CHECK-NEXT:    stur wzr, [x29, #-4]
29; CHECK-NEXT:    cbz w0, .LBB1_2
30; CHECK-NEXT:  // %bb.1: // %use_b
31; CHECK-NEXT:    fmov s1, #4.00000000
32; CHECK-NEXT:    fadd s0, s0, s1
33; CHECK-NEXT:    b .LBB1_5
34; CHECK-NEXT:  .LBB1_2: // %use_c
35; CHECK-NEXT:    fmov s0, s1
36; CHECK-NEXT:    rdsvl x8, #1
37; CHECK-NEXT:    sub x9, x29, #16
38; CHECK-NEXT:    sturh w8, [x29, #-8]
39; CHECK-NEXT:    msr TPIDR2_EL0, x9
40; CHECK-NEXT:    bl cosf
41; CHECK-NEXT:    smstart za
42; CHECK-NEXT:    mrs x8, TPIDR2_EL0
43; CHECK-NEXT:    sub x0, x29, #16
44; CHECK-NEXT:    cbnz x8, .LBB1_4
45; CHECK-NEXT:  // %bb.3: // %use_c
46; CHECK-NEXT:    bl __arm_tpidr2_restore
47; CHECK-NEXT:  .LBB1_4: // %use_c
48; CHECK-NEXT:    msr TPIDR2_EL0, xzr
49; CHECK-NEXT:  .LBB1_5: // %exit
50; CHECK-NEXT:    mov sp, x29
51; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
52; CHECK-NEXT:    ret
; IR under test: branch on %a != 0, then merge the two float results in %exit.
53  %cmp = icmp ne i32 %a, 0
54  br i1 %cmp, label %use_b, label %use_c
55
; Call-free path.
56use_b:
57  %faddr = fadd float %b, 4.0
58  br label %exit
59
; Path containing the only call in the function.
60use_c:
61  %res2 = call float @llvm.cos.f32(float %c)
62  br label %exit
63
64exit:
65  %ret = phi float [%faddr, %use_b], [%res2, %use_c]
66  ret float %ret
67}
68
; Same IR body as @multi_bb_stpidr2_save_required, but with the additional
; function attributes "probe-stack"="inline-asm" and "stack-probe-size"="65536".
;
; Expected differences in the entry block, as pinned by the assertions below:
;   * The fixed 16-byte allocation is done with a pre-indexed store of xzr
;     (`str xzr, [sp, #-16]!`) rather than a plain `sub sp, sp, #16`.
;   * The variable-sized allocation (sp minus the square of `rdsvl x8, #1`)
;     is performed by a loop that lowers sp in 65536-byte steps, storing xzr
;     at [sp] after each step, until the compare against the target address
;     takes the `b.le` exit. sp is then set to the target address and probed
;     once more with `ldr xzr, [sp]`.
; The 16-byte block initialisation and the TPIDR2_EL0 set/restore sequence
; around the call in %use_c are expected to be unchanged apart from label
; numbering.
;
; NOTE(review): "stpidr2" in the function name looks like a typo for "tpidr2";
; renaming it would also require updating the label assertion, so it is left
; as-is here.
69define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float %c) "aarch64_inout_za" "probe-stack"="inline-asm" "stack-probe-size"="65536" {
70; CHECK-LABEL: multi_bb_stpidr2_save_required_stackprobe:
71; CHECK:       // %bb.0:
72; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
73; CHECK-NEXT:    mov x29, sp
74; CHECK-NEXT:    str xzr, [sp, #-16]!
75; CHECK-NEXT:    .cfi_def_cfa w29, 16
76; CHECK-NEXT:    .cfi_offset w30, -8
77; CHECK-NEXT:    .cfi_offset w29, -16
78; CHECK-NEXT:    rdsvl x8, #1
79; CHECK-NEXT:    mov x9, sp
80; CHECK-NEXT:    msub x8, x8, x8, x9
81; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
82; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
83; CHECK-NEXT:    cmp sp, x8
84; CHECK-NEXT:    b.le .LBB2_3
85; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
86; CHECK-NEXT:    str xzr, [sp]
87; CHECK-NEXT:    b .LBB2_1
88; CHECK-NEXT:  .LBB2_3:
89; CHECK-NEXT:    mov sp, x8
90; CHECK-NEXT:    ldr xzr, [sp]
91; CHECK-NEXT:    stur x8, [x29, #-16]
92; CHECK-NEXT:    sturh wzr, [x29, #-6]
93; CHECK-NEXT:    stur wzr, [x29, #-4]
94; CHECK-NEXT:    cbz w0, .LBB2_5
95; CHECK-NEXT:  // %bb.4: // %use_b
96; CHECK-NEXT:    fmov s1, #4.00000000
97; CHECK-NEXT:    fadd s0, s0, s1
98; CHECK-NEXT:    b .LBB2_8
99; CHECK-NEXT:  .LBB2_5: // %use_c
100; CHECK-NEXT:    fmov s0, s1
101; CHECK-NEXT:    rdsvl x8, #1
102; CHECK-NEXT:    sub x9, x29, #16
103; CHECK-NEXT:    sturh w8, [x29, #-8]
104; CHECK-NEXT:    msr TPIDR2_EL0, x9
105; CHECK-NEXT:    bl cosf
106; CHECK-NEXT:    smstart za
107; CHECK-NEXT:    mrs x8, TPIDR2_EL0
108; CHECK-NEXT:    sub x0, x29, #16
109; CHECK-NEXT:    cbnz x8, .LBB2_7
110; CHECK-NEXT:  // %bb.6: // %use_c
111; CHECK-NEXT:    bl __arm_tpidr2_restore
112; CHECK-NEXT:  .LBB2_7: // %use_c
113; CHECK-NEXT:    msr TPIDR2_EL0, xzr
114; CHECK-NEXT:  .LBB2_8: // %exit
115; CHECK-NEXT:    mov sp, x29
116; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
117; CHECK-NEXT:    ret
; IR under test: branch on %a != 0, then merge the two float results in %exit.
118  %cmp = icmp ne i32 %a, 0
119  br i1 %cmp, label %use_b, label %use_c
120
; Call-free path.
121use_b:
122  %faddr = fadd float %b, 4.0
123  br label %exit
124
; Path containing the only call in the function.
125use_c:
126  %res2 = call float @llvm.cos.f32(float %c)
127  br label %exit
128
129exit:
130  %ret = phi float [%faddr, %use_b], [%res2, %use_c]
131  ret float %ret
132}
133
; Intrinsic called from the %use_c blocks in this file; the expected output
; shows it being emitted as a call to cosf.
134declare float @llvm.cos.f32(float)
135