; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s | FileCheck %s

; Verify that the following code can be compiled without +sme, because if the
; function is not entered in streaming-SVE mode at runtime, the codepath
; leading to the smstop/smstart pair will not be executed either.

target triple = "aarch64"

; @streaming_compatible: a streaming-compatible caller (attribute set #0) that
; calls @non_streaming with no arguments. The expected code:
;   - saves d8-d15, x30 and x19 in the prologue, together with the value
;     returned by __arm_get_current_vg;
;   - calls __arm_sme_state and keeps bit 0 of the result in x19;
;   - tests that bit before the call (skipping "smstop sm" when it is clear)
;     and again after the call (skipping "smstart sm" when it is clear).
define void @streaming_compatible() #0 {
; CHECK-LABEL: streaming_compatible:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    bl __arm_get_current_vg
; CHECK-NEXT:    stp x0, x19, [sp, #72] // 16-byte Folded Spill
; CHECK-NEXT:    bl __arm_sme_state
; CHECK-NEXT:    and x19, x0, #0x1
; CHECK-NEXT:    tbz w19, #0, .LBB0_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    bl non_streaming
; CHECK-NEXT:    tbz w19, #0, .LBB0_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_streaming()
  ret void
}

; External callee used by the tests in this file. It is declared without any
; attributes, unlike its callers, which use attribute set #0.
declare void @non_streaming()


; Verify that COALESCER_BARRIER is also supported without +sme.
;
; Same shape as the argument-less test, but a float argument is forwarded to
; the callee. The expected code spills s0 to the stack before calling
; __arm_sme_state and reloads it after the conditional "smstop sm", immediately
; before the call to non_streaming.
;
; NOTE(review): @non_streaming is declared without parameters but is called
; here with a float. Presumably this is deliberate, to keep %f live across the
; mode switch -- confirm before changing either side.

define void @streaming_compatible_arg(float %f) #0 {
; CHECK-LABEL: streaming_compatible_arg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    bl __arm_get_current_vg
; CHECK-NEXT:    stp x0, x19, [sp, #88] // 16-byte Folded Spill
; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT:    bl __arm_sme_state
; CHECK-NEXT:    and x19, x0, #0x1
; CHECK-NEXT:    tbz w19, #0, .LBB1_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT:    bl non_streaming
; CHECK-NEXT:    tbz w19, #0, .LBB1_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  call void @non_streaming(float %f)
  ret void
}


; Attribute set #0: nounwind and streaming-compatible. No "target-features"
; attribute is listed here and the llc invocation at the top of the file
; passes no -mattr, so +sme is not requested anywhere in this test.
attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }