; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s

; Codegen tests for functions carrying the "aarch64_inout_za" attribute.
; Each function below pairs a small piece of IR with the AArch64 assembly
; that the llc invocation above is expected to produce for it.
; The assertion lines are generated by a script; regenerate them with that
; script instead of editing them by hand.

; A function that only returns a constant and makes no calls.
; The expected assembly is just the constant move and the return: no stack
; frame is created and TPIDR2_EL0 is never touched.
define i32 @no_tpidr2_save_required() "aarch64_inout_za" {
; CHECK-LABEL: no_tpidr2_save_required:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, #42 // =0x2a
; CHECK-NEXT:    ret
entry:
  ret i32 42
}

; A multi-block function where only one path (%use_c) makes a call.
; In the expected assembly:
;   * the entry block reserves stack space sized from `rdsvl` and initialises
;     a 16-byte record at [x29, #-16] before branching on %a;
;   * the %use_b path performs the add and jumps straight to %exit;
;   * the %use_c path writes the record's address to TPIDR2_EL0 right before
;     `bl cosf`, and after `smstart za` calls __arm_tpidr2_restore only when
;     TPIDR2_EL0 reads back as zero, then clears TPIDR2_EL0.
define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
; CHECK-LABEL: multi_bb_stpidr2_save_required:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x8, x8, x8, x9
; CHECK-NEXT:    mov sp, x8
; CHECK-NEXT:    stur x8, [x29, #-16]
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    cbz w0, .LBB1_2
; CHECK-NEXT:  // %bb.1: // %use_b
; CHECK-NEXT:    fmov s1, #4.00000000
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    b .LBB1_5
; CHECK-NEXT:  .LBB1_2: // %use_c
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    sub x9, x29, #16
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x9
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_4
; CHECK-NEXT:  // %bb.3: // %use_c
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_4: // %use_c
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB1_5: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %a.is.nonzero = icmp ne i32 %a, 0
  br i1 %a.is.nonzero, label %use_b, label %use_c

use_b:
  ; Call-free path: %b + 4.0.
  %b.plus.four = fadd float %b, 4.000000e+00
  br label %exit

use_c:
  ; Path with a call: the intrinsic is emitted as a call to cosf.
  %cos.of.c = call float @llvm.cos.f32(float %c)
  br label %exit

exit:
  %result = phi float [%b.plus.four, %use_b], [%cos.of.c, %use_c]
  ret float %result
}

; Same IR as the previous function, with inline stack probing requested
; through "probe-stack"="inline-asm" and "stack-probe-size"="65536".
; The expected entry block differs from the previous test: the initial
; 16-byte allocation is a probing store, and the rdsvl-sized allocation
; becomes a loop that lowers sp by 65536 per iteration, storing xzr at [sp]
; each time, until sp is no longer above the target held in x8. sp is then
; set to x8 and probed once more with a load.
; The %use_b / %use_c / %exit sequences match the previous test apart from
; the block label numbers.
define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float %c) "aarch64_inout_za" "probe-stack"="inline-asm" "stack-probe-size"="65536" {
; CHECK-LABEL: multi_bb_stpidr2_save_required_stackprobe:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    str xzr, [sp, #-16]!
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x8, x8, x8, x9
; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    cmp sp, x8
; CHECK-NEXT:    b.le .LBB2_3
; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    b .LBB2_1
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    mov sp, x8
; CHECK-NEXT:    ldr xzr, [sp]
; CHECK-NEXT:    stur x8, [x29, #-16]
; CHECK-NEXT:    sturh wzr, [x29, #-6]
; CHECK-NEXT:    stur wzr, [x29, #-4]
; CHECK-NEXT:    cbz w0, .LBB2_5
; CHECK-NEXT:  // %bb.4: // %use_b
; CHECK-NEXT:    fmov s1, #4.00000000
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    b .LBB2_8
; CHECK-NEXT:  .LBB2_5: // %use_c
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    sub x9, x29, #16
; CHECK-NEXT:    sturh w8, [x29, #-8]
; CHECK-NEXT:    msr TPIDR2_EL0, x9
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB2_7
; CHECK-NEXT:  // %bb.6: // %use_c
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB2_7: // %use_c
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB2_8: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %a.is.nonzero = icmp ne i32 %a, 0
  br i1 %a.is.nonzero, label %use_b, label %use_c

use_b:
  ; Call-free path: %b + 4.0.
  %b.plus.four = fadd float %b, 4.000000e+00
  br label %exit

use_c:
  ; Path with a call: the intrinsic is emitted as a call to cosf.
  %cos.of.c = call float @llvm.cos.f32(float %c)
  br label %exit

exit:
  %result = phi float [%b.plus.four, %use_b], [%cos.of.c, %use_c]
  ret float %result
}

declare float @llvm.cos.f32(float)