; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -sink-insts-to-avoid-spills | FileCheck %s

; Each function below loads the first element of @A in the loop preheader and
; uses the loaded value only as the argument of a call inside the loop. The
; assertions record the AArch64 code llc emits for each variant when
; -sink-insts-to-avoid-spills is enabled.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

@A = external dso_local global [100 x i32], align 4

; Baseline variant: the preheader contains nothing but the load of @A.
; In the recorded output the ldr is emitted in the preheader block (%bb.1) and
; the loaded value is copied into w0 before every call in the loop.
define i32 @sink_load_and_copy(i32 %n) {
; CHECK-LABEL: sink_load_and_copy:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    cmp w0, #1
; CHECK-NEXT:    b.lt .LBB0_3
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    adrp x8, A
; CHECK-NEXT:    mov w20, w19
; CHECK-NEXT:    ldr w21, [x8, :lo12:A]
; CHECK-NEXT:  .LBB0_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    mov w0, w21
; CHECK-NEXT:    bl _Z3usei
; CHECK-NEXT:    sdiv w20, w20, w0
; CHECK-NEXT:    subs w19, w19, #1
; CHECK-NEXT:    b.ne .LBB0_2
; CHECK-NEXT:    b .LBB0_4
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    mov w20, w19
; CHECK-NEXT:  .LBB0_4: // %for.cond.cleanup
; CHECK-NEXT:    mov w0, w20
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %cmp63 = icmp sgt i32 %n, 0
  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %0 = load i32, ptr @A, align 4
  br label %for.body

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
  ret i32 %sum.0.lcssa

for.body:
  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
  %call = tail call i32 @_Z3usei(i32 %0)
  %div = sdiv i32 %sum.065, %call
  %lsr.iv.next = add i32 %lsr.iv, -1
  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; Same loop as @sink_load_and_copy, but the preheader additionally calls
; @_Z3usei after the load of @A. The recorded output keeps the ldr ahead of
; that call in the preheader block.
define i32 @cant_sink_successive_call(i32 %n) {
; CHECK-LABEL: cant_sink_successive_call:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    cmp w0, #1
; CHECK-NEXT:    b.lt .LBB1_3
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    adrp x8, A
; CHECK-NEXT:    mov w0, w19
; CHECK-NEXT:    ldr w20, [x8, :lo12:A]
; CHECK-NEXT:    bl _Z3usei
; CHECK-NEXT:    mov w21, w19
; CHECK-NEXT:  .LBB1_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    mov w0, w20
; CHECK-NEXT:    bl _Z3usei
; CHECK-NEXT:    sdiv w21, w21, w0
; CHECK-NEXT:    subs w19, w19, #1
; CHECK-NEXT:    b.ne .LBB1_2
; CHECK-NEXT:    b .LBB1_4
; CHECK-NEXT:  .LBB1_3:
; CHECK-NEXT:    mov w21, w19
; CHECK-NEXT:  .LBB1_4: // %for.cond.cleanup
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    mov w0, w21
; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %cmp63 = icmp sgt i32 %n, 0
  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %0 = load i32, ptr @A, align 4
  %call0 = tail call i32 @_Z3usei(i32 %n)
  br label %for.body

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
  ret i32 %sum.0.lcssa

for.body:
  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
  %call = tail call i32 @_Z3usei(i32 %0)
  %div = sdiv i32 %sum.065, %call
  %lsr.iv.next = add i32 %lsr.iv, -1
  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; Same loop as @sink_load_and_copy, but the preheader additionally stores 42
; through %store after the load of @A. The recorded output keeps the ldr ahead
; of that str in the preheader block.
; %store is written below, so it carries only nocapture: a readnone pointer
; argument must not be dereferenced by the function.
define i32 @cant_sink_successive_store(ptr nocapture %store, i32 %n) {
; CHECK-LABEL: cant_sink_successive_store:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    mov w19, w1
; CHECK-NEXT:    cmp w1, #1
; CHECK-NEXT:    b.lt .LBB2_3
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    adrp x8, A
; CHECK-NEXT:    mov w21, w19
; CHECK-NEXT:    ldr w20, [x8, :lo12:A]
; CHECK-NEXT:    mov w8, #42 // =0x2a
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:  .LBB2_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    mov w0, w20
; CHECK-NEXT:    bl _Z3usei
; CHECK-NEXT:    sdiv w21, w21, w0
; CHECK-NEXT:    subs w19, w19, #1
; CHECK-NEXT:    b.ne .LBB2_2
; CHECK-NEXT:    b .LBB2_4
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    mov w21, w19
; CHECK-NEXT:  .LBB2_4: // %for.cond.cleanup
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    mov w0, w21
; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %cmp63 = icmp sgt i32 %n, 0
  br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  %0 = load i32, ptr @A, align 4
  store i32 42, ptr %store, align 4
  br label %for.body

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
  ret i32 %sum.0.lcssa

for.body:
  %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
  %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
  %call = tail call i32 @_Z3usei(i32 %0)
  %div = sdiv i32 %sum.065, %call
  %lsr.iv.next = add i32 %lsr.iv, -1
  %exitcond.not = icmp eq i32 %lsr.iv.next, 0
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; External callee used by every loop above; its definition is not part of this
; file.
declare i32 @_Z3usei(i32)