; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s

; Test LSR for giving small constants, which get re-associated as unfolded
; offset, a chance to get combined with loop-invariant registers (same as
; large constants which do not fit as add immediate operands). Here LSR
; prefers to bump the base pointer outside the loop.

; float test(float *arr, long long start, float threshold) {
;   for (long long i = start; i != 0; ++i) {
;     float x = arr[i + 7];
;     if (x > threshold)
;       return x;
;   }
;   return -7;
; }
define float @test1(ptr nocapture readonly %arr, i64 %start, float %threshold) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cbz x1, .LBB0_4
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    add x8, x0, #28
; CHECK-NEXT:  .LBB0_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr s1, [x8, x1, lsl #2]
; CHECK-NEXT:    fcmp s1, s0
; CHECK-NEXT:    b.gt .LBB0_5
; CHECK-NEXT:  // %bb.3: // %for.cond
; CHECK-NEXT:    // in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    add x1, x1, #1
; CHECK-NEXT:    cbnz x1, .LBB0_2
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    fmov s0, #-7.00000000
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_5: // %cleanup2
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    ret
entry:
  ; A zero start value skips the loop entirely and returns -7.0.
  %cmp11 = icmp eq i64 %start, 0
  br i1 %cmp11, label %cleanup2, label %for.body

for.cond:                                         ; preds = %for.body
  ; Loop latch: keep iterating until the incremented index reaches zero.
  %cmp = icmp eq i64 %inc, 0
  br i1 %cmp, label %cleanup2, label %for.body

for.body:                                         ; preds = %entry, %for.cond
  %i.012 = phi i64 [ %inc, %for.cond ], [ %start, %entry ]
  ; Element index i + 7. With 4-byte floats this is the 28-byte offset that
  ; the expected output adds to the base pointer once, in the preheader.
  %add = add nsw i64 %i.012, 7
  %arrayidx = getelementptr inbounds float, ptr %arr, i64 %add
  %0 = load float, ptr %arrayidx, align 4
  %cmp1 = fcmp ogt float %0, %threshold
  %inc = add nsw i64 %i.012, 1
  br i1 %cmp1, label %cleanup2, label %for.cond

cleanup2:                                         ; preds = %for.cond, %for.body, %entry
  ; Result is the loaded element on an early exit from for.body, else -7.0.
  %1 = phi float [ -7.000000e+00, %entry ], [ %0, %for.body ], [ -7.000000e+00, %for.cond ]
  ret float %1
}

; Same as test1, except i has another use:
;     if (x > threshold) ---> if (x > threshold + i)
define float @test2(ptr nocapture readonly %arr, i64 %start, float %threshold) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cbz x1, .LBB1_4
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    add x8, x0, #28
; CHECK-NEXT:  .LBB1_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    scvtf s1, x1
; CHECK-NEXT:    fadd s2, s1, s0
; CHECK-NEXT:    ldr s1, [x8, x1, lsl #2]
; CHECK-NEXT:    fcmp s1, s2
; CHECK-NEXT:    b.gt .LBB1_5
; CHECK-NEXT:  // %bb.3: // %for.cond
; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT:    add x1, x1, #1
; CHECK-NEXT:    cbnz x1, .LBB1_2
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    fmov s0, #-7.00000000
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_5: // %cleanup4
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    ret
entry:
  ; A zero start value skips the loop entirely and returns -7.0.
  %cmp14 = icmp eq i64 %start, 0
  br i1 %cmp14, label %cleanup4, label %for.body

for.cond:                                         ; preds = %for.body
  ; Loop latch: keep iterating until the incremented index reaches zero.
  %cmp = icmp eq i64 %inc, 0
  br i1 %cmp, label %cleanup4, label %for.body

for.body:                                         ; preds = %entry, %for.cond
  %i.015 = phi i64 [ %inc, %for.cond ], [ %start, %entry ]
  ; Element index i + 7, as in test1.
  %add = add nsw i64 %i.015, 7
  %arrayidx = getelementptr inbounds float, ptr %arr, i64 %add
  %0 = load float, ptr %arrayidx, align 4
  ; Second use of the induction variable: i is converted to float and added
  ; to the threshold before the comparison.
  %conv = sitofp i64 %i.015 to float
  %add1 = fadd float %conv, %threshold
  %cmp2 = fcmp ogt float %0, %add1
  %inc = add nsw i64 %i.015, 1
  br i1 %cmp2, label %cleanup4, label %for.cond

cleanup4:                                         ; preds = %for.cond, %for.body, %entry
  ; Result is the loaded element on an early exit from for.body, else -7.0.
  %1 = phi float [ -7.000000e+00, %entry ], [ %0, %for.body ], [ -7.000000e+00, %for.cond ]
  ret float %1
}