; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s

; The p:32:32 component declares 32-bit pointers, so the i32 gep indices used
; by test1 and test2 are as wide as a pointer.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; We should be able to flatten the loops and turn the two geps into one.
;
; The nest stores 0 to A[i * N + j]: the outer loop advances %i, the inner
; loop advances %j, and both run while the incremented value is below %N.
; CHECK-LABEL: test1
define void @test1(i32 %N, ptr %A) {
entry:
  ; Skip the whole nest when %N is 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; The combined trip count is expected to be computed once, before the nest.
; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; The single replacement gep is expected here, indexed directly by %i.
; CHECK-LABEL: for.inner.preheader:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
for.inner.preheader:
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; The store is expected to use the replacement gep, and the inner back-edge is
; expected to become an unconditional branch to the outer latch.
; CHECK-LABEL: for.inner:
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Two chained geps: first by i * N, then by j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; The outer latch is expected to compare against the combined trip count
; rather than %N.
; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can flatten, but the flattened gep has to be inserted after the load it
; depends on.
; CHECK-LABEL: test2
;
; Same nest as test1, except that the gep base %ptr is produced by a volatile
; load inside the inner loop body instead of being the argument %A.
define void @test2(i32 %N, ptr %A) {
entry:
  ; Skip the whole nest when %N is 0.
  %cmp3 = icmp ult i32 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

; The combined trip count is expected to be computed once, before the nest.
; CHECK-LABEL: for.outer.preheader:
; CHECK: %flatten.tripcount = mul i32 %N, %N
for.outer.preheader:
  br label %for.inner.preheader

; %ptr is not defined yet in this block, so the replacement gep must not be
; placed here.
; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr inbounds i32, ptr %ptr, i32 %i
for.inner.preheader:
  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; The replacement gep is expected in the loop body, where %ptr is available,
; followed by the store through it and the unconditional branch to the latch.
; CHECK-LABEL: for.inner:
; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i
; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
; CHECK: br label %for.outer
for.inner:
  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Base pointer of the gep chain; reloaded on every inner iteration.
  %ptr = load volatile ptr, ptr %A, align 4
  ; Two chained geps: first by i * N, then by j.
  %mul = mul i32 %i, %N
  %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i32 %j, 1
  %cmp2 = icmp ult i32 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

; The outer latch is expected to compare against the combined trip count
; rather than %N.
; CHECK-LABEL: for.outer:
; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
for.outer:
  %inc2 = add i32 %i, 1
  %cmp1 = icmp ult i32 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; We can't flatten if the gep offset is smaller than the pointer size.
; CHECK-LABEL: test3
;
; Same nest shape as the earlier tests in this file, but the induction
; variables and gep indices are i16 while the datalayout string declares
; 32-bit pointers (p:32:32). The loops must be left as they are.
define void @test3(i16 %N, ptr %A) {
entry:
  ; Skip the whole nest when %N is 0.
  %cmp3 = icmp ult i16 0, %N
  br i1 %cmp3, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.inner.preheader

; Both source geps are inbounds, and the positive tests in this file show the
; flattened gep keeping that keyword. The negative patterns therefore accept
; an optional inbounds; without it they could never match a wrongly created
; gep.
; CHECK-LABEL: for.inner.preheader:
; CHECK-NOT: getelementptr {{(inbounds )?}}i32, ptr %A, i16 %i
for.inner.preheader:
  %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
  br label %for.inner

; The inner loop must keep its own conditional back-edge.
; CHECK-LABEL: for.inner:
; CHECK-NOT: getelementptr {{(inbounds )?}}i32, ptr %A, i16 %i
; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
for.inner:
  %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Two chained geps: first by i * N, then by j, both with i16 indices.
  %mul = mul i16 %i, %N
  %gep = getelementptr inbounds i32, ptr %A, i16 %mul
  %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
  store i32 0, ptr %arrayidx, align 4
  %inc1 = add nuw i16 %j, 1
  %cmp2 = icmp ult i16 %inc1, %N
  br i1 %cmp2, label %for.inner, label %for.outer

for.outer:
  %inc2 = add i16 %i, 1
  %cmp1 = icmp ult i16 %inc2, %N
  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}