xref: /llvm-project/llvm/test/Transforms/LoopFlatten/loop-flatten-gep.ll (revision 4d1ecf192313b612090d60181937eff03c1a966b)
1; RUN: opt < %s -S -passes='loop(loop-flatten),verify' -verify-loop-info -verify-dom-info -verify-scev | FileCheck %s
2
3target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
4
5; We should be able to flatten the loops and turn the two geps into one.
;
; Input: a two-level loop nest. Each level counts from 0 and keeps iterating
; while its incremented counter is unsigned-less-than %N. The body stores 0 to
; the i32 element of %A at index (%i * %N + %j), formed by two chained geps:
; the first adds %i * %N, the second adds %j.
;
; Expected output (per the FileCheck lines below): the trip count %N * %N is
; computed in for.outer.preheader, a single gep off %A indexed by %i alone
; replaces the two geps, for.inner leaves through an unconditional branch,
; and the outer latch compares against the flattened trip count.
6; CHECK-LABEL: test1
7define void @test1(i32 %N, ptr %A) {
8entry:
  ; Guard: skip the whole nest unless 0 < %N (unsigned), i.e. %N != 0.
9  %cmp3 = icmp ult i32 0, %N
10  br i1 %cmp3, label %for.outer.preheader, label %for.end
11
12; CHECK-LABEL: for.outer.preheader:
13; CHECK: %flatten.tripcount = mul i32 %N, %N
14for.outer.preheader:
15  br label %for.inner.preheader
16
17; CHECK-LABEL: for.inner.preheader:
18; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
19for.inner.preheader:
  ; %i: outer induction variable, 0 on entry and %inc2 on the outer back edge.
20  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
21  br label %for.inner
22
23; CHECK-LABEL: for.inner:
24; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
25; CHECK: br label %for.outer
26for.inner:
  ; %j: inner induction variable, 0 on entry and %inc1 on the inner back edge.
27  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Row offset %i * %N, then two geps: %A + %mul elements, then + %j elements.
28  %mul = mul i32 %i, %N
29  %gep = getelementptr inbounds i32, ptr %A, i32 %mul
30  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
31  store i32 0, ptr %arrayidx, align 4
  ; Inner latch: the increment carries nuw; loop while %inc1 < %N (unsigned).
32  %inc1 = add nuw i32 %j, 1
33  %cmp2 = icmp ult i32 %inc1, %N
34  br i1 %cmp2, label %for.inner, label %for.outer
35
36; CHECK-LABEL: for.outer:
37; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
38for.outer:
  ; Outer latch: advance %i and loop while %inc2 < %N (unsigned).
39  %inc2 = add i32 %i, 1
40  %cmp1 = icmp ult i32 %inc2, %N
41  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
42
43for.end.loopexit:
44  br label %for.end
45
46for.end:
47  ret void
48}
49
50; We can flatten, but the flattened gep has to be inserted after the load it
51; depends on.
;
; Same loop nest as test1, except the base pointer is not the argument %A
; itself: it is %ptr, reloaded from %A with a volatile load inside for.inner.
; Because %ptr is only defined in for.inner, a gep based on it cannot be
; placed in for.inner.preheader. The FileCheck lines below therefore require
; that no such gep appears in the preheader and that the flattened gep
; appears in for.inner instead.
52; CHECK-LABEL: test2
53define void @test2(i32 %N, ptr %A) {
54entry:
  ; Guard: skip the whole nest unless 0 < %N (unsigned), i.e. %N != 0.
55  %cmp3 = icmp ult i32 0, %N
56  br i1 %cmp3, label %for.outer.preheader, label %for.end
57
58; CHECK-LABEL: for.outer.preheader:
59; CHECK: %flatten.tripcount = mul i32 %N, %N
60for.outer.preheader:
61  br label %for.inner.preheader
62
63; CHECK-LABEL: for.inner.preheader:
64; CHECK-NOT: getelementptr inbounds i32, ptr %ptr, i32 %i
65for.inner.preheader:
  ; %i: outer induction variable, 0 on entry and %inc2 on the outer back edge.
66  %i = phi i32 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
67  br label %for.inner
68
69; CHECK-LABEL: for.inner:
70; CHECK: %flatten.arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i
71; CHECK: store i32 0, ptr %flatten.arrayidx, align 4
72; CHECK: br label %for.outer
73for.inner:
  ; %j: inner induction variable, 0 on entry and %inc1 on the inner back edge.
74  %j = phi i32 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Base pointer is loaded (volatile) on every inner iteration; both geps
  ; below are rooted at this value rather than at %A.
75  %ptr = load volatile ptr, ptr %A, align 4
  ; Row offset %i * %N, then two geps: %ptr + %mul elements, then + %j.
76  %mul = mul i32 %i, %N
77  %gep = getelementptr inbounds i32, ptr %ptr, i32 %mul
78  %arrayidx = getelementptr inbounds i32, ptr %gep, i32 %j
79  store i32 0, ptr %arrayidx, align 4
  ; Inner latch: the increment carries nuw; loop while %inc1 < %N (unsigned).
80  %inc1 = add nuw i32 %j, 1
81  %cmp2 = icmp ult i32 %inc1, %N
82  br i1 %cmp2, label %for.inner, label %for.outer
83
84; CHECK-LABEL: for.outer:
85; CHECK: %cmp1 = icmp ult i32 %inc2, %flatten.tripcount
86for.outer:
  ; Outer latch: advance %i and loop while %inc2 < %N (unsigned).
87  %inc2 = add i32 %i, 1
88  %cmp1 = icmp ult i32 %inc2, %N
89  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
90
91for.end.loopexit:
92  br label %for.end
93
94for.end:
95  ret void
96}
97
98; We can't flatten if the gep offset is smaller than the pointer size.
;
; Same loop nest as test1, but the counters, the multiply and both gep
; indices are i16, while the module's target datalayout (p:32:32) makes
; pointers 32 bits wide.
; NOTE(review): presumably the i16 index arithmetic can wrap at 16 bits before
; the gep extends it to pointer width, so the two geps are not equivalent to
; one gep on a flattened i16 counter; confirm against the LoopFlatten source.
;
; Expected output: the nest is left alone. No gep off %A indexed directly by
; %i may appear, and for.inner must keep its conditional back edge.
;
; The negative patterns use {{.*}} between the opcode and the element type so
; they match whatever flags (e.g. inbounds) a flattened gep would carry; a
; pattern without the flags could never match a gep derived from the inbounds
; geps in this function.
99; CHECK-LABEL: test3
100define void @test3(i16 %N, ptr %A) {
101entry:
  ; Guard: skip the whole nest unless 0 < %N (unsigned), i.e. %N != 0.
102  %cmp3 = icmp ult i16 0, %N
103  br i1 %cmp3, label %for.outer.preheader, label %for.end
104
105for.outer.preheader:
106  br label %for.inner.preheader
107
108; CHECK-LABEL: for.inner.preheader:
109; CHECK-NOT: getelementptr {{.*}}i32, ptr %A, i16 %i
110for.inner.preheader:
  ; %i: outer induction variable, 0 on entry and %inc2 on the outer back edge.
111  %i = phi i16 [ 0, %for.outer.preheader ], [ %inc2, %for.outer ]
112  br label %for.inner
113
114; CHECK-LABEL: for.inner:
115; CHECK-NOT: getelementptr {{.*}}i32, ptr %A, i16 %i
116; CHECK: br i1 %cmp2, label %for.inner, label %for.outer
117for.inner:
  ; %j: inner induction variable, 0 on entry and %inc1 on the inner back edge.
118  %j = phi i16 [ 0, %for.inner.preheader ], [ %inc1, %for.inner ]
  ; Row offset computed in i16, then two geps with i16 indices.
119  %mul = mul i16 %i, %N
120  %gep = getelementptr inbounds i32, ptr %A, i16 %mul
121  %arrayidx = getelementptr inbounds i32, ptr %gep, i16 %j
122  store i32 0, ptr %arrayidx, align 4
  ; Inner latch: the increment carries nuw; loop while %inc1 < %N (unsigned).
123  %inc1 = add nuw i16 %j, 1
124  %cmp2 = icmp ult i16 %inc1, %N
125  br i1 %cmp2, label %for.inner, label %for.outer
126
127for.outer:
  ; Outer latch: advance %i and loop while %inc2 < %N (unsigned).
128  %inc2 = add i16 %i, 1
129  %cmp1 = icmp ult i16 %inc2, %N
130  br i1 %cmp1, label %for.inner.preheader, label %for.end.loopexit
131
132for.end.loopexit:
133  br label %for.end
134
135for.end:
136  ret void
137}
138