xref: /llvm-project/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll (revision 435e75db80e1ffd0f9752534d4544eba5e0610df)
1; REQUIRES: x86-registered-target
2; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S \
3; RUN:   < %s | FileCheck %s
4
5; RUN: opt -aa-pipeline=basic-aa -passes='loop-distribute,loop-vectorize' -enable-loop-distribute -force-vector-width=4 \
6; RUN:   -verify-loop-info -verify-dom-info -S < %s | \
7; RUN:   FileCheck --check-prefix=VECTORIZE %s
8
9; RUN: opt -aa-pipeline=basic-aa -passes='loop-distribute,print<access-info>' -enable-loop-distribute \
10; RUN:   -verify-loop-info -verify-dom-info -disable-output < %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
11
12; The memcheck version of basic.ll.  We should distribute and vectorize the
13; second part of this loop with 5 memchecks (A+1 x {C, D, E} + C x {A, B})
14;
15;   for (i = 0; i < n; i++) {
16;     A[i + 1] = A[i] * B[i];
17; -------------------------------
18;     C[i] = D[i] * E[i];
19;   }
20
21target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
22target triple = "x86_64-apple-macosx10.10.0"
23
; Five pointer-typed globals, all initialised to null.  Every test function
; below loads each of them once in its entry block and uses the loaded value
; as the base address of one i32 array.
24@B = common global ptr null, align 8
25@A = common global ptr null, align 8
26@C = common global ptr null, align 8
27@D = common global ptr null, align 8
28@E = common global ptr null, align 8
29
30; CHECK-LABEL: @f(
31define void @f() {
; Baseline case.  A single loop that runs 20 iterations and contains two
; statements:
;     A[i + 1] = A[i] * B[i]
;     C[i]     = D[i] * E[i]
; The five array base pointers are loaded at run time from the globals
; @A..@E, and the directives below expect the pass to emit run-time overlap
; checks, a fallback copy of the original loop, and two distributed loops.
32entry:
33  %a = load ptr, ptr @A, align 8
34  %b = load ptr, ptr @B, align 8
35  %c = load ptr, ptr @C, align 8
36  %d = load ptr, ptr @D, align 8
37  %e = load ptr, ptr @E, align 8
38  br label %for.body
39
40; We have two compares for each array overlap check.
41; Since the checks to A and A + 4 get merged, this will give us a
42; total of 8 compares.
; NOTE(review): "A" and "A + 4" presumably refer to the load address
; %arrayidxA and the store address %arrayidxA_plus_4 (A indexed by %ind + 1,
; i.e. one i32 = 4 bytes further on).
43;
44; CHECK: for.body.lver.check:
45; CHECK:     = icmp
46; CHECK:     = icmp
47
48; CHECK:     = icmp
49; CHECK:     = icmp
50
51; CHECK:     = icmp
52; CHECK:     = icmp
53
54; CHECK:     = icmp
55; CHECK:     = icmp
56
57; CHECK-NOT: = icmp
58; CHECK:     br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label %for.body.ph.ldist1
59
60; The non-distributed loop that the memchecks fall back on.
61
62; CHECK: for.body.ph.lver.orig:
63; CHECK:     br label %for.body.lver.orig
64; CHECK: for.body.lver.orig:
65; CHECK:    br i1 %exitcond.lver.orig, label %for.end.loopexit, label %for.body.lver.orig
66
67; Verify the two distributed loops.
; The first expected loop (suffix .ldist1) holds the A/B multiply; the second
; keeps the original names and holds the D/E multiply.
68
69; CHECK: for.body.ph.ldist1:
70; CHECK:     br label %for.body.ldist1
71; CHECK: for.body.ldist1:
72; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
73; CHECK:    br i1 %exitcond.ldist1, label %for.body.ph, label %for.body.ldist1
74
75; CHECK: for.body.ph:
76; CHECK:    br label %for.body
77; CHECK: for.body:
78; CHECK:    %mulC = mul i32 %loadD, %loadE
79; CHECK: for.end:
80
81
; With the loop-vectorize run line, exactly two <4 x i32> multiplies are
; expected in the output for this function.
82; VECTORIZE: mul <4 x i32>
83; VECTORIZE: mul <4 x i32>
84; VECTORIZE-NOT: mul <4 x i32>
85
86for.body:                                         ; preds = %for.body, %entry
; %ind is the induction variable: 0 when entered from %entry, %add on the
; back edge.
87  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
88
; First statement: load A[ind] and B[ind], multiply them ...
89  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
90  %loadA = load i32, ptr %arrayidxA, align 4
91
92  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
93  %loadB = load i32, ptr %arrayidxB, align 4
94
95  %mulA = mul i32 %loadB, %loadA
96
; ... and store the product to A[ind + 1].  %add doubles as the next value of
; the induction variable.
97  %add = add nuw nsw i64 %ind, 1
98  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
99  store i32 %mulA, ptr %arrayidxA_plus_4, align 4
100
; Second statement: C[ind] = D[ind] * E[ind].  It uses none of the values
; computed by the first statement other than the shared index %ind.
101  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
102  %loadD = load i32, ptr %arrayidxD, align 4
103
104  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
105  %loadE = load i32, ptr %arrayidxE, align 4
106
107  %mulC = mul i32 %loadD, %loadE
108
109  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
110  store i32 %mulC, ptr %arrayidxC, align 4
111
; Leave the loop once the incremented index equals 20, i.e. after the
; iteration with %ind == 19.
112  %exitcond = icmp eq i64 %add, 20
113  br i1 %exitcond, label %for.end, label %for.body
114
115for.end:                                          ; preds = %for.body
116  ret void
117}
118
119; Make sure there's no "Multiple reports generated" assert with a
120; volatile load, and no distribution
121
122; TODO: Distribution of loops with volatile accesses may be possible under
123; some circumstances, but the current implementation does not touch them.
124
125; CHECK-LABEL: @f_volatile_load(
126; CHECK: br label %for.body{{$}}
127
128; CHECK-NOT: load
129
130; CHECK: {{^}}for.body:
131; CHECK: load i32
132; CHECK: load i32
133; CHECK: load volatile i32
134; CHECK: load i32
135; CHECK: br i1 %exitcond, label %for.end, label %for.body{{$}}
136
137; CHECK-NOT: load
138
139; VECTORIZE-NOT: load <4 x i32>
140; VECTORIZE-NOT: mul <4 x i32>
141define void @f_volatile_load() {
; Same loop as @f, except that the load of D[ind] is volatile.  The
; directives that precede this definition expect the loop to come out of the
; pass as a single for.body block that branches back to itself, and expect no
; <4 x i32> load or multiply from the vectorizer run.
142entry:
143  %a = load ptr, ptr @A, align 8
144  %b = load ptr, ptr @B, align 8
145  %c = load ptr, ptr @C, align 8
146  %d = load ptr, ptr @D, align 8
147  %e = load ptr, ptr @E, align 8
148  br label %for.body
149
150for.body:
; Induction variable: 0 from %entry, %add from the back edge.
151  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
152
; First statement: A[ind + 1] = A[ind] * B[ind].
153  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
154  %loadA = load i32, ptr %arrayidxA, align 4
155
156  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
157  %loadB = load i32, ptr %arrayidxB, align 4
158
159  %mulA = mul i32 %loadB, %loadA
160
161  %add = add nuw nsw i64 %ind, 1
162  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
163  store i32 %mulA, ptr %arrayidxA_plus_4, align 4
164
; Second statement: C[ind] = D[ind] * E[ind].  The volatile load of D[ind]
; is the only difference from @f.
165  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
166  %loadD = load volatile i32, ptr %arrayidxD, align 4
167
168  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
169  %loadE = load i32, ptr %arrayidxE, align 4
170
171  %mulC = mul i32 %loadD, %loadE
172
173  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
174  store i32 %mulC, ptr %arrayidxC, align 4
175
; Exit once the incremented index equals 20.
176  %exitcond = icmp eq i64 %add, 20
177  br i1 %exitcond, label %for.end, label %for.body
178
179for.end:
180  ret void
181}
182
183declare i32 @llvm.convergent(i32) #0
; Takes and returns an i32.  Attribute group #0, defined with the other
; attribute groups in this file, marks it nounwind, readnone and convergent.
; It is called from the loop bodies of the two @f_with_convergent* functions.
184
185; This is the same as f, and would require the same bounds
186; check. However, it is not OK to introduce new control dependencies
187; on the convergent call.
188
189; CHECK-LABEL: @f_with_convergent(
190; CHECK: call i32 @llvm.convergent
191; CHECK-NOT: call i32 @llvm.convergent
192
193; ANALYSIS: for.body:
194; ANALYSIS: Report: cannot add control dependency to convergent operation
195define void @f_with_convergent() #1 {
; Same loop as @f, except that the value loaded from D[ind] is passed through
; a call to @llvm.convergent before the multiply, and the function carries
; attribute group #1.  The directives that precede this definition expect
; exactly one call to @llvm.convergent in the output, and the access-info run
; expects the "cannot add control dependency to convergent operation" report.
196entry:
197  %a = load ptr, ptr @A, align 8
198  %b = load ptr, ptr @B, align 8
199  %c = load ptr, ptr @C, align 8
200  %d = load ptr, ptr @D, align 8
201  %e = load ptr, ptr @E, align 8
202  br label %for.body
203
204for.body:                                         ; preds = %for.body, %entry
; Induction variable: 0 from %entry, %add from the back edge.
205  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
206
; First statement: A[ind + 1] = A[ind] * B[ind].
207  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
208  %loadA = load i32, ptr %arrayidxA, align 4
209
210  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
211  %loadB = load i32, ptr %arrayidxB, align 4
212
213  %mulA = mul i32 %loadB, %loadA
214
215  %add = add nuw nsw i64 %ind, 1
216  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
217  store i32 %mulA, ptr %arrayidxA_plus_4, align 4
218
; Second statement: C[ind] = convergent(D[ind]) * E[ind].
219  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
220  %loadD = load i32, ptr %arrayidxD, align 4
221
222  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
223  %loadE = load i32, ptr %arrayidxE, align 4
224
; The convergent call sits between the D load and the multiply; it is the
; only difference from the loop body of @f.
225  %convergentD = call i32 @llvm.convergent(i32 %loadD)
226  %mulC = mul i32 %convergentD, %loadE
227
228  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
229  store i32 %mulC, ptr %arrayidxC, align 4
230
; Exit once the incremented index equals 20.
231  %exitcond = icmp eq i64 %add, 20
232  br i1 %exitcond, label %for.end, label %for.body
233
234for.end:                                          ; preds = %for.body
235  ret void
236}
237
238; Make sure an explicit request for distribution is ignored if it
239; requires possibly illegal checks.
240
241; CHECK-LABEL: @f_with_convergent_forced_distribute(
242; CHECK: call i32 @llvm.convergent
243; CHECK-NOT: call i32 @llvm.convergent
244define void @f_with_convergent_forced_distribute() #1 {
; Same body as @f_with_convergent.  The only difference is the !llvm.loop !0
; attachment on the loop's back-edge branch; !0 references !1, the
; "llvm.loop.distribute.enable" = true node.  The directives that precede this
; definition still expect exactly one call to @llvm.convergent in the output.
245entry:
246  %a = load ptr, ptr @A, align 8
247  %b = load ptr, ptr @B, align 8
248  %c = load ptr, ptr @C, align 8
249  %d = load ptr, ptr @D, align 8
250  %e = load ptr, ptr @E, align 8
251  br label %for.body
252
253for.body:                                         ; preds = %for.body, %entry
; Induction variable: 0 from %entry, %add from the back edge.
254  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
255
; First statement: A[ind + 1] = A[ind] * B[ind].
256  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
257  %loadA = load i32, ptr %arrayidxA, align 4
258
259  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
260  %loadB = load i32, ptr %arrayidxB, align 4
261
262  %mulA = mul i32 %loadB, %loadA
263
264  %add = add nuw nsw i64 %ind, 1
265  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
266  store i32 %mulA, ptr %arrayidxA_plus_4, align 4
267
; Second statement: C[ind] = convergent(D[ind]) * E[ind].
268  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
269  %loadD = load i32, ptr %arrayidxD, align 4
270
271  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
272  %loadE = load i32, ptr %arrayidxE, align 4
273
274  %convergentD = call i32 @llvm.convergent(i32 %loadD)
275  %mulC = mul i32 %convergentD, %loadE
276
277  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
278  store i32 %mulC, ptr %arrayidxC, align 4
279
; Exit once the incremented index equals 20.  The loop metadata that requests
; distribution hangs off this branch.
280  %exitcond = icmp eq i64 %add, 20
281  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
282
283for.end:                                          ; preds = %for.body
284  ret void
285}
286
287attributes #0 = { nounwind readnone convergent }
288attributes #1 = { nounwind convergent }
; #0 is applied to the declaration of @llvm.convergent; #1 is applied to the
; definitions of @f_with_convergent and @f_with_convergent_forced_distribute.
289
290!0 = distinct !{!0, !1}
291!1 = !{!"llvm.loop.distribute.enable", i1 true}
; !0 is the self-referential loop ID attached to the back-edge branch of
; @f_with_convergent_forced_distribute; its second operand !1 sets
; llvm.loop.distribute.enable to true.
292