; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='print<access-info>' -aa-pipeline='basic-aa' \
; RUN:   -disable-output %s 2>&1 | FileCheck %s

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index++;
;   }
;
; SCEV is unable to prove that A[2 * index] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;    i64 {0,+,2}<%for.body>
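;
; As a purely illustrative sketch (not part of the test), this is the
; wrap that the <nusw> predicate rules out, in 32-bit unsigned arithmetic:
;
;   #include <stdint.h>
;   #include <stdio.h>
;
;   int main(void) {
;     uint32_t index = UINT32_C(1) << 31;   // 2^31 still fits in i32
;     uint32_t doubled = 2 * index;         // wraps around to 0
;     // A[2 * index] would jump back to A[0] here; the <nusw>
;     // predicate (enforced by a runtime check) asserts this never
;     // happens, which lets the zext be folded into the AddRec.
;     printf("%u\n", doubled);              // prints 0
;     return 0;
;   }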

define void @f1(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: 'f1'
; CHECK-NEXT:    for.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT:            store i16 %add, ptr %arrayidxA, align 2
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {0,+,2}<%for.body> Added Flags: <nusw>
; CHECK-NEXT:      {%a,+,4}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; CHECK-NEXT:        ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
; CHECK-NEXT:        --> {%a,+,4}<%for.body>
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
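;
; As a purely illustrative sketch (not part of the test), this is why the
; zext of the decreasing recurrence is not affine without the predicate:
;
;   #include <stdint.h>
;   #include <stdio.h>
;
;   int main(void) {
;     uint32_t v = 2;   // the last i32 values of {2*N,+,-2} near zero
;     for (int i = 0; i < 3; i++) {
;       // Zero-extended to i64 this prints 2, 0, 4294967294: once the
;       // i32 value wraps below zero, the i64 sequence no longer steps
;       // by -2, so the zext can only be folded into the AddRec under
;       // the <nusw> predicate.
;       printf("%llu\n", (unsigned long long)v);
;       v -= 2;
;     }
;     return 0;
;   }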

define void @f2(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: 'f2'
; CHECK-NEXT:    for.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT:            store i16 %add, ptr %arrayidxA, align 2
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; CHECK-NEXT:      {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; CHECK-NEXT:        ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
; CHECK-NEXT:        --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>
;
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
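;
; As a purely illustrative sketch (not part of the test, two's complement
; representation assumed), this is the signed wrap that <nssw> rules out:
;
;   #include <stdint.h>
;   #include <stdio.h>
;
;   int main(void) {
;     uint32_t bits = (uint32_t)INT32_MAX - 1;   // i32 value 2147483646
;     bits += 2;                                 // i32 value wraps to INT32_MIN
;     // Sign-extending to i64 now yields -2147483648 rather than
;     // 2147483648, so sext i32 -> i64 only commutes with the AddRec
;     // when the recurrence never signed-wraps -- which is exactly what
;     // the <nssw> predicate asserts.
;     printf("%lld\n", (long long)(int32_t)bits);
;     return 0;
;   }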

define void @f3(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: 'f3'
; CHECK-NEXT:    for.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT:            store i16 %add, ptr %arrayidxA, align 2
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {0,+,2}<%for.body> Added Flags: <nssw>
; CHECK-NEXT:      {%a,+,4}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; CHECK-NEXT:        ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
; CHECK-NEXT:        --> {%a,+,4}<%for.body>
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

define void @f4(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: 'f4'
; CHECK-NEXT:    for.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT:            store i16 %add, ptr %arrayidxA, align 2
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; CHECK-NEXT:      {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
; CHECK-NEXT:        ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; CHECK-NEXT:        --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
;
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but has the GEP
; to pointer %a inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap and we need
; a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicate.
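;
; As a purely illustrative sketch (not part of the test, two's complement
; representation assumed), how %mul = mul i32 %ind1, 2 can wrap without nsw:
;
;   #include <stdint.h>
;   #include <stdio.h>
;
;   int main(void) {
;     uint32_t ind1 = UINT32_C(1) << 30;   // a large but valid i32 value
;     uint32_t mul = 2 * ind1;             // 2^31, i.e. INT32_MIN as i32
;     // The GEP index would jump to a negative offset here. The <nssw>
;     // predicate on the index expression rules this out; because the
;     // GEP is inbounds, no separate pointer no-wrap predicate is
;     // needed (note the single "Added Flags" line in the CHECKs below,
;     // versus two in @f4).
;     printf("%lld\n", (long long)(int32_t)mul);
;     return 0;
;   }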

define void @f5(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-LABEL: 'f5'
; CHECK-NEXT:    for.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Forward:
; CHECK-NEXT:            %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT:            store i16 %add, ptr %arrayidxA, align 2
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul:
; CHECK-NEXT:        ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; CHECK-NEXT:        --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
;
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul
  %loadA = load i16, ptr %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, ptr %b, i64 %ind
  %loadB = load i16, ptr %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, ptr %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}