; xref: /llvm-project/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll (revision d897ea37dbac66d51794938af4f112e05fb61b05)
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s

;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; A forwarding in the presence of symbolic strides.
define void @single_stride(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:  Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:                                          ; preds = %loop
  ret void
}

; A forwarding in the presence of symbolic strides,
; with nusw instead of inbounds on the GEPs.
define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_nusw'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:  Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr nusw i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr nusw i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:                                          ; preds = %loop
  ret void
}

; Similar to @single_stride, but with struct types.
define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_struct'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:  Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load { i32, i8 }, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store { i32, i8 } %ins, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(8 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,8}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul
  %load = load { i32, i8 }, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds { i32, i8 }, ptr %B, i64 %iv
  %load_1 = load { i32, i8 }, ptr %gep.B, align 4
  %v1 = extractvalue { i32, i8 } %load, 0
  %v2 = extractvalue { i32, i8} %load_1, 0
  %add = add i32 %v1, %v2
  %ins = insertvalue { i32, i8 } undef, i32 %add, 0
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds { i32, i8 }, ptr %A, i64 %iv.next
  store { i32, i8 } %ins, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; A single symbolic stride, where the stride is %offset sign-extended to i64
; and used as the step of %iv.2 in the inner loop of a loop nest.
define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr'
; CHECK-NEXT:    inner.loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP1]]:
; CHECK-NEXT:          (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + %dst))
; CHECK-NEXT:            Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:        Group [[GRP2]]:
; CHECK-NEXT:          (Low: %src High: (804 + %src))
; CHECK-NEXT:            Member: {%src,+,4}<nuw><%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT:        {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT:        --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  %offset.ext = sext i32 %offset to i64
  br label %outer.header

outer.header:
  %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
  br i1 %cond, label %inner.loop, label %exit

inner.loop:
  %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
  %iv.3 = phi i32 [ 0, %outer.header ], [ %iv.3.next, %inner.loop ]
  %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
  %load = load i32, ptr %gep.src, align 8
  %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
  store i32 %load, ptr %gep.dst, align 8
  %iv.2.next = add i64 %iv.2, %offset.ext
  %iv.3.next = add i32 %iv.3, 1
  %ec = icmp eq i32 %iv.3, 200
  br i1 %ec, label %outer.header, label %inner.loop

exit:
  ret void
}

; Like @single_stride_castexpr, but %offset is cast twice: the sext feeds the
; %iv.2 step and the zext is the start value of %iv.3.
define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr_multiuse'
; CHECK-NEXT:    inner.loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP3]]:
; CHECK-NEXT:          (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + (-4 * (zext i32 %offset to i64))<nsw> + %dst))
; CHECK-NEXT:            Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:        Group [[GRP4]]:
; CHECK-NEXT:          (Low: (4 + %src) High: (808 + (-4 * (zext i32 %offset to i64))<nsw> + %src))
; CHECK-NEXT:            Member: {(4 + %src),+,4}<%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3:
; CHECK-NEXT:        {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
; CHECK-NEXT:        --> {(4 + %src),+,4}<%inner.loop>
; CHECK-NEXT:      [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT:        {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT:        --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  %offset.ext = sext i32 %offset to i64
  %offset.zext = zext i32 %offset to i64
  br label %outer.header

outer.header:
  %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
  br i1 %cond, label %inner.loop, label %exit

inner.loop:
  %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
  %iv.3 = phi i64 [ %offset.zext, %outer.header ], [ %iv.3.next, %inner.loop ]
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
  %load = load i32, ptr %gep.src, align 8
  %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
  store i32 %load, ptr %gep.dst, align 8
  %iv.2.next = add i64 %iv.2, %offset.ext
  %iv.3.next = add i64 %iv.3, 1
  %ec = icmp eq i64 %iv.3, 200
  br i1 %ec, label %outer.header, label %inner.loop

exit:
  ret void
}

; Unknown stride (sext of %x), but the loop exits after its first iteration
; (%icmp tests %iv == 0, and %iv starts at 0).
define double @single_iteration_unknown_stride(i32 %x, ptr %y, i1 %cond) {
; CHECK-LABEL: 'single_iteration_unknown_stride'
; CHECK-NEXT:    loop.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %x == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
; CHECK-NEXT:        {(8 + %y),+,(8 * (sext i32 %x to i64))<nsw>}<%loop.body>
; CHECK-NEXT:        --> {(8 + %y),+,8}<%loop.body>
;
entry:
  br i1 %cond, label %noloop.exit, label %loop.ph

loop.ph:                                          ; preds = %entry
  %sext7 = sext i32 %x to i64
  %gep8 = getelementptr i8, ptr %y, i64 8
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %loop.ph
  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
  %mul = mul i64 %iv, %sext7
  %gep10 = getelementptr double, ptr %gep8, i64 %mul
  %load11 = load double, ptr %gep10, align 8
  store double %load11, ptr %y, align 8
  %iv.next = add i64 %iv, 1
  %icmp = icmp eq i64 %iv, 0
  br i1 %icmp, label %loop.exit, label %loop.body

noloop.exit:                                      ; preds = %entry
  %sext = sext i32 %x to i64
  %gep = getelementptr double, ptr %y, i64 %sext
  %load5 = load double, ptr %gep, align 8
  ret double %load5

loop.exit:                                        ; preds = %loop.body
  %sext2 = sext i32 %x to i64
  %gep2 = getelementptr double, ptr %y, i64 %sext2
  %load6 = load double, ptr %gep2, align 8
  ret double %load6
}

; A loop with two symbolic strides.
define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) {
; CHECK-LABEL: 'two_strides'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:  Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride.2 == 1
; CHECK-NEXT:      Equal predicate: %stride.1 == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride.1)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
; CHECK-NEXT:      [PSE] %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2:
; CHECK-NEXT:        {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop>
; CHECK-NEXT:        --> {(4 + %A),+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride.1
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %mul.2 = mul i64 %iv.next, %stride.2
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The symbolic stride %stride is also the loop trip count (%iv.next is
; compared against %stride), so it cannot be speculated to 1.
define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:  Unsafe indirect dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        IndirectUnsafe:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %stride
  br i1 %exitcond, label %exit, label %loop

exit:                                          ; preds = %loop
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count. If we speculate Stride==1, it implies that the loop
; will iterate no more than a single iteration.
define void @unknown_stride_equalto_tc(i32 %N, ptr %A, ptr %B, i32 %j)  {
; CHECK-LABEL: 'unknown_stride_equalto_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
; CHECK-NEXT:        ptr %A
; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP5]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP6]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,%N}<%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
;
entry:
  %cmp = icmp eq i32 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i32 %iv, %N
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}


; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is zero-extended from a narrower type.
define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP7:0x[0-9a-f]+]]):
; CHECK-NEXT:        ptr %A
; CHECK-NEXT:        Against group ([[GRP8:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP7]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP8]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
;
entry:
  %N.ext = zext i16 %N to i32
  %cmp = icmp eq i16 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.ext
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.ext
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is sign-extended from a narrower type.
define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP9:0x[0-9a-f]+]]):
; CHECK-NEXT:        ptr %A
; CHECK-NEXT:        Against group ([[GRP10:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP9]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP10]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
;
entry:
  %N.ext = sext i16 %N to i32
  %cmp = icmp eq i16 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.ext
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.ext
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is truncated from a wider type.
define void @unknown_stride_equalto_trunc_tc(i64 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP11:0x[0-9a-f]+]]):
; CHECK-NEXT:        ptr %A
; CHECK-NEXT:        Against group ([[GRP12:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP11]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP12]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
;
entry:
  %N.trunc = trunc i64 %N to i32
  %cmp = icmp eq i64 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.trunc
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.trunc
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
