; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"

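; The first four tests use a count-up IV that steps by vscale and exit on
; slt, ult, ule, and ne comparisons against %n. As the CHECK lines below
; show, SCEV folds the vscale_range(2,1024) attribute into the ranges of the
; IV expressions and computes an exact backedge-taken count in terms of
; vscale for each predicate.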
define void @vscale_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ult'
; CHECK-NEXT:  Classifying expressions for: @vscale_ult
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ult
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ule(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ule'
; CHECK-NEXT:  Classifying expressions for: @vscale_ule
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 + vscale)<nsw> umax %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ule
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ule i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ne'
; CHECK-NEXT:  Classifying expressions for: @vscale_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 * vscale)<nsw> + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 * vscale)<nsw> + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 * vscale)<nsw> + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ne i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}


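; The vscalex4 tests repeat the count-up pattern with a stride of 4 * vscale,
; a typical scalable vectorization factor. As the CHECK lines below show, the
; /u divisor in the backedge-taken count becomes (4 * vscale)<nuw><nsw> and
; the constant max backedge-taken counts shrink accordingly.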
define void @vscalex4_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_slt'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_slt
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_slt
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_ult
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %VF
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}


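; The vp.load/vp.store variants below model an EVL-style vectorized loop. In
; the first two, the IV still steps by the full VF, so SCEV computes the same
; trip counts as in the vscalex4 tests above; the capped element count only
; feeds the memory intrinsics.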
define void @vscale_slt_with_vp_plain(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_plain'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_plain
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_plain
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_umin
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %left = sub i32 %n, %i.05
; CHECK-NEXT:    --> {%n,+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT:    --> ((4 * vscale)<nuw><nsw> umin {%n,+,(-4 * vscale)<nsw>}<nw><%for.body>) U: [0,4097) S: [0,4097) Exits: (((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) umin (4 * vscale)<nuw><nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_umin
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

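; In this variant the IV steps by the capped VF, so the increment is not
; loop-invariant and the phi is not an add recurrence. As the CHECK lines
; show, the loop expressions are Variant and SCEV reports the backedge-taken
; count as unpredictable.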
define void @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin2'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_umin2
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> %i.05 U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> ((4 * %i.05) + %A) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %left = sub i32 %n, %i.05
; CHECK-NEXT:    --> ((-1 * %i.05)<nsw> + %n) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT:    --> (((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) U: [0,4097) S: [0,4097) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF.capped
; CHECK-NEXT:    --> ((((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) + %i.05)<nuw><nsw> U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_umin2
; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)

  %add = add nsw i32 %i.05, %VF.capped
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; The next two cases check whether we can infer the flags on the IV of a
; count-up loop using vscale strides. vscale is a power of two and these
; loops are finite by assumption (mustprogress).

define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_noflags'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_noflags
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_noflags
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add i32 %i.05, %vscale
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult_noflags'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_ult_noflags
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult_noflags
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add i32 %i.05, %VF
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; The next two cases check whether we can infer the flags on the IV of a
; countdown loop using vscale strides.

define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_countdown_ne'
; CHECK-NEXT:  Classifying expressions for: @vscale_countdown_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %start = sub i32 %n, %vscale
; CHECK-NEXT:    --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT:    --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT:    --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %sub = sub i32 %iv, %vscale
; CHECK-NEXT:    --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_countdown_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  %start = sub i32 %n, %vscale
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %sub = sub i32 %iv, %vscale
  %cmp = icmp ne i32 %sub, 0
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_countdown_ne'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_countdown_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = shl i32 %vscale, 2
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %start = sub i32 %n, %VF
; CHECK-NEXT:    --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT:    --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT:    --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %sub = sub i32 %iv, %VF
; CHECK-NEXT:    --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_countdown_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870911
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = shl i32 %vscale, 2
  %cmp4 = icmp sgt i32 %n, 0
  %start = sub i32 %n, %VF
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %sub = sub i32 %iv, %VF
  %cmp = icmp ne i32 %sub, 0
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}