xref: /llvm-project/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll (revision 462cb3cd6cecd0511ecaf0e3ebcaba455ece587d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s
3
; AND reduction over 256 i8 elements of %ptr, accumulated in i32 and truncated
; to i8 on exit. The expected output below shows the loop vectorized with the
; reduction carried out directly on <8 x i8> (vector width 8 is forced by the
; opt command line at the top of the file) and finished with
; @llvm.vector.reduce.and.v8i8. The scalar start value is 0, which appears as
; lane 0 of the vector phi; the remaining lanes are -1.
4define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) {
5; CHECK-LABEL: @reduction_and_trunc(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8; CHECK:       vector.ph:
9; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
10; CHECK:       vector.body:
11; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
12; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
13; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
14; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
15; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
16; CHECK-NEXT:    [[TMP2]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
17; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
18; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
19; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
20; CHECK:       middle.block:
21; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]])
22; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
23; CHECK:       scalar.ph:
24; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
25; CHECK:       for.body:
26; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
27; CHECK:       for.end:
28; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
29; CHECK-NEXT:    ret i8 [[AND_LCSSA_OFF0]]
30;
31entry:
32  br label %for.body
33
34for.body:
35  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
36  %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ]
; Keep only the low 8 bits of the i32 accumulator; the result is truncated to
; i8 in for.end.
37  %sum.02 = and i32 %sum.02p, 255
38  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
39  %load = load i8, ptr %gep
40  %ext = zext i8 %load to i32
41  %and = and i32 %sum.02, %ext
42  %iv.next = add i32 %iv, 1
43  %exitcond = icmp eq i32 %iv.next, 256
44  br i1 %exitcond, label %for.end, label %for.body
45
46for.end:
47  %ret = trunc i32 %and to i8
48  ret i8 %ret
49}
50
; OR reduction over 256 i16 elements of %ptr, accumulated in i32 and truncated
; to i16 on exit. The expected output below shows the loop vectorized with the
; reduction carried out directly on <8 x i16> and finished with
; @llvm.vector.reduce.or.v8i16.
; Note: the reduction value is named %xor in the input IR even though the
; instruction that defines it is an `or`.
51define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
52; CHECK-LABEL: @reduction_or_trunc(
53; CHECK-NEXT:  entry:
54; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
55; CHECK:       vector.ph:
56; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
57; CHECK:       vector.body:
58; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
59; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
60; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
61; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
62; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
63; CHECK-NEXT:    [[TMP2]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
64; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
65; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
66; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
67; CHECK:       middle.block:
68; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP2]])
69; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
70; CHECK:       scalar.ph:
71; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
72; CHECK:       for.body:
73; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
74; CHECK:       for.end:
75; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
76; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
77;
78entry:
79  br label %for.body
80
81for.body:
82  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
83  %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
; Keep only the low 16 bits of the i32 accumulator; the result is truncated to
; i16 in for.end.
84  %sum.02 = and i32 %sum.02p, 65535
85  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
86  %load = load i16, ptr %gep
87  %ext = zext i16 %load to i32
88  %xor = or i32 %sum.02, %ext
89  %iv.next = add i32 %iv, 1
90  %exitcond = icmp eq i32 %iv.next, 256
91  br i1 %exitcond, label %for.end, label %for.body
92
93for.end:
94  %ret = trunc i32 %xor to i16
95  ret i16 %ret
96}
97
; XOR reduction over 256 i16 elements of %ptr, accumulated in i32 and truncated
; to i16 on exit. The expected output below shows the loop vectorized with the
; reduction carried out directly on <8 x i16> and finished with
; @llvm.vector.reduce.xor.v8i16.
98define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) {
99; CHECK-LABEL: @reduction_xor_trunc(
100; CHECK-NEXT:  entry:
101; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
102; CHECK:       vector.ph:
103; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
104; CHECK:       vector.body:
105; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
106; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
107; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
108; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
109; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
110; CHECK-NEXT:    [[TMP2]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
111; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
112; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
113; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
114; CHECK:       middle.block:
115; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP2]])
116; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
117; CHECK:       scalar.ph:
118; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
119; CHECK:       for.body:
120; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
121; CHECK:       for.end:
122; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
123; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
124;
125entry:
126  br label %for.body
127
128for.body:
129  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
130  %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
; Keep only the low 16 bits of the i32 accumulator; the result is truncated to
; i16 in for.end.
131  %sum.02 = and i32 %sum.02p, 65535
132  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
133  %load = load i16, ptr %gep
134  %ext = zext i16 %load to i32
135  %xor = xor i32 %sum.02, %ext
136  %iv.next = add i32 %iv, 1
137  %exitcond = icmp eq i32 %iv.next, 256
138  br i1 %exitcond, label %for.end, label %for.body
139
140for.end:
141  %ret = trunc i32 %xor to i8
144
; Signed-minimum pattern (icmp slt + select) over 256 sign-extended i8
; elements of %ptr, with the i32 accumulator masked to 8 bits each iteration
; and truncated to i8 on exit. The expected output below contains no vector
; blocks: the loop stays scalar, the compare/select pair appears as a call to
; @llvm.smin.i32, and the i32 -> i8 trunc remains in for.end.
145define i8 @reduction_smin_trunc(ptr noalias nocapture %ptr) {
146; CHECK-LABEL: @reduction_smin_trunc(
147; CHECK-NEXT:  entry:
148; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
149; CHECK:       for.body:
150; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
151; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 256, [[ENTRY]] ]
152; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255
153; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
154; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
155; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
156; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[LOAD]] to i32
157; CHECK-NEXT:    [[MIN]] = call i32 @llvm.smin.i32(i32 [[SUM_02]], i32 [[EXT]])
158; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
159; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
160; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
161; CHECK:       for.end:
162; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[MIN]] to i8
163; CHECK-NEXT:    ret i8 [[RET]]
164;
165entry:
166  br label %for.body
167
168for.body:
169  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
; The start value 256 has no bits in the low byte, so the mask below yields 0
; on the first iteration.
170  %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ]
171  %sum.02 = and i32 %sum.02p, 255
172  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
173  %load = load i8, ptr %gep
174  %ext = sext i8 %load to i32
175  %icmp = icmp slt i32 %sum.02, %ext
176  %min = select i1 %icmp, i32 %sum.02, i32 %ext
177  %iv.next = add i32 %iv, 1
178  %exitcond = icmp eq i32 %iv.next, 256
179  br i1 %exitcond, label %for.end, label %for.body
180
181for.end:
182  %ret = trunc i32 %min to i8
183  ret i8 %ret
184}
185
; Unsigned-minimum pattern (icmp ult + select) over 256 zero-extended i8
; elements of %ptr, with the i32 accumulator masked to 8 bits each iteration
; and truncated to i8 on exit. The expected output below contains no vector
; blocks: the loop stays scalar, the compare/select pair appears as a call to
; @llvm.umin.i32, and the final trunc carries the nuw flag.
186define i8 @reduction_umin_trunc(ptr noalias nocapture %ptr) {
187; CHECK-LABEL: @reduction_umin_trunc(
188; CHECK-NEXT:  entry:
189; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
190; CHECK:       for.body:
191; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
192; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
193; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255
194; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
195; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
196; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
197; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[LOAD]] to i32
198; CHECK-NEXT:    [[MIN]] = call i32 @llvm.umin.i32(i32 [[SUM_02]], i32 [[EXT]])
199; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
200; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
201; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
202; CHECK:       for.end:
203; CHECK-NEXT:    [[RET:%.*]] = trunc nuw i32 [[MIN]] to i8
204; CHECK-NEXT:    ret i8 [[RET]]
205;
206entry:
207  br label %for.body
208
209for.body:
210  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
211  %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
; Keep only the low 8 bits of the i32 accumulator; the result is truncated to
; i8 in for.end.
212  %sum.02 = and i32 %sum.02p, 255
213  %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
214  %load = load i8, ptr %gep
215  %ext = zext i8 %load to i32
216  %icmp = icmp ult i32 %sum.02, %ext
217  %min = select i1 %icmp, i32 %sum.02, i32 %ext
218  %iv.next = add i32 %iv, 1
219  %exitcond = icmp eq i32 %iv.next, 256
220  br i1 %exitcond, label %for.end, label %for.body
221
222for.end:
223  %ret = trunc i32 %min to i8
224  ret i8 %ret
225}
226
; Signed-maximum pattern (icmp sgt + select) over 256 sign-extended i16
; elements of %ptr, with the i32 accumulator masked to 16 bits each iteration
; and truncated to i16 on exit. The expected output below contains no vector
; blocks: the loop stays scalar, the compare/select pair appears as a call to
; @llvm.smax.i32, and the i32 -> i16 trunc remains in for.end.
; Note: the reduction value is named %min in the input IR even though it
; selects the larger operand.
227define i16 @reduction_smax_trunc(ptr noalias nocapture %ptr) {
228; CHECK-LABEL: @reduction_smax_trunc(
229; CHECK-NEXT:  entry:
230; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
231; CHECK:       for.body:
232; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
233; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
234; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535
235; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
236; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
237; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
238; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[LOAD]] to i32
239; CHECK-NEXT:    [[MIN]] = call i32 @llvm.smax.i32(i32 [[SUM_02]], i32 [[EXT]])
240; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
241; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
242; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
243; CHECK:       for.end:
244; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[MIN]] to i16
245; CHECK-NEXT:    ret i16 [[RET]]
246;
247entry:
248  br label %for.body
249
250for.body:
251  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
252  %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
; Keep only the low 16 bits of the i32 accumulator; the result is truncated to
; i16 in for.end.
253  %sum.02 = and i32 %sum.02p, 65535
254  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
255  %load = load i16, ptr %gep
256  %ext = sext i16 %load to i32
257  %icmp = icmp sgt i32 %sum.02, %ext
258  %min = select i1 %icmp, i32 %sum.02, i32 %ext
259  %iv.next = add i32 %iv, 1
260  %exitcond = icmp eq i32 %iv.next, 256
261  br i1 %exitcond, label %for.end, label %for.body
262
263for.end:
264  %ret = trunc i32 %min to i16
265  ret i16 %ret
266}
267
; Unsigned-maximum pattern (icmp ugt + select) over 256 zero-extended i16
; elements of %ptr, with the i32 accumulator masked to 16 bits each iteration
; and truncated to i16 on exit. The expected output below contains no vector
; blocks: the loop stays scalar, the compare/select pair appears as a call to
; @llvm.umax.i32, and the final trunc carries the nuw flag.
; Note: the reduction value is named %min in the input IR even though it
; selects the larger operand.
268define i16 @reduction_umax_trunc(ptr noalias nocapture %ptr) {
269; CHECK-LABEL: @reduction_umax_trunc(
270; CHECK-NEXT:  entry:
271; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
272; CHECK:       for.body:
273; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
274; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
275; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535
276; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
277; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
278; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
279; CHECK-NEXT:    [[EXT:%.*]] = zext i16 [[LOAD]] to i32
280; CHECK-NEXT:    [[MIN]] = call i32 @llvm.umax.i32(i32 [[SUM_02]], i32 [[EXT]])
281; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
282; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
283; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
284; CHECK:       for.end:
285; CHECK-NEXT:    [[RET:%.*]] = trunc nuw i32 [[MIN]] to i16
286; CHECK-NEXT:    ret i16 [[RET]]
287;
288entry:
289  br label %for.body
290
291for.body:
292  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
293  %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
; Keep only the low 16 bits of the i32 accumulator; the result is truncated to
; i16 in for.end.
294  %sum.02 = and i32 %sum.02p, 65535
295  %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
296  %load = load i16, ptr %gep
297  %ext = zext i16 %load to i32
298  %icmp = icmp ugt i32 %sum.02, %ext
299  %min = select i1 %icmp, i32 %sum.02, i32 %ext
300  %iv.next = add i32 %iv, 1
301  %exitcond = icmp eq i32 %iv.next, 256
302  br i1 %exitcond, label %for.end, label %for.body
303
304for.end:
305  %ret = trunc i32 %min to i16
306  ret i16 %ret
307}
308
309; Test case for https://github.com/llvm/llvm-project/issues/81415.
; i32 OR reduction over 999 elements of %src, starting from 10. Each loaded
; value is first added to %and, which is the result of and-ing an sdiv result
; with 0. The expected output below shows the loop vectorized as an
; <8 x i32> OR reduction covering the first 992 iterations, finished with
; @llvm.vector.reduce.or.v8i32, followed by a scalar remainder loop that runs
; from 992 to 999. None of the trunc/sext/icmp/sdiv/and chain appears in the
; expected output.
310define i32 @reduction_and_or(i16 %a, i32 %b, ptr %src) {
311; CHECK-LABEL: @reduction_and_or(
312; CHECK-NEXT:  entry:
313; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
314; CHECK:       vector.ph:
315; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
316; CHECK:       vector.body:
317; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
318; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
319; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
320; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
321; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
322; CHECK-NEXT:    [[TMP2]] = or <8 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
323; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
324; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
325; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
326; CHECK:       middle.block:
327; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
328; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
329; CHECK:       scalar.ph:
330; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ]
331; CHECK-NEXT:    br label [[LOOP:%.*]]
332; CHECK:       loop:
333; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
334; CHECK-NEXT:    [[OR67:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
335; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg i32 [[IV]] to i64
336; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[TMP5]]
337; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
338; CHECK-NEXT:    [[OR]] = or i32 [[OR67]], [[L]]
339; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
340; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 999
341; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
342; CHECK:       exit:
343; CHECK-NEXT:    [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ]
344; CHECK-NEXT:    ret i32 [[OR_LCSSA]]
345;
346entry:
347  %ext1 = zext i16 %a to i32
348  br label %loop
349
350loop:
351  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
352  %or67 = phi i32 [ 10, %entry ], [ %or, %loop ]
; The chain from %t through %and feeds only %add; its last step masks the
; sdiv result with 0.
353  %t = trunc i32 %b to i16
354  %ext = sext i16 %t to i32
355  %cmp = icmp sgt i32 %ext, %ext1
356  %ext2 = zext i1 %cmp to i32
357  %cmp3 = icmp sge i32 %iv, %ext2
358  %ext4 = zext i1 %cmp3 to i32
359  %div = sdiv i32 %ext4, %b
360  %and = and i32 %div, 0
361  %gep = getelementptr inbounds i32, ptr %src, i32 %iv
362  %l = load i32, ptr %gep
363  %add = add i32 %and, %l
364  %or = or i32 %or67, %add
365  %iv.next = add nsw i32 %iv, 1
366  %tobool.not = icmp eq i32 %iv.next, 999
367  br i1 %tobool.not, label %exit, label %loop
368
369exit:
370  %or.lcssa = phi i32 [ %or, %loop ]
371  ret i32 %or.lcssa
372}
373