xref: /llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll (revision f48884ded884d982a7fd13394b0e93e6588f4143)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=loop-vectorize -riscv-v-vector-bits-min=128 -scalable-vectorization=on -force-target-instruction-cost=1 -S < %s | FileCheck %s
3
4target triple = "riscv64"
5
; Trip count 1: the checks below expect NO vectorization -- the function
; keeps its original scalar loop (no vector.ph/vector.body blocks appear).
6define void @trip1_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
7; CHECK-LABEL: @trip1_i8(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
10; CHECK:       for.body:
11; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
12; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]]
13; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
14; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP0]], 1
15; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
16; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
17; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
18; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
19; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
20; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1
21; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
22; CHECK:       for.end:
23; CHECK-NEXT:    ret void
24;
; Input IR: for (i = 0; i < 1; ++i) dst[i] += src[i] << 1
25entry:
26  br label %for.body
27
28for.body:                                         ; preds = %entry, %for.body
29  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
30  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
31  %0 = load i8, ptr %arrayidx, align 1
32  %mul = shl i8 %0, 1
33  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
34  %1 = load i8, ptr %arrayidx1, align 1
35  %add = add i8 %mul, %1
36  store i8 %add, ptr %arrayidx1, align 1
37  %inc = add nuw nsw i64 %i.08, 1
38  %exitcond.not = icmp eq i64 %inc, 1
39  br i1 %exitcond.not, label %for.end, label %for.body
40
41for.end:                                          ; preds = %for.body
42  ret void
43}
44
; Trip count 3: the checks below expect vectorization with a scalable VF of
; (vscale x 2), with the tail folded via @llvm.get.active.lane.mask, so the
; vector.body executes exactly once (it branches straight to middle.block).
; NOTE(review): the auto-generated [[DST]]/[[DST1]] capture names in the
; checks actually bind %src and %dst respectively (first-use naming artifact
; of update_test_checks.py) -- confirm before hand-editing these checks.
45define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
46; CHECK-LABEL: @trip3_i8(
47; CHECK-NEXT:  entry:
48; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
49; CHECK:       vector.ph:
50; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
51; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
52; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
53; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
54; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
55; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
56; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
57; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
58; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
59; CHECK:       vector.body:
60; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
61; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
62; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
63; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
64; CHECK-NEXT:    [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
65; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
66; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
67; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
68; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
69; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
70; CHECK-NEXT:    call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
71; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
72; CHECK:       middle.block:
73; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
74; CHECK:       scalar.ph:
75; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
76; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
77; CHECK:       for.body:
78; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
79; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
80; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
81; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP15]], 1
82; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
83; CHECK-NEXT:    [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
84; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
85; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
86; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
87; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
88; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
89; CHECK:       for.end:
90; CHECK-NEXT:    ret void
91;
; Input IR: for (i = 0; i < 3; ++i) dst[i] += src[i] << 1
92entry:
93  br label %for.body
94
95for.body:                                         ; preds = %entry, %for.body
96  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
97  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
98  %0 = load i8, ptr %arrayidx, align 1
99  %mul = shl i8 %0, 1
100  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
101  %1 = load i8, ptr %arrayidx1, align 1
102  %add = add i8 %mul, %1
103  store i8 %add, ptr %arrayidx1, align 1
104  %inc = add nuw nsw i64 %i.08, 1
105  %exitcond.not = icmp eq i64 %inc, 3
106  br i1 %exitcond.not, label %for.end, label %for.body
107
108for.end:                                          ; preds = %for.body
109  ret void
110}
111
; Trip count 5: the checks below expect vectorization with a scalable VF of
; (vscale x 4), tail-folded via @llvm.get.active.lane.mask; the vector.body
; executes once (unconditional branch to middle.block, no back-edge).
; NOTE(review): as in trip3_i8, the [[DST]]/[[DST1]] capture names bind
; %src and %dst respectively (autogenerated naming artifact).
112define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
113; CHECK-LABEL: @trip5_i8(
114; CHECK-NEXT:  entry:
115; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
116; CHECK:       vector.ph:
117; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
118; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
119; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
120; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 5, [[TMP2]]
121; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
122; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
123; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
124; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
125; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
126; CHECK:       vector.body:
127; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
128; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
129; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
130; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
131; CHECK-NEXT:    [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
132; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
133; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
134; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
135; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
136; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
137; CHECK-NEXT:    call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
138; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
139; CHECK:       middle.block:
140; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
141; CHECK:       scalar.ph:
142; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
143; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
144; CHECK:       for.body:
145; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
146; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
147; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
148; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP15]], 1
149; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
150; CHECK-NEXT:    [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
151; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
152; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
153; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
154; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
155; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
156; CHECK:       for.end:
157; CHECK-NEXT:    ret void
158;
; Input IR: for (i = 0; i < 5; ++i) dst[i] += src[i] << 1
159entry:
160  br label %for.body
161
162for.body:                                         ; preds = %entry, %for.body
163  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
164  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
165  %0 = load i8, ptr %arrayidx, align 1
166  %mul = shl i8 %0, 1
167  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
168  %1 = load i8, ptr %arrayidx1, align 1
169  %add = add i8 %mul, %1
170  store i8 %add, ptr %arrayidx1, align 1
171  %inc = add nuw nsw i64 %i.08, 1
172  %exitcond.not = icmp eq i64 %inc, 5
173  br i1 %exitcond.not, label %for.end, label %for.body
174
175for.end:                                          ; preds = %for.body
176  ret void
177}
178
; Trip count 8: the checks below expect vectorization with a scalable VF of
; (vscale x 4), tail-folded via @llvm.get.active.lane.mask; the vector.body
; executes once (unconditional branch to middle.block).
; NOTE(review): as in trip3_i8, the [[DST]]/[[DST1]] capture names bind
; %src and %dst respectively (autogenerated naming artifact).
179define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
180; CHECK-LABEL: @trip8_i8(
181; CHECK-NEXT:  entry:
182; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
183; CHECK:       vector.ph:
184; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
185; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
186; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
187; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 8, [[TMP2]]
188; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
189; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
190; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
191; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
192; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
193; CHECK:       vector.body:
194; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
195; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
196; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
197; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
198; CHECK-NEXT:    [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
199; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
200; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
201; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
202; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
203; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
204; CHECK-NEXT:    call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
205; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
206; CHECK:       middle.block:
207; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
208; CHECK:       scalar.ph:
209; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
210; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
211; CHECK:       for.body:
212; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
213; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
214; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
215; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP15]], 1
216; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
217; CHECK-NEXT:    [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
218; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
219; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
220; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
221; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8
222; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
223; CHECK:       for.end:
224; CHECK-NEXT:    ret void
225;
; Input IR: for (i = 0; i < 8; ++i) dst[i] += src[i] << 1
226entry:
227  br label %for.body
228
229for.body:                                         ; preds = %entry, %for.body
230  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
231  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
232  %0 = load i8, ptr %arrayidx, align 1
233  %mul = shl i8 %0, 1
234  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
235  %1 = load i8, ptr %arrayidx1, align 1
236  %add = add i8 %mul, %1
237  store i8 %add, ptr %arrayidx1, align 1
238  %inc = add nuw nsw i64 %i.08, 1
239  %exitcond.not = icmp eq i64 %inc, 8
240  br i1 %exitcond.not, label %for.end, label %for.body
241
242for.end:                                          ; preds = %for.body
243  ret void
244}
245
; Trip count 16: the checks below expect a fixed-width VF of 16 -- unmasked
; <16 x i8> loads/stores covering the whole trip count in a single
; vector.body execution (no lane mask, no vector loop back-edge).
246define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
247; CHECK-LABEL: @trip16_i8(
248; CHECK-NEXT:  entry:
249; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
250; CHECK:       vector.ph:
251; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
252; CHECK:       vector.body:
253; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
254; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
255; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
256; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1)
257; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
258; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
259; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
260; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP2]], [[WIDE_LOAD1]]
261; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
262; CHECK-NEXT:    store <16 x i8> [[TMP5]], ptr [[TMP6]], align 1
263; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
264; CHECK:       middle.block:
265; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
266; CHECK:       scalar.ph:
267; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
268; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
269; CHECK:       for.body:
270; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
271; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
272; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
273; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP7]], 1
274; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
275; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
276; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]]
277; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
278; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
279; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
280; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
281; CHECK:       for.end:
282; CHECK-NEXT:    ret void
283;
; Input IR: for (i = 0; i < 16; ++i) dst[i] += src[i] << 1
284entry:
285  br label %for.body
286
287for.body:                                         ; preds = %entry, %for.body
288  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
289  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
290  %0 = load i8, ptr %arrayidx, align 1
291  %mul = shl i8 %0, 1
292  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
293  %1 = load i8, ptr %arrayidx1, align 1
294  %add = add i8 %mul, %1
295  store i8 %add, ptr %arrayidx1, align 1
296  %inc = add nuw nsw i64 %i.08, 1
297  %exitcond.not = icmp eq i64 %inc, 16
298  br i1 %exitcond.not, label %for.end, label %for.body
299
300for.end:                                          ; preds = %for.body
301  ret void
302}
303
304
; Trip count 32: the checks below expect a fixed-width VF of 32 -- unmasked
; <32 x i8> loads/stores covering the whole trip count in a single
; vector.body execution (no lane mask, no vector loop back-edge).
305define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
306; CHECK-LABEL: @trip32_i8(
307; CHECK-NEXT:  entry:
308; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
309; CHECK:       vector.ph:
310; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
311; CHECK:       vector.body:
312; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
313; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
314; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
315; CHECK-NEXT:    [[TMP2:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1)
316; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
317; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
318; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
319; CHECK-NEXT:    [[TMP5:%.*]] = add <32 x i8> [[TMP2]], [[WIDE_LOAD1]]
320; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
321; CHECK-NEXT:    store <32 x i8> [[TMP5]], ptr [[TMP6]], align 1
322; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
323; CHECK:       middle.block:
324; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
325; CHECK:       scalar.ph:
326; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
327; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
328; CHECK:       for.body:
329; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
330; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
331; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
332; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP7]], 1
333; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
334; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
335; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]]
336; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
337; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
338; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32
339; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
340; CHECK:       for.end:
341; CHECK-NEXT:    ret void
342;
; Input IR: for (i = 0; i < 32; ++i) dst[i] += src[i] << 1
343entry:
344  br label %for.body
345
346for.body:                                         ; preds = %entry, %for.body
347  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
348  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
349  %0 = load i8, ptr %arrayidx, align 1
350  %mul = shl i8 %0, 1
351  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
352  %1 = load i8, ptr %arrayidx1, align 1
353  %add = add i8 %mul, %1
354  store i8 %add, ptr %arrayidx1, align 1
355  %inc = add nuw nsw i64 %i.08, 1
356  %exitcond.not = icmp eq i64 %inc, 32
357  br i1 %exitcond.not, label %for.end, label %for.body
358
359for.end:                                          ; preds = %for.body
360  ret void
361}
362
; Trip count 24: the checks below expect a fixed-width VF of 8 with a real
; vector loop -- an index phi stepping by 8 until 24 (three iterations) --
; using unmasked <8 x i8> loads/stores and no tail folding.
363define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
364; CHECK-LABEL: @trip24_i8(
365; CHECK-NEXT:  entry:
366; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
367; CHECK:       vector.ph:
368; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
369; CHECK:       vector.body:
370; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
371; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
372; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]]
373; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
374; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
375; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1)
376; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
377; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
378; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
379; CHECK-NEXT:    [[TMP6:%.*]] = add <8 x i8> [[TMP3]], [[WIDE_LOAD1]]
380; CHECK-NEXT:    store <8 x i8> [[TMP6]], ptr [[TMP5]], align 1
381; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
382; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
383; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
384; CHECK:       middle.block:
385; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
386; CHECK:       scalar.ph:
387; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
388; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
389; CHECK:       for.body:
390; CHECK-NEXT:    [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
391; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
392; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
393; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[TMP8]], 1
394; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
395; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
396; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP9]]
397; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
398; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_08]], 1
399; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24
400; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
401; CHECK:       for.end:
402; CHECK-NEXT:    ret void
403;
; Input IR: for (i = 0; i < 24; ++i) dst[i] += src[i] << 1
404entry:
405  br label %for.body
406
407for.body:                                         ; preds = %entry, %for.body
408  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
409  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
410  %0 = load i8, ptr %arrayidx, align 1
411  %mul = shl i8 %0, 1
412  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
413  %1 = load i8, ptr %arrayidx1, align 1
414  %add = add i8 %mul, %1
415  store i8 %add, ptr %arrayidx1, align 1
416  %inc = add nuw nsw i64 %i.08, 1
417  %exitcond.not = icmp eq i64 %inc, 24
418  br i1 %exitcond.not, label %for.end, label %for.body
419
420for.end:                                          ; preds = %for.body
421  ret void
422}
423
; All test functions use #0: RISC-V vector (+v) and double-float (+d)
; extensions enabled, with vscale constrained to the range [2, 1024].
424attributes #0 = { "target-features"="+v,+d" vscale_range(2, 1024) }
425
426