xref: /llvm-project/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
2
3; void test(int n, int **a)
4; {
5;   for (int k = 0; k < n; ++k) {
6;     a[k][0] = 0;
7;     #pragma clang loop vectorize_width(4)
8;     for (int i = 0; i < n; ++i) {
9;         for (int j = 0; j < n; ++j) {
10;             a[i][j] = 2 + k;
11;         }
12;     }
13;   }
14; }
15;
16; Make sure VPlan HCFG is constructed when we try to vectorize non-outermost loop
17;
18define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) {
19; CHECK-LABEL: define void @non_outermost_loop_hcfg_construction(
20; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) {
21; CHECK-NEXT:  entry:
22; CHECK-NEXT:    br label [[OUTERMOST_LOOP:%.*]]
23; CHECK:       outermost.loop:
24; CHECK-NEXT:    [[K:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[K_NEXT:%.*]], [[OUTERMOST_LOOP_LATCH:%.*]] ]
25; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[K]]
26; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
27; CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
28; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[K]] to i32
29; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 2
30; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
31; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
32; CHECK:       vector.ph:
33; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
34; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
35; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
36; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
37; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
38; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
39; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
40; CHECK:       vector.body:
41; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4:%.*]] ]
42; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4]] ]
43; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[A]], <4 x i64> [[VEC_IND]]
44; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison)
45; CHECK-NEXT:    br label [[INNERMOST_LOOP1:%.*]]
46; CHECK:       innermost.loop1:
47; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ]
48; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_PHI]]
49; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true))
50; CHECK-NEXT:    [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
51; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]]
52; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
53; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP1]]
54; CHECK:       vector.latch:
55; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
56; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
57; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
58; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
59; CHECK:       middle.block:
60; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
61; CHECK-NEXT:    br i1 [[CMP_N]], label [[OUTERMOST_LOOP_LATCH]], label [[SCALAR_PH]]
62; CHECK:       scalar.ph:
63; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTERMOST_LOOP]] ]
64; CHECK-NEXT:    br label [[MIDDLE_LOOP:%.*]]
65; CHECK:       middle.loop:
66; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[MIDDLE_LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
67; CHECK-NEXT:    [[ARRAYIDX11_US_US:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
68; CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX11_US_US]], align 8
69; CHECK-NEXT:    br label [[INNERMOST_LOOP:%.*]]
70; CHECK:       innermost.loop:
71; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNERMOST_LOOP]] ], [ 0, [[MIDDLE_LOOP]] ]
72; CHECK-NEXT:    [[ARRAYIDX13_US_US:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[J]]
73; CHECK-NEXT:    store i32 [[TMP2]], ptr [[ARRAYIDX13_US_US]], align 4
74; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 1
75; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[J_NEXT]], [[N]]
76; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[MIDDLE_LOOP_LATCH]], label [[INNERMOST_LOOP]]
77; CHECK:       middle.loop.latch:
78; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
79; CHECK-NEXT:    [[EXITCOND41_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
80; CHECK-NEXT:    br i1 [[EXITCOND41_NOT]], label [[OUTERMOST_LOOP_LATCH]], label [[MIDDLE_LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
81; CHECK:       outermost.loop.latch:
82; CHECK-NEXT:    [[K_NEXT]] = add nuw nsw i64 [[K]], 1
83; CHECK-NEXT:    [[EXITCOND47_NOT:%.*]] = icmp eq i64 [[K_NEXT]], [[N]]
84; CHECK-NEXT:    br i1 [[EXITCOND47_NOT]], label [[OUTERMOST_LOOP_POSTEXIT:%.*]], label [[OUTERMOST_LOOP]]
85; CHECK:       outermost.loop.postexit:
86; CHECK-NEXT:    br label [[FOR_COND_CLEANUP:%.*]]
87; CHECK:       for.cond.cleanup:
88; CHECK-NEXT:    ret void
89;
90entry:
91  br label %outermost.loop
92
; Outermost loop over k (not the vectorization candidate): clears a[k][0] and
; materializes the loop-invariant innermost store value %2 = (i32)k + 2.
93outermost.loop:
94  %k = phi i64 [ 0, %entry ], [ %k.next, %outermost.loop.latch ]
95  %arrayidx.us = getelementptr inbounds ptr, ptr %a, i64 %k
96  %0 = load ptr, ptr %arrayidx.us, align 8
97  store i32 0, ptr %0, align 4
98  %1 = trunc i64 %k to i32
99  %2 = add i32 %1, 2
100  br label %middle.loop
101
; Middle loop over i: the non-outermost loop selected for outer-loop
; vectorization via the !llvm.loop !3 metadata on its latch branch below.
102middle.loop:
103  %i = phi i64 [ %i.next, %middle.loop.latch ], [ 0, %outermost.loop ]
104  %arrayidx11.us.us = getelementptr inbounds ptr, ptr %a, i64 %i
105  %3 = load ptr, ptr %arrayidx11.us.us, align 8
106  br label %innermost.loop
107
; Innermost loop over j: stores %2 (= 2 + k) to a[i][j].
108innermost.loop:
109  %j = phi i64 [ %j.next, %innermost.loop ], [ 0, %middle.loop ]
110  %arrayidx13.us.us = getelementptr inbounds i32, ptr %3, i64 %j
111  store i32 %2, ptr %arrayidx13.us.us, align 4
112  %j.next = add nuw nsw i64 %j, 1
113  %exitcond.not = icmp eq i64 %j.next, %n
114  br i1 %exitcond.not, label %middle.loop.latch, label %innermost.loop
115
; Latch of the middle (i) loop; carries the vectorization hint metadata.
116middle.loop.latch:
117  %i.next = add nuw nsw i64 %i, 1
118  %exitcond41.not = icmp eq i64 %i.next, %n
119  br i1 %exitcond41.not, label %outermost.loop.latch, label %middle.loop, !llvm.loop !3
120
121outermost.loop.latch:
122  %k.next = add nuw nsw i64 %k, 1
123  %exitcond47.not = icmp eq i64 %k.next, %n
124  br i1 %exitcond47.not, label %outermost.loop.postexit, label %outermost.loop
125
126outermost.loop.postexit:
127  br label %for.cond.cleanup
128
129for.cond.cleanup:
130  ret void
131}
132
133; void non_outermost_loop_hcfg_construction_other_loops_at_same_level(long n, int **a)
134; {
135;   for (long k = 0; k < n; ++k) {
136;     a[k][0] = 0;
137;     for (long i = 0; i < n; ++i) {
138;         #pragma clang loop vectorize_width(4)
139;         for (long j0 = 0; j0 < n; ++j0) {
140;             for (long x = 0; x < n; ++x) {
141;               a[x+i][j0] = 2 + k+x;
142;             }
143;         }
144;
145;         for (long j1 = n; j1 > 0; --j1) {
146;           a[i][j1] *= j1 & 1;
147;         }
148;     }
149;   }
150; }
151;
152; Make sure VPlan HCFG is constructed when we try to vectorize loop with other loops at level > 0
153;
154define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 %n, ptr %a) {
155; CHECK-LABEL: define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(
156; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) {
157; CHECK-NEXT:  entry:
158; CHECK-NEXT:    br label [[OUTERMOST_LOOP_K:%.*]]
159; CHECK:       return:
160; CHECK-NEXT:    ret void
161; CHECK:       outermost.loop.k:
162; CHECK-NEXT:    [[K:%.*]] = phi i64 [ [[K_NEXT:%.*]], [[OUTERMOST_LOOP_K_CLEANUP:%.*]] ], [ 0, [[ENTRY:%.*]] ]
163; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[K]]
164; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
165; CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
166; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[K]], 2
167; CHECK-NEXT:    br label [[MIDDLE_LOOP_I:%.*]]
168; CHECK:       middle.loop.i:
169; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[OUTERMOST_LOOP_K]] ], [ [[I_NEXT:%.*]], [[MIDDLE_LOOP_I_CLEANUP:%.*]] ]
170; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr ptr, ptr [[A]], i64 [[I]]
171; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
172; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
173; CHECK:       vector.ph:
174; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
175; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
176; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[ADD]], i64 0
177; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
178; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
179; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
180; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
181; CHECK:       vector.body:
182; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP4:%.*]] ]
183; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP4]] ]
184; CHECK-NEXT:    br label [[INNERMOST_LOOP1:%.*]]
185; CHECK:       innermost.loop1:
186; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ]
187; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_PHI]]
188; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
189; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr ptr, ptr [[INVARIANT_GEP]], <4 x i64> [[VEC_PHI]]
190; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison)
191; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_IND]]
192; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true))
193; CHECK-NEXT:    [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
194; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]]
195; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
196; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP1]]
197; CHECK:       vector.latch:
198; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
199; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
200; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
201; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
202; CHECK:       middle.block:
203; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
204; CHECK-NEXT:    br i1 [[CMP_N]], label [[INNERMOST_LOOP_J1_LR_PH:%.*]], label [[SCALAR_PH]]
205; CHECK:       scalar.ph:
206; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[MIDDLE_LOOP_I]] ]
207; CHECK-NEXT:    br label [[MIDDLE_LOOP_J0_PH:%.*]]
208; CHECK:       outermost.loop.k.cleanup:
209; CHECK-NEXT:    [[K_NEXT]] = add nuw nsw i64 [[K]], 1
210; CHECK-NEXT:    [[EXITCOND71_NOT:%.*]] = icmp eq i64 [[K_NEXT]], [[N]]
211; CHECK-NEXT:    br i1 [[EXITCOND71_NOT]], label [[RETURN:%.*]], label [[OUTERMOST_LOOP_K]]
212; CHECK:       innermost.loop.j1.lr.ph:
213; CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[INVARIANT_GEP]], align 8
214; CHECK-NEXT:    br label [[INNERMOST_LOOP_J1:%.*]]
215; CHECK:       middle.loop.j0.ph:
216; CHECK-NEXT:    [[J0:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J0_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP:%.*]] ]
217; CHECK-NEXT:    br label [[INNERMOST_LOOP:%.*]]
218; CHECK:       middle.loop.j0.cleanup:
219; CHECK-NEXT:    [[J0_NEXT]] = add nuw nsw i64 [[J0]], 1
220; CHECK-NEXT:    [[J0_EXIT_COND_NOT:%.*]] = icmp eq i64 [[J0_NEXT]], [[N]]
221; CHECK-NEXT:    br i1 [[J0_EXIT_COND_NOT]], label [[INNERMOST_LOOP_J1_LR_PH]], label [[MIDDLE_LOOP_J0_PH]], !llvm.loop [[LOOP5:![0-9]+]]
222; CHECK:       innermost.loop:
223; CHECK-NEXT:    [[X:%.*]] = phi i64 [ 0, [[MIDDLE_LOOP_J0_PH]] ], [ [[X_NEXT:%.*]], [[INNERMOST_LOOP]] ]
224; CHECK-NEXT:    [[ADD14:%.*]] = add nuw nsw i64 [[ADD]], [[X]]
225; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[ADD14]] to i32
226; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[INVARIANT_GEP]], i64 [[X]]
227; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[GEP]], align 8
228; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[J0]]
229; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX17]], align 4
230; CHECK-NEXT:    [[X_NEXT]] = add nuw nsw i64 [[X]], 1
231; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[X_NEXT]], [[N]]
232; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[MIDDLE_LOOP_J0_CLEANUP]], label [[INNERMOST_LOOP]]
233; CHECK:       middle.loop.i.cleanup:
234; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
235; CHECK-NEXT:    [[EXITCOND70_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
236; CHECK-NEXT:    br i1 [[EXITCOND70_NOT]], label [[OUTERMOST_LOOP_K_CLEANUP]], label [[MIDDLE_LOOP_I]]
237; CHECK:       innermost.loop.j1:
238; CHECK-NEXT:    [[J21_064:%.*]] = phi i64 [ [[N]], [[INNERMOST_LOOP_J1_LR_PH]] ], [ [[DEC:%.*]], [[INNERMOST_LOOP_J1]] ]
239; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[J21_064]]
240; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX28]], align 4
241; CHECK-NEXT:    [[TMP14:%.*]] = and i64 [[J21_064]], 1
242; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[TMP14]], 0
243; CHECK-NEXT:    [[CONV30:%.*]] = select i1 [[DOTNOT]], i32 0, i32 [[TMP13]]
244; CHECK-NEXT:    store i32 [[CONV30]], ptr [[ARRAYIDX28]], align 4
245; CHECK-NEXT:    [[DEC]] = add nsw i64 [[J21_064]], -1
246; CHECK-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[J21_064]], 1
247; CHECK-NEXT:    br i1 [[CMP23]], label [[INNERMOST_LOOP_J1]], label [[MIDDLE_LOOP_I_CLEANUP]]
248;
249entry:
250  br label %outermost.loop.k
251
; Function exit block (appears before the loop nest in block order).
252return:
253  ret void
254
; Outermost loop over k: clears a[k][0] and computes %add = k + 2, which is
; invariant in all inner loops.
255outermost.loop.k:
256  %k = phi i64 [ %k.next, %outermost.loop.k.cleanup ], [ 0, %entry ]
257  %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %k
258  %0 = load ptr, ptr %arrayidx, align 8
259  store i32 0, ptr %0, align 4
260  %add = add nuw nsw i64 %k, 2
261  br label %middle.loop.i
262
; Middle loop over i: %invariant.gep = &a[i] is reused by both inner sibling
; loops (j0 nest and j1 loop) below.
263middle.loop.i:
264  %i = phi i64 [ 0, %outermost.loop.k ], [ %i.next, %middle.loop.i.cleanup ]
265  %invariant.gep = getelementptr ptr, ptr %a, i64 %i
266  br label %middle.loop.j0.ph
267
268outermost.loop.k.cleanup:
269  %k.next = add nuw nsw i64 %k, 1
270  %exitcond71.not = icmp eq i64 %k.next, %n
271  br i1 %exitcond71.not, label %return, label %outermost.loop.k
272
273innermost.loop.j1.lr.ph:                                 ; preds = %middle.loop.j0.cleanup
274  %1 = load ptr, ptr %invariant.gep, align 8
275  br label %innermost.loop.j1
276
; Header of the j0 loop — the loop annotated with !3 (vectorize_width(4)) on
; its latch branch in middle.loop.j0.cleanup. It has a sibling loop
; (innermost.loop.j1) at the same nesting level, which is what this test
; exercises for VPlan HCFG construction.
277middle.loop.j0.ph:
278  %j0 = phi i64 [ 0, %middle.loop.i ], [ %j0.next, %middle.loop.j0.cleanup ]
279  br label %innermost.loop
280
281middle.loop.j0.cleanup:
282  %j0.next = add nuw nsw i64 %j0, 1
283  %j0.exit.cond.not = icmp eq i64 %j0.next, %n
284  br i1 %j0.exit.cond.not, label %innermost.loop.j1.lr.ph, label %middle.loop.j0.ph, !llvm.loop !3
285
; Innermost x loop of the j0 nest: a[x+i][j0] = (i32)(2 + k + x).
286innermost.loop:
287  %x = phi i64 [ 0, %middle.loop.j0.ph ], [ %x.next, %innermost.loop ]
288  %add14 = add nuw nsw i64 %add, %x
289  %conv = trunc i64 %add14 to i32
290  %gep = getelementptr ptr, ptr %invariant.gep, i64 %x
291  %2 = load ptr, ptr %gep, align 8
292  %arrayidx17 = getelementptr inbounds i32, ptr %2, i64 %j0
293  store i32 %conv, ptr %arrayidx17, align 4
294  %x.next = add nuw nsw i64 %x, 1
295  %exitcond.not = icmp eq i64 %x.next, %n
296  br i1 %exitcond.not, label %middle.loop.j0.cleanup, label %innermost.loop
297
298middle.loop.i.cleanup:
299  %i.next = add nuw nsw i64 %i, 1
300  %exitcond70.not = icmp eq i64 %i.next, %n
301  br i1 %exitcond70.not, label %outermost.loop.k.cleanup, label %middle.loop.i
302
; Second (sibling) inner loop, counting j1 down from n: a[i][j1] *= j1 & 1,
; expressed as a select between 0 and the loaded value.
303innermost.loop.j1:
304  %j21.064 = phi i64 [ %n, %innermost.loop.j1.lr.ph ], [ %dec, %innermost.loop.j1 ]
305  %arrayidx28 = getelementptr inbounds i32, ptr %1, i64 %j21.064
306  %3 = load i32, ptr %arrayidx28, align 4
307  %4 = and i64 %j21.064, 1
308  %.not = icmp eq i64 %4, 0
309  %conv30 = select i1 %.not, i32 0, i32 %3
310  store i32 %conv30, ptr %arrayidx28, align 4
311  %dec = add nsw i64 %j21.064, -1
312  %cmp23 = icmp sgt i64 %j21.064, 1
313  br i1 %cmp23, label %innermost.loop.j1, label %middle.loop.i.cleanup
314}
315
; Loop metadata attached to the latches above: force vectorization of the
; annotated loop with a fixed VF of 4 and scalable vectorization disabled.
316!3 = distinct !{!3, !4, !5, !6}
317!4 = !{!"llvm.loop.vectorize.width", i32 4}
318!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
319!6 = !{!"llvm.loop.vectorize.enable", i1 true}
320