xref: /llvm-project/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (revision 1de3dc7d23dd6b856efad3a3a04f2396328726d7)
1; REQUIRES: asserts
2
3; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -debug -disable-output %s 2>&1 | FileCheck --check-prefix=DBG %s
4; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s
5
; Interleave-only case (the RUN lines force vector width 1 and interleave
; count 2): the loop calls @llvm.smin.i32 on the induction variable and stores
; the result to %dst[%iv].
; The debug-output directives below cover the start of the initial VPlan up to
; the end of the vector loop region; the IR directives inside the function
; cover vector.body, which is expected to hold two scalar copies of the call,
; the address computation and the store.
6; DBG-LABEL: 'test_scalarize_call'
7; DBG:      VPlan 'Initial VPlan for VF={1},UF>=1' {
8; DBG-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
9; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
10; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count
11; DBG-EMPTY:
12; DBG-NEXT: ir-bb<entry>:
13; DBG-NEXT:  EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start))
14; DBG-NEXT: Successor(s): vector.ph
15; DBG-EMPTY:
16; DBG-NEXT: vector.ph:
17; DBG-NEXT:   vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VEC_TC]]> * ir<1>
18; DBG-NEXT: Successor(s): vector loop
19; DBG-EMPTY:
20; DBG-NEXT: <x1> vector loop: {
21; DBG-NEXT:   vector.body:
22; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
23; DBG-NEXT:     vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[CAN_IV]]> * ir<1>
24; DBG-NEXT:     vp<[[IV_STEPS:%.+]]>    = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
25; DBG-NEXT:     CLONE ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>)
26; DBG-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%dst>, vp<[[IV_STEPS]]>
27; DBG-NEXT:     CLONE store ir<%min>, ir<%arrayidx>
28; DBG-NEXT:     EMIT vp<[[INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
29; DBG-NEXT:     EMIT branch-on-count vp<[[INC]]>, vp<[[VEC_TC]]>
30; DBG-NEXT:   No successors
31; DBG-NEXT: }
32;
33define void @test_scalarize_call(i32 %start, ptr %dst) {
34; CHECK-LABEL: @test_scalarize_call(
35; CHECK:       vector.body:
36; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
37; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 %start, [[INDEX]]
38; CHECK-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
39; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], 1
40; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
41; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
42; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDUCTION]]
43; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDUCTION1]]
44; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 8
45; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP4]], align 8
46; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
47; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], %n.vec
48; CHECK-NEXT:    br i1 [[TMP5]], label %middle.block, label %vector.body
49; CHECK:       middle.block:
50;
51entry:
52  br label %loop
53
; Source loop: %iv runs from %start and the loop exits once %iv.next == 1000.
54loop:
55  %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
56  %min = tail call i32 @llvm.smin.i32(i32 %iv, i32 65535)
57  %arrayidx = getelementptr inbounds i32 , ptr %dst, i32 %iv
58  store i32 %min, ptr %arrayidx, align 8
59  %iv.next = add nsw i32 %iv, 1
60  %tobool.not = icmp eq i32 %iv.next, 1000
61  br i1 %tobool.not, label %exit, label %loop
62
63exit:
64  ret void
65}
66
67declare i32 @llvm.smin.i32(i32, i32)
68
69
; Interleave-only case with a conditionally executed copy: an i1 value %d is
; flipped on every iteration (xor with true) and guards a load from %src[%iv]
; followed by a store to %dst[%iv].
; The debug-output directives below cover the whole plan, including the
; pred.store replicate region, the middle block and the scalar preheader.
; The IR directives inside the function expect two guarded copies of the
; load/store (pred.store.if and pred.store.if3), one per interleaved iteration.
70; DBG-LABEL: 'test_scalarize_with_branch_cond'
71
72; DBG:       Live-in vp<[[VFxUF:%.+]]> = VF * UF
73; DBG-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
74; DBG-NEXT:  Live-in ir<1000> = original trip-count
75; DBG-EMPTY:
76; DBG-NEXT: ir-bb<entry>:
77; DBG-NEXT: Successor(s): vector.ph
78; DBG-EMPTY:
79; DBG-NEXT: vector.ph:
80; DBG-NEXT:   vp<[[END:%.+]]> = DERIVED-IV ir<false> + vp<[[VEC_TC]]> * ir<true>
81; DBG-NEXT: Successor(s): vector loop
82; DBG-EMPTY:
83; DBG-NEXT: <x1> vector loop: {
84; DBG-NEXT:   vector.body:
85; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
86; DBG-NEXT:     vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<false> + vp<[[CAN_IV]]> * ir<true>
87; DBG-NEXT:     vp<[[STEPS1:%.+]]>    = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<true>
88; DBG-NEXT:   Successor(s): pred.store
89; DBG-EMPTY:
90; DBG-NEXT:   <xVFxUF> pred.store: {
91; DBG-NEXT:     pred.store.entry:
92; DBG-NEXT:       BRANCH-ON-MASK vp<[[STEPS1]]>
93; DBG-NEXT:     Successor(s): pred.store.if, pred.store.continue
94; DBG-EMPTY:
95; DBG-NEXT:     pred.store.if:
96; DBG-NEXT:       vp<[[STEPS2:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
97; DBG-NEXT:       CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS2]]>
98; DBG-NEXT:       CLONE ir<%l> = load ir<%gep.src>
99; DBG-NEXT:       CLONE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS2]]>
100; DBG-NEXT:       CLONE store ir<%l>, ir<%gep.dst>
101; DBG-NEXT:     Successor(s): pred.store.continue
102; DBG-EMPTY:
103; DBG-NEXT:     pred.store.continue:
104; DBG-NEXT:     No successors
105; DBG-NEXT:   }
106; DBG-NEXT:   Successor(s): cond.false.1
107; DBG-EMPTY:
108; DBG-NEXT:   cond.false.1:
109; DBG-NEXT:     EMIT vp<[[CAN_IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
110; DBG-NEXT:     EMIT branch-on-count vp<[[CAN_IV_INC]]>, vp<[[VEC_TC]]>
111; DBG-NEXT:   No successors
112; DBG-NEXT: }
113; DBG-NEXT: Successor(s): middle.block
114; DBG-EMPTY:
115; DBG-NEXT: middle.block:
116; DBG-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]>
117; DBG-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
118; DBG-NEXT: Successor(s): ir-bb<exit>, scalar.ph
119; DBG-EMPTY:
120; DBG-NEXT: scalar.ph:
121; DBG-NEXT:  EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[VEC_TC]]>, ir<0>
122; DBG-NEXT:  EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END]]>, ir<false>
123; DBG-NEXT: Successor(s): ir-bb<loop.header>
124; DBG-EMPTY:
125; DBG-NEXT: ir-bb<loop.header>:
126; DBG-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<[[RESUME1]]> from scalar.ph)
127; DBG-NEXT:   IR   %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph)
128; DBG-NEXT:   IR   %d.next = xor i1 %d, true
129; DBG-NEXT: No successors
130; DBG-EMPTY:
131; DBG-NEXT: ir-bb<exit>:
132; DBG-NEXT: No successors
133; DBG-NEXT: }
134
135define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
136; CHECK-LABEL: @test_scalarize_with_branch_cond(
137; CHECK:       vector.body:
138; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
139; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
140; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
141; CHECK-NEXT:    [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false
142; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
143; CHECK-NEXT:    br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue
144; CHECK:       pred.store.if:
145; CHECK-NEXT:    [[INDUCTION4:%.*]] = add i64 [[INDEX]], 0
146; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION4]]
147; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
148; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION4]]
149; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP1]], align 4
150; CHECK-NEXT:    br label %pred.store.continue
151; CHECK:       pred.store.continue:
152; CHECK-NEXT:    br i1 [[INDUCTION3]], label %pred.store.if3, label %pred.store.continue4
153; CHECK:       pred.store.if3:
154; CHECK-NEXT:    [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1
155; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDUCTION5]]
156; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
157; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr %dst, i64 [[INDUCTION5]]
158; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP2]], align 4
159; CHECK-NEXT:    br label %pred.store.continue4
160; CHECK:       pred.store.continue4:
161; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
162; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
163; CHECK-NEXT:    br i1 [[TMP9]], label %middle.block, label %vector.body
164; CHECK:       middle.block:
165;
166entry:
167  br label %loop.header
168
; %iv counts 0..999; %d starts as false and is inverted every iteration.
169loop.header:
170  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
171  %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ]
172  %d.next = xor i1 %d, true
173  br i1 %d, label %cond.false, label %loop.latch
174
; Despite its name, this block is the branch target taken when %d is true.
; It copies one i32 from %src[%iv] to %dst[%iv].
175cond.false:
176  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
177  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
178  %l = load i32, ptr %gep.src, align 4
179  store i32 %l, ptr %gep.dst
180  br label %loop.latch
181
182loop.latch:
183  %iv.next = add nsw i64 %iv, 1
184  %ec = icmp eq i64 %iv.next, 1000
185  br i1 %ec, label %exit, label %loop.header
186
187exit:
188  ret void
189}
190
191; Make sure the widened induction gets replaced by scalar-steps for plans
192; including the scalar VF, if it is used in first-order recurrences.
; The loop stores the previous iteration's truncated induction value (%for) to
; the loop-invariant address %dst. All pattern variables used below are
; captured within this function's own directives, so they do not depend on
; values left over from earlier functions in the file.
193
194; DBG-LABEL: 'first_order_recurrence_using_induction'
195; DBG:      VPlan 'Initial VPlan for VF={1},UF>=1' {
196; DBG-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
197; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
198; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count
199; DBG-EMPTY:
200; DBG-NEXT: ir-bb<entry>:
201; DBG-NEXT:  EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64)
202; DBG-NEXT: Successor(s): vector.ph
203; DBG-EMPTY:
204; DBG-NEXT: vector.ph:
205; DBG-NEXT:   SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
206; DBG-NEXT: Successor(s): vector loop
207; DBG-EMPTY:
208; DBG-NEXT: <x1> vector loop: {
209; DBG-NEXT:   vector.body:
210; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
211; DBG-NEXT:     FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
212; DBG-NEXT:     SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
213; DBG-NEXT:     vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
214; DBG-NEXT:     EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
215; DBG-NEXT:     CLONE store vp<[[SPLICE]]>, ir<%dst>
216; DBG-NEXT:     EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
217; DBG-NEXT:     EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VTC]]>
218; DBG-NEXT:   No successors
219; DBG-NEXT: }
220; DBG-NEXT: Successor(s): middle.block
221; DBG-EMPTY:
222; DBG-NEXT: middle.block:
223; DBG-NEXT:   EMIT vp<[[RESUME_1:%.+]]> = extract-from-end vp<[[SCALAR_STEPS]]>, ir<1>
224; DBG-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VTC]]>
225; DBG-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
226; DBG-NEXT: Successor(s): ir-bb<exit>, scalar.ph
227; DBG-EMPTY:
228; DBG-NEXT: scalar.ph:
229; DBG-NEXT:  EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
230; DBG-NEXT:  EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
231; DBG-NEXT: Successor(s): ir-bb<loop>
232; DBG-EMPTY:
233; DBG-NEXT: ir-bb<loop>:
234; DBG-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph)
235; DBG-NEXT:   IR   %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph)
236; DBG:        IR   %ec = icmp slt i32 %iv.next.trunc, %n
237; DBG-NEXT: No successors
238; DBG-EMPTY:
239; DBG-NEXT: ir-bb<exit>:
240; DBG-NEXT: No successors
241; DBG-NEXT: }
242
243define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) {
244; CHECK-LABEL: @first_order_recurrence_using_induction(
245; CHECK:       vector.body:
246; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
247; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDUCTION1:%.*]], %vector.body ]
248; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDEX]] to i32
249; CHECK-NEXT:    [[INDUCTION:%.*]] = add i32 [[TMP3]], 0
250; CHECK-NEXT:    [[INDUCTION1]] = add i32 [[TMP3]], 1
251; CHECK-NEXT:    store i32 [[INDUCTION]], ptr %dst, align 4
252; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
253; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec
254; CHECK-NEXT:    br i1 [[TMP4]], label %middle.block, label %vector.body
255; CHECK:       middle.block:
256;
257entry:
258  br label %loop
259
; %for carries the previous iteration's %iv.trunc (0 on the first iteration).
260loop:
261  %iv = phi i64 [ 0, %entry ],[ %iv.next, %loop ]
262  %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
263  %iv.trunc = trunc i64 %iv to i32
264  store i32 %for, ptr %dst
265  %iv.next = add nuw nsw i64 %iv, 1
266  %iv.next.trunc = trunc i64 %iv.next to i32
267  %ec = icmp slt i32 %iv.next.trunc, %n
268  br i1 %ec, label %loop, label %exit
269
270exit:
271  ret void
272}
273
; Add reduction whose running value is masked to its low 16 bits
; (and with 65535) before each increment; the final sum is truncated to i16 in
; the exit block. The i16 induction variable starts at 1 and the loop exits
; when %iv.next == 10000.
; The IR directives expect two accumulator phis in vector.body (one per
; interleaved copy) that are combined by a single add in middle.block.
274define i16 @reduction_with_casts() {
275; CHECK-LABEL: define i16 @reduction_with_casts() {
276; CHECK:       vector.body:
277; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.+]] ]
278; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
279; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
280; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[VEC_PHI]], 65535
281; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[VEC_PHI1]], 65535
282; CHECK-NEXT:    [[TMP2]] = add i32 [[TMP0]], 1
283; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP1]], 1
284; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
285; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9998
286; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
287; CHECK:       middle.block:
288; CHECK-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP3]], [[TMP2]]
289; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label %scalar.ph
290;
291entry:
292  br label %loop
293
; %count.0.in1 is the reduction phi; %conv1 keeps only its low 16 bits.
294loop:
295  %count.0.in1 = phi i32 [ 0, %entry ], [ %add, %loop ]
296  %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop ]
297  %conv1 = and i32 %count.0.in1, 65535
298  %add = add nuw nsw i32 %conv1, 1
299  %iv.next = add i16 %iv, 1
300  %cmp = icmp eq i16 %iv.next, 10000
301  br i1 %cmp, label %exit, label %loop
302
; The reduction result leaves the loop through an LCSSA phi and is narrowed to
; the i16 return type.
303exit:
304  %add.lcssa = phi i32 [ %add, %loop ]
305  %count.0 = trunc i32 %add.lcssa to i16
306  ret i16 %count.0
307}
308
; Pointer round-trip through an integer: each iteration loads a pointer from
; %src[%iv], converts it with ptrtoint, adds 10, converts back with inttoptr
; and stores the result to the loop-invariant address %dst.
; The IR directives expect vector.body to contain a single copy of that chain
; (computed for index + 1) and a single store.
; NOTE(review): presumably only the last interleaved copy survives because
; every iteration overwrites the same address - confirm against the vectorizer.
309define void @scalarize_ptrtoint(ptr %src, ptr %dst) {
; CHECK-LABEL: @scalarize_ptrtoint(
310; CHECK:       vector.body:
311; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
312; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
313; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr ptr, ptr %src, i64 [[TMP1]]
314; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
315; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64
316; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP7]], 10
317; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP9]] to ptr
318; CHECK-NEXT:    store ptr [[TMP11]], ptr %dst, align 8
319; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
320; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
321; CHECK-NEXT:    br i1 [[TMP12]], label %middle.block, label %vector.body
322
323entry:
324  br label %loop
325
; %iv counts from 0 and the loop exits once %iv.next == 1024.
326loop:
327  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
328  %gep = getelementptr ptr, ptr %src, i64 %iv
329  %l = load ptr, ptr %gep, align 8
330  %cast = ptrtoint ptr %l to i64
331  %add = add i64 %cast, 10
332  %cast.2 = inttoptr i64 %add to ptr
333  store ptr %cast.2, ptr %dst, align 8
334  %iv.next = add i64 %iv, 1
335  %ec = icmp eq i64 %iv.next, 1024
336  br i1 %ec, label %exit, label %loop
337
338exit:
339  ret void
340}
341
; Each iteration loads an i8 from %src[%iv], sign-extends it to i32, truncates
; that to i16, divides by %arg with sdiv and stores the i16 result to
; %dst[%iv]. Both pointer arguments are noalias.
; The IR directives expect the sext and the trunc to be kept as separate
; instructions in each of the two interleaved copies.
; NOTE(review): the name suggests a regression test for issue 76986 - confirm.
342define void @pr76986_trunc_sext_interleaving_only(i16 %arg, ptr noalias %src, ptr noalias %dst) {
343; CHECK-LABEL: define void @pr76986_trunc_sext_interleaving_only(
344; CHECK:       vector.body:
345; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
346; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
347; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
348; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr %src, i64 [[TMP0]]
349; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr %src, i64 [[TMP1]]
350; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
351; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
352; CHECK-NEXT:    [[TMP6:%.*]] = sext i8 [[TMP4]] to i32
353; CHECK-NEXT:    [[TMP7:%.*]] = sext i8 [[TMP5]] to i32
354; CHECK-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP6]] to i16
355; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP7]] to i16
356; CHECK-NEXT:    [[TMP10:%.*]] = sdiv i16 [[TMP8]], %arg
357; CHECK-NEXT:    [[TMP11:%.*]] = sdiv i16 [[TMP9]], %arg
358; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr %dst, i64 [[TMP0]]
359; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr %dst, i64 [[TMP1]]
360; CHECK-NEXT:    store i16 [[TMP10]], ptr [[TMP12]], align 2
361; CHECK-NEXT:    store i16 [[TMP11]], ptr [[TMP13]], align 2
362; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
363; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 14934
364; CHECK-NEXT:    br i1 [[TMP14]], label %middle.block, label %vector.body
365;
366bb:
367  br label %loop
368
; The exit test compares %iv (not %iv.next) against 14933, so the body runs for
; %iv = 0..14933, i.e. 14934 iterations - the constant the vector loop's exit
; compare is expected to use above.
369loop:
370  %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
371  %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv
372  %l = load i8, ptr %gep.src
373  %sext = sext i8 %l to i32
374  %trunc = trunc i32 %sext to i16
375  %sdiv = sdiv i16 %trunc, %arg
376  %gep.dst = getelementptr inbounds i16, ptr %dst, i64 %iv
377  store i16 %sdiv, ptr %gep.dst
378  %iv.next = add i64 %iv, 1
379  %icmp = icmp ult i64 %iv, 14933
380  br i1 %icmp, label %loop, label %exit
381
382exit:
383  ret void
384}
385
386