; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll (revision 46ea4b5ea10f9363503b047a2e6a577a8603797d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=128 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
3
4; A collection of fairly basic functional tests when both fixed and scalable vectorization is
5; allowed.  The primary goal of this is check for crashes during cost modeling, but it also
6; exercises the default heuristics in a useful way.
7
8target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
9target triple = "riscv64"
10
; a[i] += v over a fixed trip count of 1024 (%n is unused in the body).
; The autogenerated CHECK lines verify the loop is vectorized: a
; vector.body with two <2 x i64> wide load/add/store groups per
; iteration (interleave of 2, index step 4) plus the scalar remainder.
; NOTE(review): despite -scalable-vectorization=on in the RUN line the
; checks expect fixed-width <2 x i64> vectors -- presumably the cost
; model chose a fixed VF here; confirm against current opt output.
define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; a[iv] = a[iv] + v
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024      ; fixed trip count of 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
72
73; a[b[i]] += v, mostly to exercise scatter/gather costing
74; TODO: Currently fails to vectorize due to a memory conflict
; a[b[i]] += v.  Per the TODO above, this currently does NOT vectorize
; (the load/store through a[b[i]] is a potential self-conflict), so the
; autogenerated checks expect the scalar loop to come out unchanged.
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr                    ; index loaded from b[iv]
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr                      ; read-modify-write of a[b[iv]]
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
111
112; a[b[i]] = v, exercise scatter support
; a[b[i]] = v.  The index vector is loaded with consecutive wide loads;
; the stores to the unpredictable addresses are expected to be emitted
; as llvm.masked.scatter calls with an all-true mask (see CHECK lines).
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD1]]
; CHECK-NEXT:    call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>)
; CHECK-NEXT:    call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT3]], <2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; CHECK-NEXT:    store i64 [[V]], ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr                    ; index loaded from b[iv]
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr                        ; store-only access -> scatter
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
174
; sum += a[b[i]] reduction (exercises gather support).  The indexed
; loads become llvm.masked.gather calls feeding per-part add reduction
; phis; middle.block combines the parts and horizontally sums them via
; llvm.vector.reduce.add, which feeds both the scalar resume phi and
; the LCSSA phi in for.end.
define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD2]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; CHECK-NEXT:    [[WIDE_MASKED_GATHER3:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; CHECK-NEXT:    [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[WIDE_MASKED_GATHER3]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr                    ; index loaded from b[iv]
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr                    ; load-only access -> gather
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem                 ; integer add reduction
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}
244
; a[i] = v with an integer value: the scalar %v is broadcast in the
; preheader and stored with wide <2 x i64> stores in the vector body.
define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @splat_int(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; a[iv] = v
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
298
; Same as @splat_int but storing a pointer value: %v is broadcast to a
; <2 x ptr> splat and stored with wide vector stores.
define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; CHECK-LABEL: @splat_ptr(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
; CHECK-NEXT:    store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 2
; CHECK-NEXT:    store <2 x ptr> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store ptr [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; a[iv] = v (pointer payload)
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
352
; Store to the loop-invariant address %b alongside a normal a[i] = v
; store.  The checks expect the invariant store to be replicated as
; four scalar stores per vector iteration (one per unrolled lane group)
; while the a[i] store is vectorized as wide splat stores.
define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @uniform_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 8                   ; loop-invariant address
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx                     ; consecutive a[iv] store
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
412
; Same shape as @uniform_store but the invariant store to %b is only
; align 1, checking the uniform-store handling is not alignment
; sensitive (the replicated scalar stores keep align 1).
define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @uniform_store_unaligned(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 1
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 1
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 1
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 1
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 1                   ; invariant store, under-aligned
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
472
; Loop-invariant (uniform) load from %b, stored to a[i] and returned
; after the loop.  The checks expect the load to be re-executed once
; per unrolled part inside the vector body and broadcast to a splat;
; the last re-load ([[TMP3]]) feeds the LCSSA phi for the return value.
define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; CHECK-LABEL: @uniform_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8                  ; uniform load, live out of the loop
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}
531
; Same shape as @uniform_load but the uniform load from %b is only
; align 1, checking the uniform-load handling is not alignment
; sensitive (the re-executed scalar loads keep align 1).
define i64 @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; CHECK-LABEL: @uniform_load_unaligned(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[B]], align 1
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; CHECK-NEXT:    store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[B]], align 1
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 1                  ; uniform load, under-aligned, live out
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}
590