xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; REQUIRES: asserts
3
4; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
5
6target triple = "aarch64-unknown-linux-gnu"
7
8;; Given the choice between a masked and unmasked variant for the same VF (4)
9;; where no mask is required, make sure we choose the unmasked variant.
10
11; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m'
12; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
13; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
14; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
15; CHECK-NEXT: Live-in ir<1024> = original trip-count
16; CHECK-EMPTY:
17; CHECK-NEXT: ir-bb<entry>:
18; CHECK-NEXT: Successor(s): vector.ph
19; CHECK-EMPTY:
20; CHECK-NEXT: vector.ph:
21; CHECK-NEXT: Successor(s): vector loop
22; CHECK-EMPTY:
23; CHECK-NEXT: <x1> vector loop: {
24; CHECK-NEXT:   vector.body:
25; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
26; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
27; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
28; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
29; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
30; CHECK-NEXT:     REPLICATE ir<%call> = call @foo(ir<%load>)
31; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
32; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
33; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
34; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
35; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
36; CHECK-NEXT:   No successors
37; CHECK-NEXT: }
38; CHECK-NEXT: Successor(s): middle.block
39; CHECK-EMPTY:
40; CHECK-NEXT: middle.block:
41; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
42; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
43; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
44; CHECK-EMPTY:
45; CHECK-NEXT: scalar.ph:
46; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
47; CHECK-NEXT: Successor(s): ir-bb<for.body>
48; CHECK-EMPTY:
49; CHECK-NEXT: ir-bb<for.body>:
50; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
51; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
52; CHECK-NEXT: No successors
53; CHECK-EMPTY:
54; CHECK-NEXT: ir-bb<for.cond.cleanup>:
55; CHECK-NEXT: No successors
56; CHECK-NEXT: }
57
58; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
59; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
60; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
61; CHECK-NEXT: Live-in ir<1024> = original trip-count
62; CHECK-EMPTY:
63; CHECK-NEXT: ir-bb<entry>:
64; CHECK-NEXT: Successor(s): vector.ph
65; CHECK-EMPTY:
66; CHECK-NEXT: vector.ph:
67; CHECK-NEXT: Successor(s): vector loop
68; CHECK-EMPTY:
69; CHECK-NEXT: <x1> vector loop: {
70; CHECK-NEXT:   vector.body:
71; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
72; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
73; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
74; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
75; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
76; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
77; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
78; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
79; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
80; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
81; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
82; CHECK-NEXT:   No successors
83; CHECK-NEXT: }
84; CHECK-NEXT: Successor(s): middle.block
85; CHECK-EMPTY:
86; CHECK-NEXT: middle.block:
87; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
88; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
89; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
90; CHECK-EMPTY:
91; CHECK-NEXT: scalar.ph:
92; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
93; CHECK-NEXT: Successor(s): ir-bb<for.body>
94; CHECK-EMPTY:
95; CHECK-NEXT: ir-bb<for.body>:
96; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
97; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
98; CHECK-NEXT: No successors
99; CHECK-EMPTY:
100; CHECK-NEXT: ir-bb<for.cond.cleanup>:
101; CHECK-NEXT: No successors
102; CHECK-NEXT: }
103
104;; If we have a masked variant at one VF and an unmasked variant at a different
105;; VF, ensure we create appropriate recipes (including a synthesized all-true
106;; mask for the masked variant)
107
108; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m'
109; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
110; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
111; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
112; CHECK-NEXT: Live-in ir<1024> = original trip-count
113; CHECK-EMPTY:
114; CHECK-NEXT: ir-bb<entry>:
115; CHECK-NEXT: Successor(s): vector.ph
116; CHECK-EMPTY:
117; CHECK-NEXT: vector.ph:
118; CHECK-NEXT: Successor(s): vector loop
119; CHECK-EMPTY:
120; CHECK-NEXT: <x1> vector loop: {
121; CHECK-NEXT:   vector.body:
122; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
123; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
124; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
125; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
126; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
127; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
128; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
129; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
130; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
131; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
132; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
133; CHECK-NEXT:   No successors
134; CHECK-NEXT: }
135; CHECK-NEXT: Successor(s): middle.block
136; CHECK-EMPTY:
137; CHECK-NEXT: middle.block:
138; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
139; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
140; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
141; CHECK-EMPTY:
142; CHECK-NEXT: scalar.ph:
143; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
144; CHECK-NEXT: Successor(s): ir-bb<for.body>
145; CHECK-EMPTY:
146; CHECK-NEXT: ir-bb<for.body>:
147; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
148; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
149; CHECK-NEXT: No successors
150; CHECK-EMPTY:
151; CHECK-NEXT: ir-bb<for.cond.cleanup>:
152; CHECK-NEXT: No successors
153; CHECK-NEXT: }
154
155; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
156; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
157; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
158; CHECK-NEXT: Live-in ir<1024> = original trip-count
159; CHECK-EMPTY:
160; CHECK-NEXT: ir-bb<entry>:
161; CHECK-NEXT: Successor(s): vector.ph
162; CHECK-EMPTY:
163; CHECK-NEXT: vector.ph:
164; CHECK-NEXT: Successor(s): vector loop
165; CHECK-EMPTY:
166; CHECK-NEXT: <x1> vector loop: {
167; CHECK-NEXT:   vector.body:
168; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
169; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
170; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
171; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
172; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
173; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>, ir<true>) (using library function: foo_vector_fixed4_mask)
174; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
175; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
176; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
177; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
178; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
179; CHECK-NEXT:   No successors
180; CHECK-NEXT: }
181; CHECK-NEXT: Successor(s): middle.block
182; CHECK-EMPTY:
183; CHECK-NEXT: middle.block:
184; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
185; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
186; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
187; CHECK-EMPTY:
188; CHECK-NEXT: scalar.ph:
189; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
190; CHECK-NEXT: Successor(s): ir-bb<for.body>
191; CHECK-EMPTY:
192; CHECK-NEXT: ir-bb<for.body>:
193; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
194; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
195; CHECK-NEXT: No successors
196; CHECK-EMPTY:
197; CHECK-NEXT: ir-bb<for.cond.cleanup>:
198; CHECK-NEXT: No successors
199; CHECK-NEXT: }
200
201;; If we have two variants at different VFs, neither of which is masked, we
202;; still expect to see a different vplan per VF.
203
204; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4'
205; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
206; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
207; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
208; CHECK-NEXT: Live-in ir<1024> = original trip-count
209; CHECK-EMPTY:
210; CHECK-NEXT: ir-bb<entry>:
211; CHECK-NEXT: Successor(s): vector.ph
212; CHECK-EMPTY:
213; CHECK-NEXT: vector.ph:
214; CHECK-NEXT: Successor(s): vector loop
215; CHECK-EMPTY:
216; CHECK-NEXT: <x1> vector loop: {
217; CHECK-NEXT:   vector.body:
218; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
219; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
220; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
221; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
222; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
223; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask)
224; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
225; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
226; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
227; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
228; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
229; CHECK-NEXT:   No successors
230; CHECK-NEXT: }
231; CHECK-NEXT: Successor(s): middle.block
232; CHECK-EMPTY:
233; CHECK-NEXT: middle.block:
234; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
235; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
236; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
237; CHECK-EMPTY:
238; CHECK-NEXT: scalar.ph:
239; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
240; CHECK-NEXT: Successor(s): ir-bb<for.body>
241; CHECK-EMPTY:
242; CHECK-NEXT: ir-bb<for.body>:
243; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
244; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
245; CHECK-NEXT: No successors
246; CHECK-EMPTY:
247; CHECK-NEXT: ir-bb<for.cond.cleanup>:
248; CHECK-NEXT: No successors
249; CHECK-NEXT: }
250
251; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
252; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
253; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
254; CHECK-NEXT: Live-in ir<1024> = original trip-count
255; CHECK-EMPTY:
256; CHECK-NEXT: ir-bb<entry>:
257; CHECK-NEXT: Successor(s): vector.ph
258; CHECK-EMPTY:
259; CHECK-NEXT: vector.ph:
260; CHECK-NEXT: Successor(s): vector loop
261; CHECK-EMPTY:
262; CHECK-NEXT: <x1> vector loop: {
263; CHECK-NEXT:   vector.body:
264; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
265; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
266; CHECK-NEXT:     CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
267; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
268; CHECK-NEXT:     WIDEN ir<%load> = load vp<[[VEC_PTR]]>
269; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask)
270; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
271; CHECK-NEXT:     vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>
272; CHECK-NEXT:     WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
273; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
274; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
275; CHECK-NEXT:   No successors
276; CHECK-NEXT: }
277; CHECK-NEXT: Successor(s): middle.block
278; CHECK-EMPTY:
279; CHECK-NEXT: middle.block:
280; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
281; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
282; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup>, scalar.ph
283; CHECK-EMPTY:
284; CHECK-NEXT: scalar.ph:
285; CHECK-NEXT:  EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
286; CHECK-NEXT: Successor(s): ir-bb<for.body>
287; CHECK-EMPTY:
288; CHECK-NEXT: ir-bb<for.body>:
289; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
290; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
291; CHECK-NEXT: No successors
292; CHECK-EMPTY:
293; CHECK-NEXT: ir-bb<for.cond.cleanup>:
294; CHECK-NEXT: No successors
295; CHECK-NEXT: }
296
297define void @test_v4_v4m(ptr noalias %a, ptr readonly %b) #3 { ; Stores foo(b[i]) to a[i] for i = 0..1023; foo has both an unmasked and a masked 4-lane variant (attribute group #0).
298; CHECK-LABEL: @test_v4_v4m(
299; CHECK-NEXT:  entry:
300; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
301; CHECK:       vector.ph:
302; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
303; CHECK:       vector.body:
304; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
305; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
306; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
307; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
308; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
309; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
310; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
311; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
312; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
313; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
314; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
315; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
316; CHECK:       middle.block:
317; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
318; CHECK:       scalar.ph:
319; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
320; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
321; CHECK:       for.body:
322; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
323; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
324; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
325; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]]
326; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
327; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
328; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
329; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
330; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
331; CHECK:       for.cond.cleanup:
332; CHECK-NEXT:    ret void
333;
334entry:
335  br label %for.body
336
337for.body:
338  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
339  %gep = getelementptr i64, ptr %b, i64 %indvars.iv ; address of b[i]
340  %load = load i64, ptr %gep
341  %call = call i64 @foo(i64 %load) #0 ; #0 offers foo_vector_fixed4_nomask and foo_vector_fixed4_mask; the expected output above uses the unmasked one
342  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv ; address of a[i]
343  store i64 %call, ptr %arrayidx
344  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
345  %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; constant trip count of 1024
346  br i1 %exitcond, label %for.cond.cleanup, label %for.body
347
348for.cond.cleanup:
349  ret void
350
351}
352
353define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 { ; Stores foo(b[i]) to a[i] for i = 0..1023; foo has an unmasked 2-lane variant and a masked 4-lane variant (attribute group #1).
354; CHECK-LABEL: @test_v2_v4m(
355; CHECK-NEXT:  entry:
356; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
357; CHECK:       vector.ph:
358; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
359; CHECK:       vector.body:
360; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
361; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
362; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
363; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
364; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
365; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> splat (i1 true))
366; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
367; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
368; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
369; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
370; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
371; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
372; CHECK:       middle.block:
373; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
374; CHECK:       scalar.ph:
375; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
376; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
377; CHECK:       for.body:
378; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
379; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
380; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
381; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2:[0-9]+]]
382; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
383; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
384; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
385; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
386; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
387; CHECK:       for.cond.cleanup:
388; CHECK-NEXT:    ret void
389;
390entry:
391  br label %for.body
392
393for.body:
394  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
395  %gep = getelementptr i64, ptr %b, i64 %indvars.iv ; address of b[i]
396  %load = load i64, ptr %gep
397  %call = call i64 @foo(i64 %load) #1 ; #1 offers foo_vector_fixed2_nomask and foo_vector_fixed4_mask; the expected output above calls the masked one with an all-true mask
398  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv ; address of a[i]
399  store i64 %call, ptr %arrayidx
400  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
401  %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; constant trip count of 1024
402  br i1 %exitcond, label %for.cond.cleanup, label %for.body
403
404for.cond.cleanup:
405  ret void
406
407}
408
409define void @test_v2_v4(ptr noalias %a, ptr readonly %b) #3 { ; Stores foo(b[i]) to a[i] for i = 0..1023; foo has unmasked 2-lane and 4-lane variants and no masked variant (attribute group #2).
410; CHECK-LABEL: @test_v2_v4(
411; CHECK-NEXT:  entry:
412; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
413; CHECK:       vector.ph:
414; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
415; CHECK:       vector.body:
416; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
417; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
418; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]]
419; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
420; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
421; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_nomask(<4 x i64> [[WIDE_LOAD]])
422; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
423; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
424; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8
425; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
426; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
427; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
428; CHECK:       middle.block:
429; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
430; CHECK:       scalar.ph:
431; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
432; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
433; CHECK:       for.body:
434; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
435; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
436; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
437; CHECK-NEXT:    [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
438; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
439; CHECK-NEXT:    store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
440; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
441; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
442; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
443; CHECK:       for.cond.cleanup:
444; CHECK-NEXT:    ret void
445;
446entry:
447  br label %for.body
448
449for.body:
450  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
451  %gep = getelementptr i64, ptr %b, i64 %indvars.iv ; address of b[i]
452  %load = load i64, ptr %gep
453  %call = call i64 @foo(i64 %load) #2 ; #2 offers foo_vector_fixed2_nomask and foo_vector_fixed4_nomask; the expected output above uses the 4-lane one
454  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv ; address of a[i]
455  store i64 %call, ptr %arrayidx
456  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
457  %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; constant trip count of 1024
458  br i1 %exitcond, label %for.cond.cleanup, label %for.body
459
460for.cond.cleanup:
461  ret void
462
463}
464
465declare i64 @foo(i64) ; scalar callee used by every loop body in this file
466
467;; Fixed-width vector variants of foo. They are only reachable through the
468declare <2 x i64> @foo_vector_fixed2_nomask(<2 x i64>) ; "vector-function-abi-variant" mappings in attribute groups #0-#2 below.
469declare <4 x i64> @foo_vector_fixed4_nomask(<4 x i64>)
470declare <4 x i64> @foo_vector_fixed4_mask(<4 x i64>, <4 x i1>) ; masked variant: takes an extra <4 x i1> operand after the data vector
471
472attributes #0 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" } ; used by test_v4_v4m: unmasked (N) and masked (M) variants, both 4 lanes
473attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_M4v_foo(foo_vector_fixed4_mask)" } ; used by test_v2_v4m: unmasked 2-lane variant, masked 4-lane variant
474attributes #2 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_foo(foo_vector_fixed2_nomask),_ZGV_LLVM_N4v_foo(foo_vector_fixed4_nomask)" } ; used by test_v2_v4: unmasked 2-lane and 4-lane variants only
475attributes #3 = { "target-features"="+sve" vscale_range(2,16) "no-trapping-math"="false" } ; attached to all three test functions: sets the +sve target feature and vscale_range(2,16)
476