; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize,simplifycfg,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize,simplifycfg,instcombine -force-vector-interleave=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s --check-prefix=INTERLEAVE
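; Both RUN lines request tail folding by predication (predicate-dont-vectorize
; forbids a scalar epilogue); the second run additionally interleaves by two.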

target triple = "aarch64-unknown-linux-gnu"

; A call whose second argument can remain scalar when calling a vectorized
; function variant with a uniform argument, because the value is loop-invariant.
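; Illustrative only (not part of the test): the scalar loop below roughly
; corresponds to this C source:
;   for (long i = 0; i < n; i++)
;     dst[i] = foo(src[i], uniform);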
define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT:    br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
; INTERLEAVE-LABEL: define void @test_uniform
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; INTERLEAVE-NEXT:  entry:
; INTERLEAVE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
; INTERLEAVE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 2
; INTERLEAVE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 1
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
; INTERLEAVE:       vector.body:
; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4
; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]]
; INTERLEAVE-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT:    [[TMP11:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4
; INTERLEAVE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
; INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP16:%.*]] = shl i64 [[TMP15]], 1
; INTERLEAVE-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
; INTERLEAVE-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT:    br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; INTERLEAVE:       for.cond.cleanup:
; INTERLEAVE-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %uniform) #1
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

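; As above, but the uniform argument is a narrower scalar (i32 rather than
; i64); the uniform mapping still applies.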
define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i32 %uniform, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform_smaller_scalar
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT:    br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
; INTERLEAVE-LABEL: define void @test_uniform_smaller_scalar
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT:  entry:
; INTERLEAVE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
; INTERLEAVE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 2
; INTERLEAVE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 1
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
; INTERLEAVE:       vector.body:
; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4
; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]]
; INTERLEAVE-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT:    [[TMP11:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4
; INTERLEAVE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
; INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT:    [[TMP16:%.*]] = shl i64 [[TMP15]], 1
; INTERLEAVE-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
; INTERLEAVE-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT:    br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; INTERLEAVE:       for.cond.cleanup:
; INTERLEAVE-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @bar(double %data, i32 %uniform) #2
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; If the parameter is not uniform, then we can't use the vector variant.
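; Here the argument is the induction variable, which takes a different value
; in every lane, so the 'u' (uniform) parameter mapping does not apply and
; the call is scalarized instead.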
define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform_not_invariant
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
; INTERLEAVE-LABEL: define void @test_uniform_not_invariant
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT:  entry:
; INTERLEAVE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2)
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ne i64 [[N]], 0
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ugt i64 [[N]], 1
; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
; INTERLEAVE:       vector.body:
; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ true, [[PRED_STORE_CONTINUE4]] ]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; INTERLEAVE-NEXT:    br i1 [[ACTIVE_LANE_MASK]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; INTERLEAVE:       pred.store.if:
; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
; INTERLEAVE-NEXT:    [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    store double [[TMP3]], ptr [[TMP4]], align 8
; INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE]]
; INTERLEAVE:       pred.store.continue:
; INTERLEAVE-NEXT:    br i1 [[ACTIVE_LANE_MASK2]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE:       pred.store.if3:
; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]]
; INTERLEAVE-NEXT:    [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
; INTERLEAVE-NEXT:    [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]]
; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]]
; INTERLEAVE-NEXT:    store double [[TMP8]], ptr [[TMP9]], align 8
; INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE:       pred.store.continue4:
; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; INTERLEAVE-NEXT:    [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]]
; INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP10]], [[TMP0]]
; INTERLEAVE-NEXT:    br i1 [[ACTIVE_LANE_MASK_NEXT]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; INTERLEAVE:       for.cond.cleanup:
; INTERLEAVE-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %indvars.iv) #1
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; Scalar functions
declare double @foo(double, i64)
declare double @bar(double, i32)

; Vector variants
declare <vscale x 2 x double> @foo_uniform(<vscale x 2 x double>, i64, <vscale x 2 x i1>)
declare <vscale x 2 x double> @bar_uniform(<vscale x 2 x double>, i32, <vscale x 2 x i1>)

attributes #0 = { "target-features"="+sve" }

; Mappings
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_foo(foo_uniform)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_bar(bar_uniform)" }
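;
; The mangled VFABI names decode as _ZGV <isa> <mask> <vlen> <params> _ <scalar-name>:
; 's' selects SVE, 'M' a masked variant, 'x' a scalable vector length, 'v' a
; widened vector parameter and 'u' a uniform (scalar) parameter, which is why
; the variants take a data vector, a plain scalar and a predicate.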