; xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="default<O3>" -S < %s  | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64"

define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
; CHECK-LABEL: define nofpclass(nan inf) double @monte_simple(
; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef readonly captures(none) [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 8
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x double> poison, double [[Z]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT19]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ <double 0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi <4 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI16:%.*]] = phi <4 x double> [ <double 0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI17:%.*]] = phi <4 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX1]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[WIDE_LOAD18:%.*]] = load <4 x float>, ptr [[TMP23]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fpext <4 x float> [[WIDE_LOAD]] to <4 x double>
; CHECK-NEXT:    [[TMP3:%.*]] = fpext <4 x float> [[WIDE_LOAD18]] to <4 x double>
; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <4 x double> [[TMP4]], [[BROADCAST_SPLAT20]]
; CHECK-NEXT:    [[TMP7:%.*]] = fsub fast <4 x double> [[TMP5]], [[BROADCAST_SPLAT20]]
; CHECK-NEXT:    [[TMP8:%.*]] = fcmp fast ogt <4 x double> [[TMP6]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = fcmp fast ogt <4 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <4 x double> [[TMP6]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <4 x double> [[TMP7]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = tail call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP6]], <4 x double> splat (double -0.000000e+00))
; CHECK-NEXT:    [[TMP13:%.*]] = tail call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP7]], <4 x double> splat (double -0.000000e+00))
; CHECK-NEXT:    [[TMP14]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI16]], [[TMP12]]
; CHECK-NEXT:    [[TMP15]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI17]], [[TMP13]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP9]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT:    [[TMP18]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI]], [[TMP16]]
; CHECK-NEXT:    [[TMP19]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI15]], [[TMP17]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 8
; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd reassoc arcp contract afn <4 x double> [[TMP19]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX]])
; CHECK-NEXT:    [[BIN_RDX21:%.*]] = fadd reassoc arcp contract afn <4 x double> [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP22:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX21]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
; CHECK:       [[FOR_BODY_PREHEADER23]]:
; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
; CHECK-NEXT:    [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
; CHECK-NEXT:    [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
; CHECK-NEXT:    [[V0_2]] = fadd reassoc arcp contract afn double [[V0_011]], [[ADD8]]
; CHECK-NEXT:    [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
; CHECK-NEXT:    [[V1_2]] = fadd reassoc arcp contract afn double [[V1_012]], [[ADD4]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[ADD5:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[ADD5]]
;
entry:
  %nblocks.addr = alloca i32, align 4
  %RAND_BLOCK_LENGTH.addr = alloca i32, align 4
  %samples.addr = alloca ptr, align 8
  %Y.addr = alloca double, align 8
  %Z.addr = alloca double, align 8
  %i = alloca i32, align 4
  %block = alloca i32, align 4
  %rngVal = alloca double, align 8
  %callValue = alloca double, align 8
  %v0 = alloca double, align 8
  %v1 = alloca double, align 8
  store i32 %nblocks, ptr %nblocks.addr, align 4
  store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
  store ptr %samples, ptr %samples.addr, align 8
  store double %Y, ptr %Y.addr, align 8
  store double %Z, ptr %Z.addr, align 8
  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
  call void @llvm.lifetime.start.p0(i64 4, ptr %block) #2
  call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #2
  call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #2
  call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #2
  store double 0.000000e+00, ptr %v0, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #2
  store double 0.000000e+00, ptr %v1, align 8
  store i32 0, ptr %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4
  %1 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %2 = load ptr, ptr %samples.addr, align 8
  %3 = load i32, ptr %i, align 4
  %idxprom = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
  %4 = load float, ptr %arrayidx, align 4
  %conv = fpext float %4 to double
  store double %conv, ptr %rngVal, align 8
  %5 = load double, ptr %Y.addr, align 8
  %6 = load double, ptr %rngVal, align 8
  %mul = fmul fast double %5, %6
  %7 = load double, ptr %Z.addr, align 8
  %sub = fsub fast double %mul, %7
  store double %sub, ptr %callValue, align 8
  %8 = load double, ptr %callValue, align 8
  %cmp1 = fcmp fast ogt double %8, 0.000000e+00
  br i1 %cmp1, label %if.then, label %if.end

if.then:                                          ; preds = %for.body
  %9 = load double, ptr %callValue, align 8
  %10 = load double, ptr %v0, align 8
  %add = fadd fast double %10, %9
  store double %add, ptr %v0, align 8
  %11 = load double, ptr %callValue, align 8
  %12 = load double, ptr %callValue, align 8
  %mul3 = fmul fast double %11, %12
  %13 = load double, ptr %v1, align 8
  %add4 = fadd fast double %13, %mul3
  store double %add4, ptr %v1, align 8
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body
  br label %for.inc

for.inc:                                          ; preds = %if.end
  %14 = load i32, ptr %i, align 4
  %inc = add nsw i32 %14, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  %15 = load double, ptr %v0, align 8
  %16 = load double, ptr %v1, align 8
  %add5 = fadd fast double %15, %16
  call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #2
  call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #2
  call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #2
  call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %block) #2
  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
  ret double %add5
}

define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
; CHECK-LABEL: define nofpclass(nan inf) double @monte_exp(
; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[NBLOCKS]], 0
; CHECK-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END10:.*]]
; CHECK:       [[FOR_BODY_LR_PH]]:
; CHECK-NEXT:    [[CMP211:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
; CHECK-NEXT:    br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY_US_PREHEADER]]:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 8
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT35:%.*]] = insertelement <4 x double> poison, double [[Z]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT36:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT35]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br label %[[FOR_BODY_US:.*]]
; CHECK:       [[FOR_BODY_US]]:
; CHECK-NEXT:    [[V1_021_US:%.*]] = phi double [ [[V1_2_US_LCSSA:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
; CHECK-NEXT:    [[V0_020_US:%.*]] = phi double [ [[V0_2_US_LCSSA:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
; CHECK-NEXT:    [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ]
; CHECK-NEXT:    tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY3_US_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> <double poison, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, double [[V1_021_US]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x double> <double poison, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, double [[V0_020_US]], i64 0
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI31:%.*]] = phi <4 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI32:%.*]] = phi <4 x double> [ [[TMP27]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI33:%.*]] = phi <4 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX_US1:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_US1]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[ARRAYIDX_US1]], align 4
; CHECK-NEXT:    [[WIDE_LOAD34:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[WIDE_LOAD]] to <4 x double>
; CHECK-NEXT:    [[TMP5:%.*]] = fpext <4 x float> [[WIDE_LOAD34]] to <4 x double>
; CHECK-NEXT:    [[TMP6:%.*]] = tail call fast <4 x double> @llvm.exp2.v4f64(<4 x double> [[TMP4]])
; CHECK-NEXT:    [[TMP7:%.*]] = tail call fast <4 x double> @llvm.exp2.v4f64(<4 x double> [[TMP5]])
; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x double> [[TMP6]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x double> [[TMP7]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP10:%.*]] = fsub fast <4 x double> [[TMP8]], [[BROADCAST_SPLAT36]]
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast <4 x double> [[TMP9]], [[BROADCAST_SPLAT36]]
; CHECK-NEXT:    [[TMP12:%.*]] = fcmp fast ogt <4 x double> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast ogt <4 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = fmul fast <4 x double> [[TMP10]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = fmul fast <4 x double> [[TMP11]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = tail call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00))
; CHECK-NEXT:    [[TMP17:%.*]] = tail call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00))
; CHECK-NEXT:    [[TMP18]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI32]], [[TMP16]]
; CHECK-NEXT:    [[TMP19]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI33]], [[TMP17]]
; CHECK-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP14]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT:    [[TMP21:%.*]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP15]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT:    [[TMP22]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI]], [[TMP20]]
; CHECK-NEXT:    [[TMP23]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI31]], [[TMP21]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 8
; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd reassoc arcp contract afn <4 x double> [[TMP23]], [[TMP22]]
; CHECK-NEXT:    [[TMP25:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX]])
; CHECK-NEXT:    [[BIN_RDX37:%.*]] = fadd reassoc arcp contract afn <4 x double> [[TMP19]], [[TMP18]]
; CHECK-NEXT:    [[TMP26:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX37]])
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US_PREHEADER]]
; CHECK:       [[FOR_BODY3_US_PREHEADER]]:
; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[V1_116_US_PH:%.*]] = phi double [ [[V1_021_US]], %[[FOR_BODY_US]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[V0_115_US_PH:%.*]] = phi double [ [[V0_020_US]], %[[FOR_BODY_US]] ], [ [[TMP26]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_BODY3_US:.*]]
; CHECK:       [[FOR_BODY3_US]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
; CHECK-NEXT:    [[V1_116_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V1_116_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
; CHECK-NEXT:    [[V0_115_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V0_115_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
; CHECK-NEXT:    [[CONV_US:%.*]] = fpext float [[TMP0]] to double
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]])
; CHECK-NEXT:    [[MUL_US:%.*]] = fmul fast double [[TMP1]], [[Y]]
; CHECK-NEXT:    [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]]
; CHECK-NEXT:    [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00
; CHECK-NEXT:    [[ADD7_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]]
; CHECK-NEXT:    [[ADD12_US:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB_US]], double -0.000000e+00)
; CHECK-NEXT:    [[V0_2_US]] = fadd reassoc arcp contract afn double [[V0_115_US]], [[ADD12_US]]
; CHECK-NEXT:    [[ADD7_US1:%.*]] = select i1 [[CMP4_US]], double [[ADD7_US]], double -0.000000e+00
; CHECK-NEXT:    [[V1_2_US]] = fadd reassoc arcp contract afn double [[V1_116_US]], [[ADD7_US1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND25_NOT]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       [[FOR_COND1_FOR_INC8_CRIT_EDGE_US]]:
; CHECK-NEXT:    [[V0_2_US_LCSSA]] = phi double [ [[TMP26]], %[[MIDDLE_BLOCK]] ], [ [[V0_2_US]], %[[FOR_BODY3_US]] ]
; CHECK-NEXT:    [[V1_2_US_LCSSA]] = phi double [ [[TMP25]], %[[MIDDLE_BLOCK]] ], [ [[V1_2_US]], %[[FOR_BODY3_US]] ]
; CHECK-NEXT:    [[INC9_US]] = add nuw nsw i32 [[BLOCK_017_US]], 1
; CHECK-NEXT:    [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC9_US]], [[NBLOCKS]]
; CHECK-NEXT:    br i1 [[EXITCOND26_NOT]], label %[[FOR_END10]], label %[[FOR_BODY_US]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[BLOCK_017:%.*]] = phi i32 [ [[INC9:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_LR_PH]] ]
; CHECK-NEXT:    tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
; CHECK-NEXT:    [[INC9]] = add nuw nsw i32 [[BLOCK_017]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC9]], [[NBLOCKS]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END10]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END10]]:
; CHECK-NEXT:    [[V0_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V0_2_US_LCSSA]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
; CHECK-NEXT:    [[V1_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V1_2_US_LCSSA]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ADD11:%.*]] = fadd fast double [[V1_0_LCSSA]], [[V0_0_LCSSA]]
; CHECK-NEXT:    ret double [[ADD11]]
;
entry:
  %nblocks.addr = alloca i32, align 4
  %RAND_BLOCK_LENGTH.addr = alloca i32, align 4
  %samples.addr = alloca ptr, align 8
  %Y.addr = alloca double, align 8
  %Z.addr = alloca double, align 8
  %i = alloca i32, align 4
  %block = alloca i32, align 4
  %rngVal = alloca double, align 8
  %callValue = alloca double, align 8
  %v0 = alloca double, align 8
  %v1 = alloca double, align 8
  store i32 %nblocks, ptr %nblocks.addr, align 4
  store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
  store ptr %samples, ptr %samples.addr, align 8
  store double %Y, ptr %Y.addr, align 8
  store double %Z, ptr %Z.addr, align 8
  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #4
  call void @llvm.lifetime.start.p0(i64 4, ptr %block) #4
  call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #4
  call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #4
  call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #4
  store double 0.000000e+00, ptr %v0, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #4
  store double 0.000000e+00, ptr %v1, align 8
  store i32 0, ptr %block, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc8, %entry
  %0 = load i32, ptr %block, align 4
  %1 = load i32, ptr %nblocks.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end10

for.body:                                         ; preds = %for.cond
  %2 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
  %3 = load ptr, ptr %samples.addr, align 8
  call void @resample(i32 noundef %2, ptr noundef %3)
  store i32 0, ptr %i, align 4
  br label %for.cond1

for.cond1:                                        ; preds = %for.inc, %for.body
  %4 = load i32, ptr %i, align 4
  %5 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
  %cmp2 = icmp slt i32 %4, %5
  br i1 %cmp2, label %for.body3, label %for.end

for.body3:                                        ; preds = %for.cond1
  %6 = load ptr, ptr %samples.addr, align 8
  %7 = load i32, ptr %i, align 4
  %idxprom = sext i32 %7 to i64
  %arrayidx = getelementptr inbounds float, ptr %6, i64 %idxprom
  %8 = load float, ptr %arrayidx, align 4
  %conv = fpext float %8 to double
  store double %conv, ptr %rngVal, align 8
  %9 = load double, ptr %Y.addr, align 8
  %10 = load double, ptr %rngVal, align 8
  %11 = call fast double @llvm.exp2.f64(double %10)
  %mul = fmul fast double %9, %11
  %12 = load double, ptr %Z.addr, align 8
  %sub = fsub fast double %mul, %12
  store double %sub, ptr %callValue, align 8
  %13 = load double, ptr %callValue, align 8
  %cmp4 = fcmp fast ogt double %13, 0.000000e+00
  br i1 %cmp4, label %if.then, label %if.end

if.then:                                          ; preds = %for.body3
  %14 = load double, ptr %callValue, align 8
  %15 = load double, ptr %v0, align 8
  %add = fadd fast double %15, %14
  store double %add, ptr %v0, align 8
  %16 = load double, ptr %callValue, align 8
  %17 = load double, ptr %callValue, align 8
  %mul6 = fmul fast double %16, %17
  %18 = load double, ptr %v1, align 8
  %add7 = fadd fast double %18, %mul6
  store double %add7, ptr %v1, align 8
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body3
  br label %for.inc

for.inc:                                          ; preds = %if.end
  %19 = load i32, ptr %i, align 4
  %inc = add nsw i32 %19, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond1

for.end:                                          ; preds = %for.cond1
  br label %for.inc8

for.inc8:                                         ; preds = %for.end
  %20 = load i32, ptr %block, align 4
  %inc9 = add nsw i32 %20, 1
  store i32 %inc9, ptr %block, align 4
  br label %for.cond

for.end10:                                        ; preds = %for.cond
  %21 = load double, ptr %v0, align 8
  %22 = load double, ptr %v1, align 8
  %add11 = fadd fast double %21, %22
  call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #4
  call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #4
  call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #4
  call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #4
  call void @llvm.lifetime.end.p0(i64 4, ptr %block) #4
  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #4
  ret double %add11
}

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @resample(i32 noundef, ptr noundef)
declare double @llvm.exp2.f64(double)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.
