xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll (revision 4ad0fdd1631eeae432714c03ede01a10dc00025d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -O3 -S                                        | FileCheck %s
3; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
4
5; Test that IR is optimal after vectorization/unrolling/CSE/canonicalization.
6; In particular, there should be no fdivs inside loops because that is expensive.
7
8; TODO: There is a CSE opportunity to reduce the hoisted fdivs after vectorization/unrolling.
9; PR46115 - https://llvm.org/PR46115
10
11target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
12target triple = "x86_64-apple-macosx10.15.0"
13
14define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
15; CHECK-LABEL: @vdiv(
16; CHECK-NEXT:  entry:
17; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
18; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
19; CHECK:       iter.check:
20; CHECK-NEXT:    [[X4:%.*]] = ptrtoint ptr [[X:%.*]] to i64
21; CHECK-NEXT:    [[Y5:%.*]] = ptrtoint ptr [[Y:%.*]] to i64
22; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
23; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
24; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]]
25; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
26; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
27; CHECK-NEXT:    br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]]
28; CHECK:       vector.main.loop.iter.check:
29; CHECK-NEXT:    [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16
30; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
31; CHECK:       vector.ph:
32; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
33; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
34; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
35; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
36; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
37; CHECK-NEXT:    [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
38; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
39; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
40; CHECK:       vector.body:
41; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
42; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
43; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
44; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64
45; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 96
46; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]]
47; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[TBAA3]]
48; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[TBAA3]]
49; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[TBAA3]]
50; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]]
51; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]]
52; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]]
53; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP4]]
54; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX]]
55; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 32
56; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 64
57; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 96
58; CHECK-NEXT:    store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[TBAA3]]
59; CHECK-NEXT:    store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[TBAA3]]
60; CHECK-NEXT:    store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[TBAA3]]
61; CHECK-NEXT:    store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[TBAA3]]
62; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
63; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
64; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
65; CHECK:       middle.block:
66; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
67; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
68; CHECK:       vec.epilog.iter.check:
69; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
70; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
71; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY_PREHEADER9]], label [[VEC_EPILOG_PH]]
72; CHECK:       vec.epilog.ph:
73; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
74; CHECK-NEXT:    [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
75; CHECK-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
76; CHECK-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer
77; CHECK-NEXT:    [[TMP38:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT15]]
78; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
79; CHECK:       vec.epilog.vector.body:
80; CHECK-NEXT:    [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
81; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX12]]
82; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[TBAA3]]
83; CHECK-NEXT:    [[TMP40:%.*]] = fmul fast <4 x double> [[WIDE_LOAD13]], [[TMP38]]
84; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX12]]
85; CHECK-NEXT:    store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[TBAA3]]
86; CHECK-NEXT:    [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4
87; CHECK-NEXT:    [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]]
88; CHECK-NEXT:    br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
89; CHECK:       vec.epilog.middle.block:
90; CHECK-NEXT:    [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]]
91; CHECK-NEXT:    br i1 [[CMP_N17]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9]]
92; CHECK:       for.body.preheader:
93; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
94; CHECK-NEXT:    [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
95; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP43]], 7
96; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
97; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
98; CHECK:       for.body.prol.preheader:
99; CHECK-NEXT:    [[TMP18:%.*]] = fdiv fast double 1.000000e+00, [[A]]
100; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
101; CHECK:       for.body.prol:
102; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
103; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
104; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_PROL]]
105; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
106; CHECK-NEXT:    [[TMP19:%.*]] = fmul fast double [[T0_PROL]], [[TMP18]]
107; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_PROL]]
108; CHECK-NEXT:    store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
109; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
110; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
111; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
112; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]]
113; CHECK:       for.body.prol.loopexit:
114; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
115; CHECK-NEXT:    [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]]
116; CHECK-NEXT:    [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8
117; CHECK-NEXT:    br i1 [[TMP21]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9_NEW:%.*]]
118; CHECK:       for.body.preheader.new:
119; CHECK-NEXT:    [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]]
120; CHECK-NEXT:    [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]]
121; CHECK-NEXT:    [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]]
122; CHECK-NEXT:    [[TMP25:%.*]] = fdiv fast double 1.000000e+00, [[A]]
123; CHECK-NEXT:    [[TMP26:%.*]] = fdiv fast double 1.000000e+00, [[A]]
124; CHECK-NEXT:    [[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]]
125; CHECK-NEXT:    [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]]
126; CHECK-NEXT:    [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]]
127; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
128; CHECK:       for.body:
129; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
130; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
131; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
132; CHECK-NEXT:    [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]]
133; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
134; CHECK-NEXT:    store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
135; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
136; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]]
137; CHECK-NEXT:    [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
138; CHECK-NEXT:    [[TMP31:%.*]] = fmul fast double [[T0_1]], [[TMP23]]
139; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT]]
140; CHECK-NEXT:    store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
141; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
142; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]]
143; CHECK-NEXT:    [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
144; CHECK-NEXT:    [[TMP32:%.*]] = fmul fast double [[T0_2]], [[TMP24]]
145; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]]
146; CHECK-NEXT:    store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
147; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
148; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]]
149; CHECK-NEXT:    [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
150; CHECK-NEXT:    [[TMP33:%.*]] = fmul fast double [[T0_3]], [[TMP25]]
151; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]]
152; CHECK-NEXT:    store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
153; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
154; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]]
155; CHECK-NEXT:    [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
156; CHECK-NEXT:    [[TMP34:%.*]] = fmul fast double [[T0_4]], [[TMP26]]
157; CHECK-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]]
158; CHECK-NEXT:    store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]]
159; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
160; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]]
161; CHECK-NEXT:    [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
162; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast double [[T0_5]], [[TMP27]]
163; CHECK-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]]
164; CHECK-NEXT:    store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]]
165; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
166; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]]
167; CHECK-NEXT:    [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
168; CHECK-NEXT:    [[TMP36:%.*]] = fmul fast double [[T0_6]], [[TMP28]]
169; CHECK-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]]
170; CHECK-NEXT:    store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]]
171; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
172; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]]
173; CHECK-NEXT:    [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
174; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast double [[T0_7]], [[TMP29]]
175; CHECK-NEXT:    [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]]
176; CHECK-NEXT:    store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]]
177; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
178; CHECK-NEXT:    [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
179; CHECK-NEXT:    br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
180; CHECK:       for.end:
181; CHECK-NEXT:    ret void
182;
183entry:
184  %div = fdiv fast double 1.0, %a
185  br label %for.cond
186
187for.cond:
188  %n.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
189  %cmp = icmp slt i32 %n.0, %N
190  br i1 %cmp, label %for.body, label %for.cond.cleanup
191
192for.cond.cleanup:
193  br label %for.end
194
195for.body:
196  %idxprom = sext i32 %n.0 to i64
197  %arrayidx = getelementptr inbounds double, ptr %y, i64 %idxprom
198  %t0 = load double, ptr %arrayidx, align 8, !tbaa !3
199  %mul = fmul fast double %t0, %div
200  %idxprom1 = sext i32 %n.0 to i64
201  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %idxprom1
202  store double %mul, ptr %arrayidx2, align 8, !tbaa !3
203  br label %for.inc
204
205for.inc:
206  %inc = add nsw i32 %n.0, 1
207  br label %for.cond
208
209for.end:
210  ret void
211}
212
213attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="true" "use-soft-float"="false" }
214
215!llvm.module.flags = !{!0, !1}
216!llvm.ident = !{!2}
217
218!0 = !{i32 1, !"wchar_size", i32 4}
219!1 = !{i32 7, !"PIC Level", i32 2}
220!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 45ebe38ffc40bb7221fc587bfb4481cf7f53ebbc)"}
221!3 = !{!4, !4, i64 0}
222!4 = !{!"double", !5, i64 0}
223!5 = !{!"omnipotent char", !6, i64 0}
224!6 = !{!"Simple C/C++ TBAA"}
225
226