1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
3
4target triple = "arm64-apple-darwin"
5
6; Make sure we can vectorize a loop that uses a function to clamp a double to
7; be between a given minimum and maximum value.
8
; Scalar reference implementation of clamp(v, 0.0, 6.0), written in
; unoptimized (-O0 style) form: the value round-trips through allocas and is
; re-loaded before every compare. The O3 pipeline must first clean this up
; (SROA, SimplifyCFG to selects) and inline it into @loop before the loop
; vectorizer can handle it — that is the point of this test.
define internal double @clamp(double %v) {
entry:
  %retval = alloca double, align 8
  %v.addr = alloca double, align 8
  store double %v, ptr %v.addr, align 8
  %0 = load double, ptr %v.addr, align 8
  ; Lower bound: v < 0.0 -> return 0.0
  %cmp = fcmp olt double %0, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store double 0.000000e+00, ptr %retval, align 8
  br label %return

if.end:                                           ; preds = %entry
  ; Upper bound: v > 6.0 -> return 6.0
  %1 = load double, ptr %v.addr, align 8
  %cmp1 = fcmp ogt double %1, 6.000000e+00
  br i1 %cmp1, label %if.then2, label %if.end3

if.then2:                                         ; preds = %if.end
  store double 6.000000e+00, ptr %retval, align 8
  br label %return

if.end3:                                          ; preds = %if.end
  ; In range: return v unchanged.
  %2 = load double, ptr %v.addr, align 8
  store double %2, ptr %retval, align 8
  br label %return

return:                                           ; preds = %if.end3, %if.then2, %if.then
  %3 = load double, ptr %retval, align 8
  ret double %3
}
40
; X[i] = clamp(Y[i]) for i in [0, 20000). Written -O0 style (induction
; variable and pointers spilled to allocas, call not yet inlined). The CHECK
; lines (autogenerated by update_test_checks.py — do not hand-edit) verify
; that after the full O3 pipeline the call is inlined to a pair of selects and
; the loop is vectorized with <2 x double>, interleave factor 2, guarded only
; by a pointer-difference overlap check.
define void @loop(ptr %X, ptr %Y) {
; CHECK-LABEL: @loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X6:%.*]] = ptrtoint ptr [[X:%.*]] to i64
; CHECK-NEXT:    [[Y7:%.*]] = ptrtoint ptr [[Y:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[X6]], [[Y7]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], splat (double 6.000000e+00)
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], splat (double 6.000000e+00)
; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD8]]
; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 16
; CHECK-NEXT:    store <2 x double> [[TMP9]], ptr [[TMP11]], align 8
; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP12]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
; CHECK-NEXT:    br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP14:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt double [[TMP14]], 0.000000e+00
; CHECK-NEXT:    [[CMP1_I:%.*]] = fcmp ogt double [[TMP14]], 6.000000e+00
; CHECK-NEXT:    [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP14]]
; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[RETVAL_0_I]], ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
  %X.addr = alloca ptr, align 8
  %Y.addr = alloca ptr, align 8
  %i = alloca i32, align 4
  store ptr %X, ptr %X.addr, align 8
  store ptr %Y, ptr %Y.addr, align 8
  ; NOTE(review): attribute group #2 is not defined in this chunk —
  ; presumably declared after the visible region; confirm the file parses.
  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
  store i32 0, ptr %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  ; Rotated later by the pipeline; trip count is 20000.
  %0 = load i32, ptr %i, align 4
  %cmp = icmp ult i32 %0, 20000
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
  br label %for.end

for.body:                                         ; preds = %for.cond
  ; X[i] = clamp(Y[i]); the call must be inlined for vectorization.
  %1 = load ptr, ptr %Y.addr, align 8
  %2 = load i32, ptr %i, align 4
  %idxprom = zext i32 %2 to i64
  %arrayidx = getelementptr inbounds double, ptr %1, i64 %idxprom
  %3 = load double, ptr %arrayidx, align 8
  %call = call double @clamp(double %3)
  %4 = load ptr, ptr %X.addr, align 8
  %5 = load i32, ptr %i, align 4
  %idxprom1 = zext i32 %5 to i64
  %arrayidx2 = getelementptr inbounds double, ptr %4, i64 %idxprom1
  store double %call, ptr %arrayidx2, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %6 = load i32, ptr %i, align 4
  %inc = add i32 %6, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  ret void
}
128
129; Test that requires sinking/hoisting of instructions for vectorization.
130
; B[i] = x*A[i] if C[i] == 20 else x*A[i] + B[i], for i in [0, 10000).
; Both branches of the if duplicate the A load/fmul and the B store; the
; CHECK lines (autogenerated by update_test_checks.py — do not hand-edit)
; verify those are hoisted/sunk into the common path so the loop vectorizes
; with <4 x float>, interleave factor 2, with the B-load kept unconditional
; and the branch flattened to selects, guarded by runtime alias checks.
define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
; CHECK-LABEL: @loop2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP2]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    [[BOUND04:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]]
; CHECK-NEXT:    [[BOUND15:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META7]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP6]], [[WIDE_LOAD10]]
; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
; CHECK-NEXT:    [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       loop.body:
; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[IV1]]
; CHECK-NEXT:    [[C_LV:%.*]] = load i32, ptr [[C_GEP]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV1]]
; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, ptr [[A_GEP_0]], align 4
; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[X]], [[A_LV_0]]
; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV1]]
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
; CHECK:       else:
; CHECK-NEXT:    [[B_LV:%.*]] = load float, ptr [[B_GEP_0]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]]
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
; CHECK-NEXT:    store float [[ADD_SINK]], ptr [[B_GEP_0]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %cmp.0 = icmp ult i64 %iv, 10000
  br i1 %cmp.0, label %loop.body, label %exit

loop.body:
  %C.gep = getelementptr inbounds i32, ptr %C, i64 %iv
  %C.lv = load i32, ptr %C.gep
  %cmp = icmp eq i32 %C.lv, 20
  br i1 %cmp, label %then, label %else

then:
  ; C[iv] == 20: B[iv] = x * A[iv]. The A load, fmul and B gep are
  ; duplicated in %else and must be hoisted to the common predecessor.
  %A.gep.0 = getelementptr inbounds float, ptr %A, i64 %iv
  %A.lv.0 = load float, ptr %A.gep.0, align 4
  %mul2.i81.i = fmul float %A.lv.0, %x
  %B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv
  store float %mul2.i81.i, ptr %B.gep.0, align 4
  br label %loop.latch

else:
  ; C[iv] != 20: B[iv] = x * A[iv] + B[iv]. The B store is duplicated in
  ; %then and must be sunk into the latch (phi of the two values).
  %A.gep.1 = getelementptr inbounds float, ptr %A, i64 %iv
  %A.lv.1 = load float, ptr %A.gep.1, align 4
  %mul2 = fmul float %A.lv.1, %x
  %B.gep.1 = getelementptr inbounds float, ptr %B, i64 %iv
  %B.lv = load float, ptr %B.gep.1, align 4
  %add = fadd float %mul2, %B.lv
  store float %add, ptr %B.gep.1, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret void
}
239
; Lifetime markers for the %i stack slot in @loop.
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
243