xref: /llvm-project/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll (revision 99d6c6d936573d209f29c3cc6749eaf59912bd2a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s
3
4; The function finds the smallest value in a float array (elements 0 through 65536 of %arg).
5; Only the compare carries a fast-math flag (`fcmp nnan`); the select has none.
; The autogenerated assertions below expect the scalar loop to be left as-is,
; i.e. this flag on its own does not get the min reduction vectorized.
; NOTE(review): presumably because no-signed-zeros is required as well
; (compare @minloopattr and @minloopmissingnsz) -- confirm against the pass.
6
7define float @minloop(ptr nocapture readonly %arg) {
8; CHECK-LABEL: @minloop(
9; CHECK-NEXT:  top:
10; CHECK-NEXT:    [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
11; CHECK-NEXT:    br label [[LOOP:%.*]]
12; CHECK:       loop:
13; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
14; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
15; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]]
16; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[T3]], align 4
17; CHECK-NEXT:    [[T5:%.*]] = fcmp nnan olt float [[T2]], [[T4]]
18; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
19; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
20; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
21; CHECK-NEXT:    br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
22; CHECK:       out:
23; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
24; CHECK-NEXT:    ret float [[T6_LCSSA]]
25;
26top:
27  %t = load float, ptr %arg ; initial minimum: element 0 of %arg
28  br label %loop
29
30loop:                                             ; preds = %loop, %top
31  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; element index, starts at 1
32  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
33  %t3 = getelementptr float, ptr %arg, i64 %t1 ; address of element %t1
34  %t4 = load float, ptr %t3, align 4
35  %t5 = fcmp nnan olt float %t2, %t4 ; the only fast-math flag in this function
36  %t6 = select i1 %t5, float %t2, float %t4 ; %t2 if %t2 < %t4, otherwise %t4
37  %t7 = add i64 %t1, 1
38  %t8 = icmp eq i64 %t7, 65537 ; leave the loop once the next index is 65537
39  br i1 %t8, label %out, label %loop
40
41out:                                              ; preds = %loop
42  ret float %t6
43}
44
45; Check that vectorization is still enabled by function attributes: the loop
; below has no instruction-level fast-math flags, but the function uses
; attribute group #0 ("no-nans-fp-math" and "no-signed-zeros-fp-math").
; The assertions expect a <4 x float> vector body whose result is reduced
; with llvm.vector.reduce.fmin in the middle block.
46
47define float @minloopattr(ptr nocapture readonly %arg) #0 {
48; CHECK-LABEL: @minloopattr(
49; CHECK-NEXT:  top:
50; CHECK-NEXT:    [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
51; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
52; CHECK:       vector.ph:
53; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i64 0
54; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
55; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
56; CHECK:       vector.body:
57; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
58; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
59; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
60; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
61; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[ARG]], i64 [[TMP0]]
62; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
63; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
64; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
65; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
66; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
67; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
68; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
69; CHECK:       middle.block:
70; CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP4]])
71; CHECK-NEXT:    br i1 true, label [[OUT:%.*]], label [[SCALAR_PH]]
72; CHECK:       scalar.ph:
73; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TOP:%.*]] ]
74; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[T]], [[TOP]] ]
75; CHECK-NEXT:    br label [[LOOP:%.*]]
76; CHECK:       loop:
77; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
78; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
79; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]]
80; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[T3]], align 4
81; CHECK-NEXT:    [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
82; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
83; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
84; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
85; CHECK-NEXT:    br i1 [[T8]], label [[OUT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
86; CHECK:       out:
87; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
88; CHECK-NEXT:    ret float [[T6_LCSSA]]
89;
90top:
91  %t = load float, ptr %arg ; initial minimum: element 0 of %arg
92  br label %loop
93
94loop:                                             ; preds = %loop, %top
95  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; element index, starts at 1
96  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
97  %t3 = getelementptr float, ptr %arg, i64 %t1 ; address of element %t1
98  %t4 = load float, ptr %t3, align 4
99  %t5 = fcmp olt float %t2, %t4 ; no fast-math flags on the instruction itself
100  %t6 = select i1 %t5, float %t2, float %t4 ; %t2 if %t2 < %t4, otherwise %t4
101  %t7 = add i64 %t1, 1
102  %t8 = icmp eq i64 %t7, 65537 ; leave the loop once the next index is 65537
103  br i1 %t8, label %out, label %loop
104
105out:                                              ; preds = %loop
106  ret float %t6
107}
108
109; Check that vectorization is prevented without the flag or attribute: this
; function has no attribute group and no fast-math flags on the compare or
; the select, and the assertions expect the scalar loop to be left unchanged.
110
111define float @minloopnovec(ptr nocapture readonly %arg) {
112; CHECK-LABEL: @minloopnovec(
113; CHECK-NEXT:  top:
114; CHECK-NEXT:    [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
115; CHECK-NEXT:    br label [[LOOP:%.*]]
116; CHECK:       loop:
117; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
118; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
119; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]]
120; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[T3]], align 4
121; CHECK-NEXT:    [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
122; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
123; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
124; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
125; CHECK-NEXT:    br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
126; CHECK:       out:
127; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
128; CHECK-NEXT:    ret float [[T6_LCSSA]]
129;
130top:
131  %t = load float, ptr %arg ; initial minimum: element 0 of %arg
132  br label %loop
133
134loop:                                             ; preds = %loop, %top
135  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; element index, starts at 1
136  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
137  %t3 = getelementptr float, ptr %arg, i64 %t1 ; address of element %t1
138  %t4 = load float, ptr %t3, align 4
139  %t5 = fcmp olt float %t2, %t4 ; plain ordered compare, no fast-math flags
140  %t6 = select i1 %t5, float %t2, float %t4 ; %t2 if %t2 < %t4, otherwise %t4
141  %t7 = add i64 %t1, 1
142  %t8 = icmp eq i64 %t7, 65537 ; leave the loop once the next index is 65537
143  br i1 %t8, label %out, label %loop
144
145out:                                              ; preds = %loop
146  ret float %t6
147}
148
149; This test checks that we don't vectorize when only one of the required attributes is set:
; attribute group #1 provides "no-nans-fp-math" but not "no-signed-zeros-fp-math".
150; Note that this test should not vectorize even after switching to IR-level FMF.
151define float @minloopmissingnsz(ptr nocapture readonly %arg) #1 {
152; CHECK-LABEL: @minloopmissingnsz(
153; CHECK-NEXT:  top:
154; CHECK-NEXT:    [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
155; CHECK-NEXT:    br label [[LOOP:%.*]]
156; CHECK:       loop:
157; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
158; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
159; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]]
160; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[T3]], align 4
161; CHECK-NEXT:    [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
162; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
163; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
164; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
165; CHECK-NEXT:    br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
166; CHECK:       out:
167; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
168; CHECK-NEXT:    ret float [[T6_LCSSA]]
169;
170top:
171  %t = load float, ptr %arg ; initial minimum: element 0 of %arg
172  br label %loop
173
174loop:                                             ; preds = %loop, %top
175  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; element index, starts at 1
176  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
177  %t3 = getelementptr float, ptr %arg, i64 %t1 ; address of element %t1
178  %t4 = load float, ptr %t3, align 4
179  %t5 = fcmp olt float %t2, %t4 ; no fast-math flags on the instruction itself
180  %t6 = select i1 %t5, float %t2, float %t4 ; %t2 if %t2 < %t4, otherwise %t4
181  %t7 = add i64 %t1, 1
182  %t8 = icmp eq i64 %t7, 65537 ; leave the loop once the next index is 65537
183  br i1 %t8, label %out, label %loop
184
185out:                                              ; preds = %loop
186  ret float %t6
187}
188
189; This would assert on FMF propagation.
; The select below carries fast-math flags but is fed by an integer compare,
; so it is not a floating-point min/max pattern. The assertions expect the
; function to come out of the pass unchanged.
190
191define void @not_a_min_max() {
192; CHECK-LABEL: @not_a_min_max(
193; CHECK-NEXT:  entry:
194; CHECK-NEXT:    br label [[LOOP:%.*]]
195; CHECK:       loop:
196; CHECK-NEXT:    [[F9_S0_V0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
197; CHECK-NEXT:    [[T14:%.*]] = icmp eq i32 [[F9_S0_V0]], 5
198; CHECK-NEXT:    [[T15:%.*]] = select reassoc nnan ninf nsz contract afn i1 [[T14]], float 0x36A0000000000000, float 0.000000e+00
199; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[F9_S0_V0]], 1
200; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[LOOP]]
201; CHECK:       end:
202; CHECK-NEXT:    ret void
203;
204entry:
205  br label %loop
206
207loop:
208  %f9.s0.v0 = phi i32 [ 0, %entry ], [ %add, %loop ] ; i32 counter, starts at 0
209  %t14 = icmp eq i32 %f9.s0.v0, 5 ; integer compare, not an fcmp
210  %t15 = select reassoc nnan ninf nsz contract afn i1 %t14, float 0x36A0000000000000, float 0.0 ; FMF-carrying select of two float constants; result has no users here
211  %add = add nuw nsw i32 %f9.s0.v0, 1
212  br i1 false, label %end, label %loop ; constant-false condition: always branches back to %loop
213
214end:
215  ret void
216}
217
218attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } ; both attributes; used by @minloopattr
219attributes #1 = { "no-nans-fp-math"="true" } ; no-nans only; used by @minloopmissingnsz
220