xref: /llvm-project/llvm/test/Transforms/InstCombine/fast-math.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
4; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
5; 1.2f and 2.3f are supposed to be folded.
6define float @fold(float %a) {
7; CHECK-LABEL: @fold(
8; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
9; CHECK-NEXT:    ret float [[MUL1]]
10;
11  %mul = fmul fast float %a, 0x3FF3333340000000
12  %mul1 = fmul fast float %mul, 0x4002666660000000
13  ret float %mul1
14}
15
16; Same testing-case as the one used in fold(), except that the second fmul
17; carries no fast-math flags, so the two constants must not be folded together.
18define float @notfold(float %a) {
19; CHECK-LABEL: @notfold(
20; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
21; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
22; CHECK-NEXT:    ret float [[MUL1]]
23;
24  %mul = fmul fast float %a, 0x3FF3333340000000
25  %mul1 = fmul float %mul, 0x4002666660000000
26  ret float %mul1
27}
28
; Only the outer fmul is 'fast' (the inner one has no FMF); the expected output
; still merges both constants into a single 'fast' fmul.
29define float @fold2(float %a) {
30; CHECK-LABEL: @fold2(
31; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
32; CHECK-NEXT:    ret float [[MUL1]]
33;
34  %mul = fmul float %a, 0x3FF3333340000000
35  %mul1 = fmul fast float %mul, 0x4002666660000000
36  ret float %mul1
37}
38
39; C * f1 + f1 = (C+1) * f1
40; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
41; always safe, and so doesn't need any FMF.
42; That is, (x + x + x) and (3*x) each have only a single rounding.
43define double @fold3(double %f1) {
44; CHECK-LABEL: @fold3(
45; CHECK-NEXT:    [[T2:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
46; CHECK-NEXT:    ret double [[T2]]
47;
48  %t1 = fmul fast double 5.000000e+00, %f1
49  %t2 = fadd fast double %f1, %t1
50  ret double %t2
51}
52
53; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
54define double @fold3_reassoc_nsz(double %f1) {
55; CHECK-LABEL: @fold3_reassoc_nsz(
56; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
57; CHECK-NEXT:    ret double [[T2]]
58;
59  %t1 = fmul reassoc nsz double 5.000000e+00, %f1
60  %t2 = fadd reassoc nsz double %f1, %t1
61  ret double %t2
62}
63
64; TODO: This doesn't require 'nsz'.  It should fold to f1 * 6.0.
65define double @fold3_reassoc(double %f1) {
66; CHECK-LABEL: @fold3_reassoc(
67; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
68; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc double [[F1]], [[T1]]
69; CHECK-NEXT:    ret double [[T2]]
70;
71  %t1 = fmul reassoc double 5.000000e+00, %f1
72  %t2 = fadd reassoc double %f1, %t1
73  ret double %t2
74}
75
76; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
77define float @fold4(float %f1, float %f2) {
78; CHECK-LABEL: @fold4(
79; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
80; CHECK-NEXT:    [[ADD:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
81; CHECK-NEXT:    ret float [[ADD]]
82;
83  %sub = fsub float 4.000000e+00, %f1
84  %sub1 = fsub float 5.000000e+00, %f2
85  %add = fadd fast float %sub, %sub1
86  ret float %add
87}
88
89; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
90define float @fold4_reassoc_nsz(float %f1, float %f2) {
91; CHECK-LABEL: @fold4_reassoc_nsz(
92; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
93; CHECK-NEXT:    [[ADD:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
94; CHECK-NEXT:    ret float [[ADD]]
95;
96  %sub = fsub float 4.000000e+00, %f1
97  %sub1 = fsub float 5.000000e+00, %f2
98  %add = fadd reassoc nsz float %sub, %sub1
99  ret float %add
100}
101
102; TODO: This doesn't require 'nsz'.  It should fold to (9.0 - (f1 + f2)).
103define float @fold4_reassoc(float %f1, float %f2) {
104; CHECK-LABEL: @fold4_reassoc(
105; CHECK-NEXT:    [[SUB:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
106; CHECK-NEXT:    [[SUB1:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
107; CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc float [[SUB]], [[SUB1]]
108; CHECK-NEXT:    ret float [[ADD]]
109;
110  %sub = fsub float 4.000000e+00, %f1
111  %sub1 = fsub float 5.000000e+00, %f2
112  %add = fadd reassoc float %sub, %sub1
113  ret float %add
114}
115
116; (X + C1) + C2 => X + (C1 + C2)
117define float @fold5(float %f1) {
118; CHECK-LABEL: @fold5(
119; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
120; CHECK-NEXT:    ret float [[ADD1]]
121;
122  %add = fadd float %f1, 4.000000e+00
123  %add1 = fadd fast float %add, 5.000000e+00
124  ret float %add1
125}
126
127; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
128define float @fold5_reassoc_nsz(float %f1) {
129; CHECK-LABEL: @fold5_reassoc_nsz(
130; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
131; CHECK-NEXT:    ret float [[ADD1]]
132;
133  %add = fadd float %f1, 4.000000e+00
134  %add1 = fadd reassoc nsz float %add, 5.000000e+00
135  ret float %add1
136}
137
138; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
139define float @fold5_reassoc(float %f1) {
140; CHECK-LABEL: @fold5_reassoc(
141; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
142; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
143; CHECK-NEXT:    ret float [[ADD1]]
144;
145  %add = fadd float %f1, 4.000000e+00
146  %add1 = fadd reassoc float %add, 5.000000e+00
147  ret float %add1
148}
149
150; (X + X) + X + X => 4.0 * X
151define float @fold6(float %f1) {
152; CHECK-LABEL: @fold6(
153; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
154; CHECK-NEXT:    ret float [[T3]]
155;
156  %t1 = fadd fast float %f1, %f1
157  %t2 = fadd fast float %f1, %t1
158  %t3 = fadd fast float %t2, %f1
159  ret float %t3
160}
161
162; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
163define float @fold6_reassoc_nsz(float %f1) {
164; CHECK-LABEL: @fold6_reassoc_nsz(
165; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
166; CHECK-NEXT:    ret float [[T3]]
167;
168  %t1 = fadd reassoc nsz float %f1, %f1
169  %t2 = fadd reassoc nsz float %f1, %t1
170  %t3 = fadd reassoc nsz float %t2, %f1
171  ret float %t3
172}
173
174; TODO: This doesn't require 'nsz'.  It should fold to f1 * 4.0.
175define float @fold6_reassoc(float %f1) {
176; CHECK-LABEL: @fold6_reassoc(
177; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
178; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1]], [[T1]]
179; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T2]], [[F1]]
180; CHECK-NEXT:    ret float [[T3]]
181;
182  %t1 = fadd reassoc float %f1, %f1
183  %t2 = fadd reassoc float %f1, %t1
184  %t3 = fadd reassoc float %t2, %f1
185  ret float %t3
186}
187
188; C1 * X + (X + X) = (C1 + 2) * X
189define float @fold7(float %f1) {
190; CHECK-LABEL: @fold7(
191; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
192; CHECK-NEXT:    ret float [[T3]]
193;
194  %t1 = fmul fast float %f1, 5.000000e+00
195  %t2 = fadd fast float %f1, %f1
196  %t3 = fadd fast float %t1, %t2
197  ret float %t3
198}
199
200; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
201define float @fold7_reassoc_nsz(float %f1) {
202; CHECK-LABEL: @fold7_reassoc_nsz(
203; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
204; CHECK-NEXT:    ret float [[T3]]
205;
206  %t1 = fmul reassoc nsz float %f1, 5.000000e+00
207  %t2 = fadd reassoc nsz float %f1, %f1
208  %t3 = fadd reassoc nsz float %t1, %t2
209  ret float %t3
210}
211
212; TODO: This doesn't require 'nsz'.  It should fold to f1 * 7.0.
213define float @fold7_reassoc(float %f1) {
214; CHECK-LABEL: @fold7_reassoc(
215; CHECK-NEXT:    [[T1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
216; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
217; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
218; CHECK-NEXT:    ret float [[T3]]
219;
220  %t1 = fmul reassoc float %f1, 5.000000e+00
221  %t2 = fadd reassoc float %f1, %f1
222  %t3 = fadd reassoc float %t1, %t2
223  ret float %t3
224}
225
226; (X + X) + (X + X) + X => 5.0 * X
227define float @fold8(float %f1) {
228; CHECK-LABEL: @fold8(
229; CHECK-NEXT:    [[T4:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
230; CHECK-NEXT:    ret float [[T4]]
231;
232  %t1 = fadd fast float %f1, %f1
233  %t2 = fadd fast float %f1, %f1
234  %t3 = fadd fast float %t1, %t2
235  %t4 = fadd fast float %t3, %f1
236  ret float %t4
237}
238
239; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
240define float @fold8_reassoc_nsz(float %f1) {
241; CHECK-LABEL: @fold8_reassoc_nsz(
242; CHECK-NEXT:    [[T4:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
243; CHECK-NEXT:    ret float [[T4]]
244;
245  %t1 = fadd reassoc nsz float %f1, %f1
246  %t2 = fadd reassoc nsz float %f1, %f1
247  %t3 = fadd reassoc nsz float %t1, %t2
248  %t4 = fadd reassoc nsz float %t3, %f1
249  ret float %t4
250}
251
252; TODO: This doesn't require 'nsz'.  It should fold to f1 * 5.0.
253define float @fold8_reassoc(float %f1) {
254; CHECK-LABEL: @fold8_reassoc(
255; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
256; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
257; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
258; CHECK-NEXT:    [[T4:%.*]] = fadd reassoc float [[T3]], [[F1]]
259; CHECK-NEXT:    ret float [[T4]]
260;
261  %t1 = fadd reassoc float %f1, %f1
262  %t2 = fadd reassoc float %f1, %f1
263  %t3 = fadd reassoc float %t1, %t2
264  %t4 = fadd reassoc float %t3, %f1
265  ret float %t4
266}
267
268; Y - (X + Y) --> -X
269
270define float @fsub_fadd_common_op_fneg(float %x, float %y) {
271; CHECK-LABEL: @fsub_fadd_common_op_fneg(
272; CHECK-NEXT:    [[R:%.*]] = fneg fast float [[X:%.*]]
273; CHECK-NEXT:    ret float [[R]]
274;
275  %a = fadd float %x, %y
276  %r = fsub fast float %y, %a
277  ret float %r
278}
279
280; Y - (X + Y) --> -X
281; Check again with 'reassoc' and 'nsz'.
282; nsz is required because: 0.0 - (0.0 + 0.0) -> 0.0, not -0.0
283
284define float @fsub_fadd_common_op_fneg_reassoc_nsz(float %x, float %y) {
285; CHECK-LABEL: @fsub_fadd_common_op_fneg_reassoc_nsz(
286; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz float [[X:%.*]]
287; CHECK-NEXT:    ret float [[R]]
288;
289  %a = fadd float %x, %y
290  %r = fsub reassoc nsz float %y, %a
291  ret float %r
292}
293
294; Y - (X + Y) --> -X
295
296define <2 x float> @fsub_fadd_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
297; CHECK-LABEL: @fsub_fadd_common_op_fneg_vec(
298; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz <2 x float> [[X:%.*]]
299; CHECK-NEXT:    ret <2 x float> [[R]]
300;
301  %a = fadd <2 x float> %x, %y
302  %r = fsub nsz reassoc <2 x float> %y, %a
303  ret <2 x float> %r
304}
305
306; Y - (Y + X) --> -X
307; Commute operands of the 'add'.
308
309define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) {
310; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute(
311; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz float [[X:%.*]]
312; CHECK-NEXT:    ret float [[R]]
313;
314  %a = fadd float %y, %x
315  %r = fsub reassoc nsz float %y, %a
316  ret float %r
317}
318
319; Y - (Y + X) --> -X
320
321define <2 x float> @fsub_fadd_common_op_fneg_commute_vec(<2 x float> %x, <2 x float> %y) {
322; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute_vec(
323; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz <2 x float> [[X:%.*]]
324; CHECK-NEXT:    ret <2 x float> [[R]]
325;
326  %a = fadd <2 x float> %y, %x
327  %r = fsub reassoc nsz <2 x float> %y, %a
328  ret <2 x float> %r
329}
330
331; (Y - X) - Y --> -X
332; nsz is required because: (0.0 - 0.0) - 0.0 -> 0.0, not -0.0
333
334define float @fsub_fsub_common_op_fneg(float %x, float %y) {
335; CHECK-LABEL: @fsub_fsub_common_op_fneg(
336; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz float [[X:%.*]]
337; CHECK-NEXT:    ret float [[R]]
338;
339  %s = fsub float %y, %x
340  %r = fsub reassoc nsz float %s, %y
341  ret float %r
342}
343
344; (Y - X) - Y --> -X
345
346define <2 x float> @fsub_fsub_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
347; CHECK-LABEL: @fsub_fsub_common_op_fneg_vec(
348; CHECK-NEXT:    [[R:%.*]] = fneg reassoc nsz <2 x float> [[X:%.*]]
349; CHECK-NEXT:    ret <2 x float> [[R]]
350;
351  %s = fsub <2 x float> %y, %x
352  %r = fsub reassoc nsz <2 x float> %s, %y
353  ret <2 x float> %r
354}
355
356; TODO: This doesn't require 'nsz'.  It should fold to 0 - f2
357define float @fold9_reassoc(float %f1, float %f2) {
358; CHECK-LABEL: @fold9_reassoc(
359; CHECK-NEXT:    [[T1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
360; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float [[F1]], [[T1]]
361; CHECK-NEXT:    ret float [[T3]]
362;
363  %t1 = fadd float %f1, %f2
364  %t3 = fsub reassoc float %f1, %t1
365  ret float %t3
366}
367
368; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
369; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
370; top of resulting simplified expression tree may potentially reveal some
371; optimization opportunities in the super-expression trees.
372;
373define float @fold10(float %f1, float %f2) {
374; CHECK-LABEL: @fold10(
375; CHECK-NEXT:    [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
376; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
377; CHECK-NEXT:    ret float [[T3]]
378;
379  %t1 = fadd fast float 2.000000e+00, %f1
380  %t2 = fsub fast float %f2, 3.000000e+00
381  %t3 = fadd fast float %t1, %t2
382  ret float %t3
383}
384
385; Check again with 'reassoc' and 'nsz'.
386; TODO: We may be able to remove the 'nsz' requirement.
387define float @fold10_reassoc_nsz(float %f1, float %f2) {
388; CHECK-LABEL: @fold10_reassoc_nsz(
389; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
390; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
391; CHECK-NEXT:    ret float [[T3]]
392;
393  %t1 = fadd reassoc nsz float 2.000000e+00, %f1
394  %t2 = fsub reassoc nsz float %f2, 3.000000e+00
395  %t3 = fadd reassoc nsz float %t1, %t2
396  ret float %t3
397}
398
399; Observe that the fold is not done with only reassoc (the instructions are
400; canonicalized, but not folded).
401; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
402define float @fold10_reassoc(float %f1, float %f2) {
403; CHECK-LABEL: @fold10_reassoc(
404; CHECK-NEXT:    [[T1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
405; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
406; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
407; CHECK-NEXT:    ret float [[T3]]
408;
409  %t1 = fadd reassoc float 2.000000e+00, %f1
410  %t2 = fsub reassoc float %f2, 3.000000e+00
411  %t3 = fadd reassoc float %t1, %t2
412  ret float %t3
413}
414
415; This used to crash/miscompile.
; (f1 + -1.0) is added to itself three times; the expected output is
; (f1 * 3.0) + -3.0. The %f2 argument is unused.
416
417define float @fail1(float %f1, float %f2) {
418; CHECK-LABEL: @fail1(
419; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
420; CHECK-NEXT:    [[ADD2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
421; CHECK-NEXT:    ret float [[ADD2]]
422;
423  %conv3 = fadd fast float %f1, -1.000000e+00
424  %add = fadd fast float %conv3, %conv3
425  %add2 = fadd fast float %add, %conv3
426  ret float %add2
427}
428
; (f1 - f2) - (f1 + f2): the expected output cancels f1, leaving -(f2 + f2).
429define double @fail2(double %f1, double %f2) {
430; CHECK-LABEL: @fail2(
431; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
432; CHECK-NEXT:    [[T3:%.*]] = fneg fast double [[TMP1]]
433; CHECK-NEXT:    ret double [[T3]]
434;
435  %t1 = fsub fast double %f1, %f2
436  %t2 = fadd fast double %f1, %f2
437  %t3 = fsub fast double %t1, %t2
438  ret double %t3
439}
440
441; (X * C) - X --> X * (C - 1.0)
442
443define float @fsub_op0_fmul_const(float %x) {
444; CHECK-LABEL: @fsub_op0_fmul_const(
445; CHECK-NEXT:    [[SUB:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
446; CHECK-NEXT:    ret float [[SUB]]
447;
448  %mul = fmul float %x, 7.0
449  %sub = fsub reassoc nsz float %mul, %x
450  ret float %sub
451}
452
453; (X * C) - X --> X * (C - 1.0)
454
455define <2 x float> @fsub_op0_fmul_const_vec(<2 x float> %x) {
456; CHECK-LABEL: @fsub_op0_fmul_const_vec(
457; CHECK-NEXT:    [[SUB:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float 6.000000e+00, float -4.300000e+01>
458; CHECK-NEXT:    ret <2 x float> [[SUB]]
459;
460  %mul = fmul <2 x float> %x, <float 7.0, float -42.0>
461  %sub = fsub reassoc nsz <2 x float> %mul, %x
462  ret <2 x float> %sub
463}
464
465; X - (X * C) --> X * (1.0 - C)
466
467define float @fsub_op1_fmul_const(float %x) {
468; CHECK-LABEL: @fsub_op1_fmul_const(
469; CHECK-NEXT:    [[SUB:%.*]] = fmul reassoc nsz float [[X:%.*]], -6.000000e+00
470; CHECK-NEXT:    ret float [[SUB]]
471;
472  %mul = fmul float %x, 7.0
473  %sub = fsub reassoc nsz float %x, %mul
474  ret float %sub
475}
476
477; X - (X * C) --> X * (1.0 - C)
478
479define <2 x float> @fsub_op1_fmul_const_vec(<2 x float> %x) {
480; CHECK-LABEL: @fsub_op1_fmul_const_vec(
481; CHECK-NEXT:    [[SUB:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float -6.000000e+00, float 1.000000e+00>
482; CHECK-NEXT:    ret <2 x float> [[SUB]]
483;
484  %mul = fmul <2 x float> %x, <float 7.0, float 0.0>
485  %sub = fsub reassoc nsz <2 x float> %x, %mul
486  ret <2 x float> %sub
487}
488
489; Verify the fold is not done with only 'reassoc' ('nsz' is required).
490
491define float @fsub_op0_fmul_const_wrong_FMF(float %x) {
492; CHECK-LABEL: @fsub_op0_fmul_const_wrong_FMF(
493; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
494; CHECK-NEXT:    [[SUB:%.*]] = fsub reassoc float [[MUL]], [[X]]
495; CHECK-NEXT:    ret float [[SUB]]
496;
497  %mul = fmul reassoc float %x, 7.0
498  %sub = fsub reassoc float %mul, %x
499  ret float %sub
500}
501
502; (select X+Y, X-Y) => X + (select Y, -Y)
503; This is always safe.  No FMF required.
504define float @fold16(float %x, float %y) {
505; CHECK-LABEL: @fold16(
506; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
507; CHECK-NEXT:    [[TMP1:%.*]] = fneg float [[Y]]
508; CHECK-NEXT:    [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
509; CHECK-NEXT:    [[R:%.*]] = fadd float [[X]], [[R_P]]
510; CHECK-NEXT:    ret float [[R]]
511;
512  %cmp = fcmp ogt float %x, %y
513  %plus = fadd float %x, %y
514  %minus = fsub float %x, %y
515  %r = select i1 %cmp, float %plus, float %minus
516  ret float %r
517}
518
519; =========================================================================
520;
521;   Testing-cases about negation
522;
523; =========================================================================
; Both fmul operands are negations (fsub -0.0, %f1 and fsub nsz 0.0, %f2);
; the expected output drops both and multiplies the original values directly.
524define float @fneg1(float %f1, float %f2) {
525; CHECK-LABEL: @fneg1(
526; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
527; CHECK-NEXT:    ret float [[MUL]]
528;
529  %sub = fsub float -0.000000e+00, %f1
530  %sub1 = fsub nsz float 0.000000e+00, %f2
531  %mul = fmul float %sub, %sub1
532  ret float %mul
533}
534
; fsub nsz 0.0, %x is expected to be canonicalized to fneg nsz %x.
535define float @fneg2(float %x) {
536; CHECK-LABEL: @fneg2(
537; CHECK-NEXT:    [[SUB:%.*]] = fneg nsz float [[X:%.*]]
538; CHECK-NEXT:    ret float [[SUB]]
539;
540  %sub = fsub nsz float 0.0, %x
541  ret float %sub
542}
543
; Vector form of the fsub-from-zero negation where one lane of the constant is
; poison; the expected output is still a single fneg nsz.
544define <2 x float> @fneg2_vec_poison(<2 x float> %x) {
545; CHECK-LABEL: @fneg2_vec_poison(
546; CHECK-NEXT:    [[SUB:%.*]] = fneg nsz <2 x float> [[X:%.*]]
547; CHECK-NEXT:    ret <2 x float> [[SUB]]
548;
549  %sub = fsub nsz <2 x float> <float poison, float 0.0>, %x
550  ret <2 x float> %sub
551}
552
553; =========================================================================
554;
555;   Testing-cases about div
556;
557; =========================================================================
558
559; X/C1 / C2 => X * (1/(C2*C1))
560define float @fdiv1(float %x) {
561; CHECK-LABEL: @fdiv1(
562; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
563; CHECK-NEXT:    ret float [[DIV1]]
564;
565  %div = fdiv fast float %x, 0x3FF3333340000000
566  %div1 = fdiv fast float %div, 0x4002666660000000
567  ret float %div1
568; 0x3FF3333340000000 = 1.2f
569; 0x4002666660000000 = 2.3f
570; 0x3FD7303B60000000 = 0.36231884057971014492
571}
572
573; X*C1 / C2 => X * (C1/C2)
574define float @fdiv2(float %x) {
575; CHECK-LABEL: @fdiv2(
576; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
577; CHECK-NEXT:    ret float [[DIV1]]
578;
579  %mul = fmul float %x, 0x3FF3333340000000
580  %div1 = fdiv fast float %mul, 0x4002666660000000
581  ret float %div1
582
583; 0x3FF3333340000000 = 1.2f
584; 0x4002666660000000 = 2.3f
585; 0x3FE0B21660000000 = 0.52173918485641479492
586}
587
; Vector version of "X*C1 / C2 => X * (C1/C2)": <6.0, 9.0> / <2.0, 3.0> is
; expected to become a multiplication by a splat of 3.0. Only the fdiv is 'fast'.
588define <2 x float> @fdiv2_vec(<2 x float> %x) {
589; CHECK-LABEL: @fdiv2_vec(
590; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00)
591; CHECK-NEXT:    ret <2 x float> [[DIV1]]
592;
593  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
594  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
595  ret <2 x float> %div1
596}
597
598; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal.
; The expected output keeps the fdiv by 0x47EFFFFFE0000000 and only turns the
; division by 0x4002666660000000 into a multiplication by a constant.
599;
600define float @fdiv3(float %x) {
601; CHECK-LABEL: @fdiv3(
602; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 0x3FDBD37A80000000
603; CHECK-NEXT:    [[DIV1:%.*]] = fdiv fast float [[TMP1]], 0x47EFFFFFE0000000
604; CHECK-NEXT:    ret float [[DIV1]]
605;
606  %div = fdiv fast float %x, 0x47EFFFFFE0000000
607  %div1 = fdiv fast float %div, 0x4002666660000000
608  ret float %div1
609}
610
611; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; NOTE(review): neither the fmul nor the fdiv below carries any fast-math
; flags, so this input would presumably stay unfolded regardless of the
; constants -- confirm whether 'fast' was intended on the fdiv.
612define float @fdiv4(float %x) {
613; CHECK-LABEL: @fdiv4(
614; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
615; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
616; CHECK-NEXT:    ret float [[DIV]]
617;
618  %mul = fmul float %x, 0x47EFFFFFE0000000
619  %div = fdiv float %mul, 0x3FC99999A0000000
620  ret float %div
621}
622
623; =========================================================================
624;
625;   Test-cases for square root
626;
627; =========================================================================
628
629; A squared factor fed into a square root intrinsic should be hoisted out
630; as a fabs() value.
631
632declare double @llvm.sqrt.f64(double)
633
634define double @sqrt_intrinsic_arg_squared(double %x) {
635; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
636; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
637; CHECK-NEXT:    ret double [[FABS]]
638;
639  %mul = fmul fast double %x, %x
640  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
641  ret double %sqrt
642}
643
644; Check all 6 combinations of a 3-way multiplication tree where
645; one factor is repeated.
646
647define double @sqrt_intrinsic_three_args1(double %x, double %y) {
648; CHECK-LABEL: @sqrt_intrinsic_three_args1(
649; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
650; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
651; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
652; CHECK-NEXT:    ret double [[SQRT]]
653;
654  %mul = fmul fast double %y, %x
655  %mul2 = fmul fast double %mul, %x
656  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
657  ret double %sqrt
658}
659
660define double @sqrt_intrinsic_three_args2(double %x, double %y) {
661; CHECK-LABEL: @sqrt_intrinsic_three_args2(
662; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
663; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
664; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
665; CHECK-NEXT:    ret double [[SQRT]]
666;
667  %mul = fmul fast double %x, %y
668  %mul2 = fmul fast double %mul, %x
669  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
670  ret double %sqrt
671}
672
673define double @sqrt_intrinsic_three_args3(double %x, double %y) {
674; CHECK-LABEL: @sqrt_intrinsic_three_args3(
675; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
676; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
677; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
678; CHECK-NEXT:    ret double [[SQRT]]
679;
680  %mul = fmul fast double %x, %x
681  %mul2 = fmul fast double %mul, %y
682  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
683  ret double %sqrt
684}
685
686define double @sqrt_intrinsic_three_args4(double %x, double %y) {
687; CHECK-LABEL: @sqrt_intrinsic_three_args4(
688; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
689; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
690; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
691; CHECK-NEXT:    ret double [[SQRT]]
692;
693  %mul = fmul fast double %y, %x
694  %mul2 = fmul fast double %x, %mul
695  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
696  ret double %sqrt
697}
698
699define double @sqrt_intrinsic_three_args5(double %x, double %y) {
700; CHECK-LABEL: @sqrt_intrinsic_three_args5(
701; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
702; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
703; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
704; CHECK-NEXT:    ret double [[SQRT]]
705;
706  %mul = fmul fast double %x, %y
707  %mul2 = fmul fast double %x, %mul
708  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
709  ret double %sqrt
710}
711
712define double @sqrt_intrinsic_three_args6(double %x, ptr %yp) {
713; CHECK-LABEL: @sqrt_intrinsic_three_args6(
714; CHECK-NEXT:    [[Y:%.*]] = load double, ptr [[YP:%.*]], align 8
715; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
716; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y]])
717; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
718; CHECK-NEXT:    ret double [[SQRT]]
719;
720  %y = load double, ptr %yp ; thwart complexity-based canonicalization
721  %mul = fmul fast double %x, %x
722  %mul2 = fmul fast double %y, %mul
723  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
724  ret double %sqrt
725}
726
727; If any operation is not 'fast', we can't simplify.
728
729define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
730; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
731; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
732; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
733; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
734; CHECK-NEXT:    ret double [[SQRT]]
735;
736  %mul = fmul double %x, %x
737  %mul2 = fmul fast double %mul, %y
738  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
739  ret double %sqrt
740}
741
; sqrt((x*x) * (x*x)) is expected to simplify to x*x, with no fabs or sqrt call
; left in the output.
742define double @sqrt_intrinsic_arg_4th(double %x) {
743; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
744; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
745; CHECK-NEXT:    ret double [[MUL]]
746;
747  %mul = fmul fast double %x, %x
748  %mul2 = fmul fast double %mul, %mul
749  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
750  ret double %sqrt
751}
752
; sqrt(((x*x) * x) * (x*x)): the expected output is (x*x) * sqrt(x).
753define double @sqrt_intrinsic_arg_5th(double %x) {
754; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
755; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
756; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
757; CHECK-NEXT:    [[SQRT:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
758; CHECK-NEXT:    ret double [[SQRT]]
759;
760  %mul = fmul fast double %x, %x
761  %mul2 = fmul fast double %mul, %x
762  %mul3 = fmul fast double %mul2, %mul
763  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
764  ret double %sqrt
765}
766
767; Check that square root calls have the same behavior.
768
769declare float @sqrtf(float)
770declare double @sqrt(double)
771declare fp128 @sqrtl(fp128)
772
773define float @sqrt_call_squared_f32(float %x) {
774; CHECK-LABEL: @sqrt_call_squared_f32(
775; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
776; CHECK-NEXT:    ret float [[FABS]]
777;
778  %mul = fmul fast float %x, %x
779  %sqrt = call fast float @sqrtf(float %mul)
780  ret float %sqrt
781}
782
783define double @sqrt_call_squared_f64(double %x) {
784; CHECK-LABEL: @sqrt_call_squared_f64(
785; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
786; CHECK-NEXT:    ret double [[FABS]]
787;
788  %mul = fmul fast double %x, %x
789  %sqrt = call fast double @sqrt(double %mul)
790  ret double %sqrt
791}
792
793define fp128 @sqrt_call_squared_f128(fp128 %x) {
794; CHECK-LABEL: @sqrt_call_squared_f128(
795; CHECK-NEXT:    [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
796; CHECK-NEXT:    ret fp128 [[FABS]]
797;
798  %mul = fmul fast fp128 %x, %x
799  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
800  ret fp128 %sqrt
801}
802
803; =========================================================================
804;
805;   Test-cases for fmin / fmax
806;
807; =========================================================================
808
809declare double @fmax(double, double)
810declare double @fmin(double, double)
811declare float @fmaxf(float, float)
812declare float @fminf(float, float)
813declare fp128 @fmaxl(fp128, fp128)
814declare fp128 @fminl(fp128, fp128)
815
816; 'nsz' is implied by the definition of fmax or fmin itself.
817
818; Shrink and replace the call.
819define float @max1(float %a, float %b) {
820; CHECK-LABEL: @max1(
821; CHECK-NEXT:    [[FMAXF:%.*]] = call fast float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
822; CHECK-NEXT:    ret float [[FMAXF]]
823;
824  %c = fpext float %a to double
825  %d = fpext float %b to double
826  %e = call fast double @fmax(double %c, double %d)
827  %f = fptrunc double %e to float
828  ret float %f
829}
830
; A plain fmaxf() call with no FMF is expected to become llvm.maxnum.f32 with
; only 'nsz' set.
831define float @fmax_no_fmf(float %a, float %b) {
832; CHECK-LABEL: @fmax_no_fmf(
833; CHECK-NEXT:    [[C:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
834; CHECK-NEXT:    ret float [[C]]
835;
836  %c = call float @fmaxf(float %a, float %b)
837  ret float %c
838}
839
; The 'nnan' flag on the fmaxf() call is kept, and 'nsz' is added, on the
; resulting llvm.maxnum.f32.
840define float @max2(float %a, float %b) {
841; CHECK-LABEL: @max2(
842; CHECK-NEXT:    [[C:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]])
843; CHECK-NEXT:    ret float [[C]]
844;
845  %c = call nnan float @fmaxf(float %a, float %b)
846  ret float %c
847}
848
849
; A 'fast' fmax() call is expected to become a 'fast' llvm.maxnum.f64.
850define double @max3(double %a, double %b) {
851; CHECK-LABEL: @max3(
852; CHECK-NEXT:    [[C:%.*]] = call fast double @llvm.maxnum.f64(double [[A:%.*]], double [[B:%.*]])
853; CHECK-NEXT:    ret double [[C]]
854;
855  %c = call fast double @fmax(double %a, double %b)
856  ret double %c
857}
858
; fp128 variant: fmaxl() with 'nnan' is expected to become llvm.maxnum.f128
; with 'nnan nsz'.
859define fp128 @max4(fp128 %a, fp128 %b) {
860; CHECK-LABEL: @max4(
861; CHECK-NEXT:    [[C:%.*]] = call nnan nsz fp128 @llvm.maxnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
862; CHECK-NEXT:    ret fp128 [[C]]
863;
864  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
865  ret fp128 %c
866}
867
868; Shrink and replace the call.
869define float @min1(float %a, float %b) {
870; CHECK-LABEL: @min1(
871; CHECK-NEXT:    [[FMINF:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
872; CHECK-NEXT:    ret float [[FMINF]]
873;
874  %c = fpext float %a to double
875  %d = fpext float %b to double
876  %e = call nnan double @fmin(double %c, double %d)
877  %f = fptrunc double %e to float
878  ret float %f
879}
880
; A plain fminf() call with no FMF is expected to become llvm.minnum.f32 with
; only 'nsz' set.
881define float @fmin_no_fmf(float %a, float %b) {
882; CHECK-LABEL: @fmin_no_fmf(
883; CHECK-NEXT:    [[C:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
884; CHECK-NEXT:    ret float [[C]]
885;
886  %c = call float @fminf(float %a, float %b)
887  ret float %c
888}
889
; A 'fast' fminf() call is expected to become a 'fast' llvm.minnum.f32.
890define float @min2(float %a, float %b) {
891; CHECK-LABEL: @min2(
892; CHECK-NEXT:    [[C:%.*]] = call fast float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]])
893; CHECK-NEXT:    ret float [[C]]
894;
895  %c = call fast float @fminf(float %a, float %b)
896  ret float %c
897}
898
; The 'nnan' flag on the fmin() call is kept, and 'nsz' is added, on the
; resulting llvm.minnum.f64.
899define double @min3(double %a, double %b) {
900; CHECK-LABEL: @min3(
901; CHECK-NEXT:    [[C:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[A:%.*]], double [[B:%.*]])
902; CHECK-NEXT:    ret double [[C]]
903;
904  %c = call nnan double @fmin(double %a, double %b)
905  ret double %c
906}
907
; fp128 variant: a 'fast' fminl() call is expected to become a 'fast'
; llvm.minnum.f128.
908define fp128 @min4(fp128 %a, fp128 %b) {
909; CHECK-LABEL: @min4(
910; CHECK-NEXT:    [[C:%.*]] = call fast fp128 @llvm.minnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]])
911; CHECK-NEXT:    ret fp128 [[C]]
912;
913  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
914  ret fp128 %c
915}
916
917; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
918; This is always safe.  No FMF required.
919define float @test55(i1 %which, float %a) {
920; CHECK-LABEL: @test55(
921; CHECK-NEXT:  entry:
922; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
923; CHECK:       delay:
924; CHECK-NEXT:    [[TMP0:%.*]] = fadd float [[A:%.*]], 1.000000e+00
925; CHECK-NEXT:    br label [[FINAL]]
926; CHECK:       final:
927; CHECK-NEXT:    [[PHI:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[TMP0]], [[DELAY]] ]
928; CHECK-NEXT:    ret float [[PHI]]
929;
930entry:
931  br i1 %which, label %final, label %delay
932
933delay:
934  br label %final
935
936final:
937  %phi = phi float [ 2.0, %entry ], [ %a, %delay ]
938  %value = fadd float %phi, 1.0
939  ret float %value
940}
941