xref: /llvm-project/llvm/test/CodeGen/PowerPC/fmf-propagation.ll (revision 60822637bf007cbaf7401a6ec25cdf2ea7b7edbd)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; REQUIRES: asserts
3; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1                        | FileCheck %s --check-prefix=FMFDEBUG
4; RUN: llc < %s -mtriple=powerpc64le                                                           | FileCheck %s --check-prefix=FMF
5; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
6; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
7
8; Test FP transforms using instruction/node-level fast-math-flags.
9; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
10; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
11
12declare float @llvm.fma.f32(float, float, float)
13declare float @llvm.sqrt.f32(float)
14
15; X * Y + Z --> fma(X, Y, Z)
16
17; The contract flag on the fmul is checked too: contract on the fadd alone must not produce an FMA.
18
19; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
20; FMFDEBUG-NOT:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
21; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
22
; Only the fadd carries 'contract'; the fmul has no fast-math flags.
; The FMF run expects a separate multiply and add (xsmulsp, xsaddsp), while the
; GLOBAL run (global unsafe-math options) expects a fused xsmaddasp.
23define float @fmul_fadd_contract1(float %x, float %y, float %z) {
24; FMF-LABEL: fmul_fadd_contract1:
25; FMF:       # %bb.0:
26; FMF-NEXT:    xsmulsp 0, 1, 2
27; FMF-NEXT:    xsaddsp 1, 0, 3
28; FMF-NEXT:    blr
29;
30; GLOBAL-LABEL: fmul_fadd_contract1:
31; GLOBAL:       # %bb.0:
32; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
33; GLOBAL-NEXT:    fmr 1, 3
34; GLOBAL-NEXT:    blr
35  %mul = fmul float %x, %y
36  %add = fadd contract float %mul, %z
37  ret float %add
38}
39
40; The contract flag on the fadd is checked too: contract on the fmul alone must not produce an FMA.
41
42; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
43; FMFDEBUG-NOT:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
44; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
45
; Only the fmul carries 'contract'; the fadd has no fast-math flags.
; The FMF run expects a separate multiply and add (xsmulsp, xsaddsp), while the
; GLOBAL run (global unsafe-math options) expects a fused xsmaddasp.
46define float @fmul_fadd_contract2(float %x, float %y, float %z) {
47; FMF-LABEL: fmul_fadd_contract2:
48; FMF:       # %bb.0:
49; FMF-NEXT:    xsmulsp 0, 1, 2
50; FMF-NEXT:    xsaddsp 1, 0, 3
51; FMF-NEXT:    blr
52;
53; GLOBAL-LABEL: fmul_fadd_contract2:
54; GLOBAL:       # %bb.0:
55; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
56; GLOBAL-NEXT:    fmr 1, 3
57; GLOBAL-NEXT:    blr
58  %mul = fmul contract float %x, %y
59  %add = fadd float %mul, %z
60  ret float %add
61}
62
63; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
64; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
65; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'
66
; Both the fmul and the fadd carry 'contract'.
; Both the FMF run and the GLOBAL run expect a fused xsmaddasp.
67define float @fmul_fadd_contract3(float %x, float %y, float %z) {
68; FMF-LABEL: fmul_fadd_contract3:
69; FMF:       # %bb.0:
70; FMF-NEXT:    xsmaddasp 3, 1, 2
71; FMF-NEXT:    fmr 1, 3
72; FMF-NEXT:    blr
73;
74; GLOBAL-LABEL: fmul_fadd_contract3:
75; GLOBAL:       # %bb.0:
76; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
77; GLOBAL-NEXT:    fmr 1, 3
78; GLOBAL-NEXT:    blr
79  %mul = fmul contract float %x, %y
80  %add = fadd contract float %mul, %z
81  ret float %add
82}
83
84; Reassociation does NOT imply that FMA contraction is allowed.
85
86; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
87; FMFDEBUG-NOT:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
88; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
89
; Only the fadd carries 'reassoc'; the fmul has no fast-math flags.
; The FMF run expects a separate multiply and add (xsmulsp, xsaddsp), while the
; GLOBAL run (global unsafe-math options) expects a fused xsmaddasp.
90define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
91; FMF-LABEL: fmul_fadd_reassoc1:
92; FMF:       # %bb.0:
93; FMF-NEXT:    xsmulsp 0, 1, 2
94; FMF-NEXT:    xsaddsp 1, 0, 3
95; FMF-NEXT:    blr
96;
97; GLOBAL-LABEL: fmul_fadd_reassoc1:
98; GLOBAL:       # %bb.0:
99; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
100; GLOBAL-NEXT:    fmr 1, 3
101; GLOBAL-NEXT:    blr
102  %mul = fmul float %x, %y
103  %add = fadd reassoc float %mul, %z
104  ret float %add
105}
106
107; This shouldn't change anything - the intermediate fmul result is now also flagged.
108; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
109; FMFDEBUG-NOT:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
110; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
111
; Both the fmul and the fadd carry 'reassoc' (but not 'contract').
; The FMF run still expects a separate multiply and add (xsmulsp, xsaddsp); the
; GLOBAL run (global unsafe-math options) expects a fused xsmaddasp.
112define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
113; FMF-LABEL: fmul_fadd_reassoc2:
114; FMF:       # %bb.0:
115; FMF-NEXT:    xsmulsp 0, 1, 2
116; FMF-NEXT:    xsaddsp 1, 0, 3
117; FMF-NEXT:    blr
118;
119; GLOBAL-LABEL: fmul_fadd_reassoc2:
120; GLOBAL:       # %bb.0:
121; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
122; GLOBAL-NEXT:    fmr 1, 3
123; GLOBAL-NEXT:    blr
124  %mul = fmul reassoc float %x, %y
125  %add = fadd reassoc float %mul, %z
126  ret float %add
127}
128
129; The fadd is now fully 'fast', but fmul is not yet.
130
131; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
132; FMFDEBUG-NOT:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
133; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
134
; The fadd carries 'fast'; the fmul has no fast-math flags.
; The FMF run expects a separate multiply and add (xsmulsp, xsaddsp), while the
; GLOBAL run (global unsafe-math options) expects a fused xsmaddasp.
135define float @fmul_fadd_fast1(float %x, float %y, float %z) {
136; FMF-LABEL: fmul_fadd_fast1:
137; FMF:       # %bb.0:
138; FMF-NEXT:    xsmulsp 0, 1, 2
139; FMF-NEXT:    xsaddsp 1, 0, 3
140; FMF-NEXT:    blr
141;
142; GLOBAL-LABEL: fmul_fadd_fast1:
143; GLOBAL:       # %bb.0:
144; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
145; GLOBAL-NEXT:    fmr 1, 3
146; GLOBAL-NEXT:    blr
147  %mul = fmul float %x, %y
148  %add = fadd fast float %mul, %z
149  ret float %add
150}
151
152; This implies that contraction is allowed - the intermediate fmul result is now also flagged.
153
154; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
155; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
156; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
157
; Both the fmul and the fadd carry 'fast'.
; Both the FMF run and the GLOBAL run expect a fused xsmaddasp.
158define float @fmul_fadd_fast2(float %x, float %y, float %z) {
159; FMF-LABEL: fmul_fadd_fast2:
160; FMF:       # %bb.0:
161; FMF-NEXT:    xsmaddasp 3, 1, 2
162; FMF-NEXT:    fmr 1, 3
163; FMF-NEXT:    blr
164;
165; GLOBAL-LABEL: fmul_fadd_fast2:
166; GLOBAL:       # %bb.0:
167; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
168; GLOBAL-NEXT:    fmr 1, 3
169; GLOBAL-NEXT:    blr
170  %mul = fmul fast float %x, %y
171  %add = fadd fast float %mul, %z
172  ret float %add
173}
174
175; fma(X, 7.0, X * 42.0) --> X * 49.0
176; This is the minimum FMF needed for this transform - the FMA allows reassociation.
177
178; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
179; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
180; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
181
182; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
183; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
184; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
185
; The llvm.fma call carries 'reassoc'; the fmul feeding its addend has no flags.
; Both runs expect the whole expression to become a single xsmulsp of %x by a
; constant loaded from the constant pool (.LCPI7_0).
186define float @fmul_fma_reassoc1(float %x) {
187; FMF-LABEL: fmul_fma_reassoc1:
188; FMF:       # %bb.0:
189; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
190; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
191; FMF-NEXT:    xsmulsp 1, 1, 0
192; FMF-NEXT:    blr
193;
194; GLOBAL-LABEL: fmul_fma_reassoc1:
195; GLOBAL:       # %bb.0:
196; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
197; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
198; GLOBAL-NEXT:    xsmulsp 1, 1, 0
199; GLOBAL-NEXT:    blr
200  %mul = fmul float %x, 42.0
201  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
202  ret float %fma
203}
204
205; This shouldn't change anything - the intermediate fmul result is now also flagged.
206
207; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
208; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
209; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
210
211; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
212; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
213; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
214
; Both the llvm.fma call and the fmul feeding its addend carry 'reassoc'.
; Both runs expect the whole expression to become a single xsmulsp of %x by a
; constant loaded from the constant pool (.LCPI8_0).
215define float @fmul_fma_reassoc2(float %x) {
216; FMF-LABEL: fmul_fma_reassoc2:
217; FMF:       # %bb.0:
218; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
219; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
220; FMF-NEXT:    xsmulsp 1, 1, 0
221; FMF-NEXT:    blr
222;
223; GLOBAL-LABEL: fmul_fma_reassoc2:
224; GLOBAL:       # %bb.0:
225; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
226; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
227; GLOBAL-NEXT:    xsmulsp 1, 1, 0
228; GLOBAL-NEXT:    blr
229  %mul = fmul reassoc float %x, 42.0
230  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
231  ret float %fma
232}
233
234; The FMA is now fully 'fast'. This implies that reassociation is allowed.
235
236; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
237; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
238; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
239
240; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
241; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
242; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
243
; The llvm.fma call carries 'fast'; the fmul feeding its addend has no flags.
; Both runs expect the whole expression to become a single xsmulsp of %x by a
; constant loaded from the constant pool (.LCPI9_0).
244define float @fmul_fma_fast1(float %x) {
245; FMF-LABEL: fmul_fma_fast1:
246; FMF:       # %bb.0:
247; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
248; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
249; FMF-NEXT:    xsmulsp 1, 1, 0
250; FMF-NEXT:    blr
251;
252; GLOBAL-LABEL: fmul_fma_fast1:
253; GLOBAL:       # %bb.0:
254; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
255; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
256; GLOBAL-NEXT:    xsmulsp 1, 1, 0
257; GLOBAL-NEXT:    blr
258  %mul = fmul float %x, 42.0
259  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
260  ret float %fma
261}
262
263; This shouldn't change anything - the intermediate fmul result is now also flagged.
264
265; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
266; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
267; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
268
269; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
270; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
271; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
272
; Both the llvm.fma call and the fmul feeding its addend carry 'fast'.
; Both runs expect the whole expression to become a single xsmulsp of %x by a
; constant loaded from the constant pool (.LCPI10_0).
273define float @fmul_fma_fast2(float %x) {
274; FMF-LABEL: fmul_fma_fast2:
275; FMF:       # %bb.0:
276; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
277; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
278; FMF-NEXT:    xsmulsp 1, 1, 0
279; FMF-NEXT:    blr
280;
281; GLOBAL-LABEL: fmul_fma_fast2:
282; GLOBAL:       # %bb.0:
283; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
284; GLOBAL-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
285; GLOBAL-NEXT:    xsmulsp 1, 1, 0
286; GLOBAL-NEXT:    blr
287  %mul = fmul fast float %x, 42.0
288  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
289  ret float %fma
290}
291
292; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
293
294; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
295; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
296; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
297
298; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
299; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
300; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
301
; llvm.sqrt call with 'afn ninf' in a function using attribute group #0
; ("denormal-fp-math"="ieee,ieee").
; Both runs expect an xsrsqrtesp estimate sequence, guarded by an
; xsabsdp/fcmpu/blt test against a constant-pool value (.LCPI11_1).
; The FMF run expects the refinement as separate xsmulsp/xsaddsp; the GLOBAL run
; expects it to use a fused xsmaddasp.
302define float @sqrt_afn_ieee(float %x) #0 {
303; FMF-LABEL: sqrt_afn_ieee:
304; FMF:       # %bb.0:
305; FMF-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
306; FMF-NEXT:    xsabsdp 0, 1
307; FMF-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
308; FMF-NEXT:    fcmpu 0, 0, 2
309; FMF-NEXT:    xxlxor 0, 0, 0
310; FMF-NEXT:    blt 0, .LBB11_2
311; FMF-NEXT:  # %bb.1:
312; FMF-NEXT:    xsrsqrtesp 2, 1
313; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
314; FMF-NEXT:    vspltisw 2, -3
315; FMF-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
316; FMF-NEXT:    xsmulsp 1, 1, 2
317; FMF-NEXT:    xsmulsp 0, 1, 0
318; FMF-NEXT:    xsmulsp 1, 1, 2
319; FMF-NEXT:    xvcvsxwdp 2, 34
320; FMF-NEXT:    xsaddsp 1, 1, 2
321; FMF-NEXT:    xsmulsp 0, 0, 1
322; FMF-NEXT:  .LBB11_2:
323; FMF-NEXT:    fmr 1, 0
324; FMF-NEXT:    blr
325;
326; GLOBAL-LABEL: sqrt_afn_ieee:
327; GLOBAL:       # %bb.0:
328; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
329; GLOBAL-NEXT:    xsabsdp 0, 1
330; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
331; GLOBAL-NEXT:    fcmpu 0, 0, 2
332; GLOBAL-NEXT:    xxlxor 0, 0, 0
333; GLOBAL-NEXT:    blt 0, .LBB11_2
334; GLOBAL-NEXT:  # %bb.1:
335; GLOBAL-NEXT:    xsrsqrtesp 0, 1
336; GLOBAL-NEXT:    vspltisw 2, -3
337; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
338; GLOBAL-NEXT:    xvcvsxwdp 2, 34
339; GLOBAL-NEXT:    xsmulsp 1, 1, 0
340; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
341; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
342; GLOBAL-NEXT:    xsmulsp 0, 1, 0
343; GLOBAL-NEXT:    xsmulsp 0, 0, 2
344; GLOBAL-NEXT:  .LBB11_2:
345; GLOBAL-NEXT:    fmr 1, 0
346; GLOBAL-NEXT:    blr
347  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
348  ret float %rt
349}
350
; llvm.sqrt call with 'afn' only (no 'ninf') in a function using attribute
; group #0 ("denormal-fp-math"="ieee,ieee").
; Both runs expect a plain xssqrtsp instead of the estimate sequence.
351define float @sqrt_afn_ieee_inf(float %x) #0 {
352; FMF-LABEL: sqrt_afn_ieee_inf:
353; FMF:       # %bb.0:
354; FMF-NEXT:    xssqrtsp 1, 1
355; FMF-NEXT:    blr
356;
357; GLOBAL-LABEL: sqrt_afn_ieee_inf:
358; GLOBAL:       # %bb.0:
359; GLOBAL-NEXT:    xssqrtsp 1, 1
360; GLOBAL-NEXT:    blr
361  %rt = call afn float @llvm.sqrt.f32(float %x)
362  ret float %rt
363}
364
365; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
366; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
367; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
368
369; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
370; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
371; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
372
; llvm.sqrt call with 'afn ninf' in a function using attribute group #1
; ("denormal-fp-math"="preserve-sign,preserve-sign").
; Both runs expect an xsrsqrtesp estimate sequence, guarded by an fcmpu/beq test
; of the input against an xxlxor-zeroed register.
; The FMF run expects the refinement as separate xsmulsp/xsaddsp; the GLOBAL run
; expects it to use a fused xsmaddasp.
373define float @sqrt_afn_preserve_sign(float %x) #1 {
374; FMF-LABEL: sqrt_afn_preserve_sign:
375; FMF:       # %bb.0:
376; FMF-NEXT:    xxlxor 0, 0, 0
377; FMF-NEXT:    fcmpu 0, 1, 0
378; FMF-NEXT:    beq 0, .LBB13_2
379; FMF-NEXT:  # %bb.1:
380; FMF-NEXT:    xsrsqrtesp 0, 1
381; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
382; FMF-NEXT:    vspltisw 2, -3
383; FMF-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
384; FMF-NEXT:    xsmulsp 1, 1, 0
385; FMF-NEXT:    xsmulsp 2, 1, 2
386; FMF-NEXT:    xsmulsp 0, 1, 0
387; FMF-NEXT:    xvcvsxwdp 1, 34
388; FMF-NEXT:    xsaddsp 0, 0, 1
389; FMF-NEXT:    xsmulsp 0, 2, 0
390; FMF-NEXT:  .LBB13_2:
391; FMF-NEXT:    fmr 1, 0
392; FMF-NEXT:    blr
393;
394; GLOBAL-LABEL: sqrt_afn_preserve_sign:
395; GLOBAL:       # %bb.0:
396; GLOBAL-NEXT:    xxlxor 0, 0, 0
397; GLOBAL-NEXT:    fcmpu 0, 1, 0
398; GLOBAL-NEXT:    beq 0, .LBB13_2
399; GLOBAL-NEXT:  # %bb.1:
400; GLOBAL-NEXT:    xsrsqrtesp 0, 1
401; GLOBAL-NEXT:    vspltisw 2, -3
402; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
403; GLOBAL-NEXT:    xvcvsxwdp 2, 34
404; GLOBAL-NEXT:    xsmulsp 1, 1, 0
405; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
406; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
407; GLOBAL-NEXT:    xsmulsp 0, 1, 0
408; GLOBAL-NEXT:    xsmulsp 0, 0, 2
409; GLOBAL-NEXT:  .LBB13_2:
410; GLOBAL-NEXT:    fmr 1, 0
411; GLOBAL-NEXT:    blr
412  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
413  ret float %rt
414}
415
; llvm.sqrt call with 'afn' only (no 'ninf') in a function using attribute
; group #1 ("denormal-fp-math"="preserve-sign,preserve-sign").
; Both runs expect a plain xssqrtsp instead of the estimate sequence.
416define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
417; FMF-LABEL: sqrt_afn_preserve_sign_inf:
418; FMF:       # %bb.0:
419; FMF-NEXT:    xssqrtsp 1, 1
420; FMF-NEXT:    blr
421;
422; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
423; GLOBAL:       # %bb.0:
424; GLOBAL-NEXT:    xssqrtsp 1, 1
425; GLOBAL-NEXT:    blr
426  %rt = call afn float @llvm.sqrt.f32(float %x)
427  ret float %rt
428}
429
430; The call now carries 'contract reassoc afn ninf' (not the full 'fast' set). Approximation is still allowed, and the estimate refinement may now use an FMA.
431
432; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
433; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
434; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
435
436; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
437; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
438; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
439
; llvm.sqrt call with 'contract reassoc afn ninf' in a function using attribute
; group #0 ("denormal-fp-math"="ieee,ieee").
; Both runs expect the same xsrsqrtesp estimate sequence, guarded by an
; xsabsdp/fcmpu/blt test against a constant-pool value (.LCPI15_1), with the
; refinement using a fused xsmaddasp.
440define float @sqrt_fast_ieee(float %x) #0 {
441; FMF-LABEL: sqrt_fast_ieee:
442; FMF:       # %bb.0:
443; FMF-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
444; FMF-NEXT:    xsabsdp 0, 1
445; FMF-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
446; FMF-NEXT:    fcmpu 0, 0, 2
447; FMF-NEXT:    xxlxor 0, 0, 0
448; FMF-NEXT:    blt 0, .LBB15_2
449; FMF-NEXT:  # %bb.1:
450; FMF-NEXT:    xsrsqrtesp 0, 1
451; FMF-NEXT:    vspltisw 2, -3
452; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
453; FMF-NEXT:    xvcvsxwdp 2, 34
454; FMF-NEXT:    xsmulsp 1, 1, 0
455; FMF-NEXT:    xsmaddasp 2, 1, 0
456; FMF-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
457; FMF-NEXT:    xsmulsp 0, 1, 0
458; FMF-NEXT:    xsmulsp 0, 0, 2
459; FMF-NEXT:  .LBB15_2:
460; FMF-NEXT:    fmr 1, 0
461; FMF-NEXT:    blr
462;
463; GLOBAL-LABEL: sqrt_fast_ieee:
464; GLOBAL:       # %bb.0:
465; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
466; GLOBAL-NEXT:    xsabsdp 0, 1
467; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
468; GLOBAL-NEXT:    fcmpu 0, 0, 2
469; GLOBAL-NEXT:    xxlxor 0, 0, 0
470; GLOBAL-NEXT:    blt 0, .LBB15_2
471; GLOBAL-NEXT:  # %bb.1:
472; GLOBAL-NEXT:    xsrsqrtesp 0, 1
473; GLOBAL-NEXT:    vspltisw 2, -3
474; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
475; GLOBAL-NEXT:    xvcvsxwdp 2, 34
476; GLOBAL-NEXT:    xsmulsp 1, 1, 0
477; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
478; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
479; GLOBAL-NEXT:    xsmulsp 0, 1, 0
480; GLOBAL-NEXT:    xsmulsp 0, 0, 2
481; GLOBAL-NEXT:  .LBB15_2:
482; GLOBAL-NEXT:    fmr 1, 0
483; GLOBAL-NEXT:    blr
484  %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
485  ret float %rt
486}
487
488; The call now carries 'contract reassoc ninf afn' (not the full 'fast' set). Approximation is still allowed, and the estimate refinement may now use an FMA.
489
490; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
491; FMFDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
492; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
493
494; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
495; GLOBALDEBUG:         fmul ninf contract afn reassoc {{t[0-9]+}}
496; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
497
; llvm.sqrt call with 'contract reassoc ninf afn' in a function using attribute
; group #1 ("denormal-fp-math"="preserve-sign,preserve-sign").
; Both runs expect the same xsrsqrtesp estimate sequence, guarded by an
; fcmpu/beq test of the input against an xxlxor-zeroed register, with the
; refinement using a fused xsmaddasp.
498define float @sqrt_fast_preserve_sign(float %x) #1 {
499; FMF-LABEL: sqrt_fast_preserve_sign:
500; FMF:       # %bb.0:
501; FMF-NEXT:    xxlxor 0, 0, 0
502; FMF-NEXT:    fcmpu 0, 1, 0
503; FMF-NEXT:    beq 0, .LBB16_2
504; FMF-NEXT:  # %bb.1:
505; FMF-NEXT:    xsrsqrtesp 0, 1
506; FMF-NEXT:    vspltisw 2, -3
507; FMF-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
508; FMF-NEXT:    xvcvsxwdp 2, 34
509; FMF-NEXT:    xsmulsp 1, 1, 0
510; FMF-NEXT:    xsmaddasp 2, 1, 0
511; FMF-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
512; FMF-NEXT:    xsmulsp 0, 1, 0
513; FMF-NEXT:    xsmulsp 0, 0, 2
514; FMF-NEXT:  .LBB16_2:
515; FMF-NEXT:    fmr 1, 0
516; FMF-NEXT:    blr
517;
518; GLOBAL-LABEL: sqrt_fast_preserve_sign:
519; GLOBAL:       # %bb.0:
520; GLOBAL-NEXT:    xxlxor 0, 0, 0
521; GLOBAL-NEXT:    fcmpu 0, 1, 0
522; GLOBAL-NEXT:    beq 0, .LBB16_2
523; GLOBAL-NEXT:  # %bb.1:
524; GLOBAL-NEXT:    xsrsqrtesp 0, 1
525; GLOBAL-NEXT:    vspltisw 2, -3
526; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
527; GLOBAL-NEXT:    xvcvsxwdp 2, 34
528; GLOBAL-NEXT:    xsmulsp 1, 1, 0
529; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
530; GLOBAL-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
531; GLOBAL-NEXT:    xsmulsp 0, 1, 0
532; GLOBAL-NEXT:    xsmulsp 0, 0, 2
533; GLOBAL-NEXT:  .LBB16_2:
534; GLOBAL-NEXT:    fmr 1, 0
535; GLOBAL-NEXT:    blr
536  %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
537  ret float %rt
538}
539
540; fcmp can have fast-math-flags.
541
542; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
543; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
544; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
545
546; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
547; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
548; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
549
; An 'fcmp nnan ult' of %a against 0.0 feeds a select between %z and %y.
; Both runs expect a single xscmpudp against an xxlxor-zeroed register followed
; by one blt, with fmr moves picking the result.
550define double @fcmp_nnan(double %a, double %y, double %z) {
551; FMF-LABEL: fcmp_nnan:
552; FMF:       # %bb.0:
553; FMF-NEXT:    xxlxor 0, 0, 0
554; FMF-NEXT:    xscmpudp 0, 1, 0
555; FMF-NEXT:    blt 0, .LBB17_2
556; FMF-NEXT:  # %bb.1:
557; FMF-NEXT:    fmr 3, 2
558; FMF-NEXT:  .LBB17_2:
559; FMF-NEXT:    fmr 1, 3
560; FMF-NEXT:    blr
561;
562; GLOBAL-LABEL: fcmp_nnan:
563; GLOBAL:       # %bb.0:
564; GLOBAL-NEXT:    xxlxor 0, 0, 0
565; GLOBAL-NEXT:    xscmpudp 0, 1, 0
566; GLOBAL-NEXT:    blt 0, .LBB17_2
567; GLOBAL-NEXT:  # %bb.1:
568; GLOBAL-NEXT:    fmr 3, 2
569; GLOBAL-NEXT:  .LBB17_2:
570; GLOBAL-NEXT:    fmr 1, 3
571; GLOBAL-NEXT:    blr
572  %cmp = fcmp nnan ult double %a, 0.0
573  %z.y = select i1 %cmp, double %z, double %y
574  ret double %z.y
575}
576
577; FP library calls can have fast-math-flags.
578
579; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
580; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
581; FMFDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
582; FMFDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
583; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
584
585; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
586; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64<ptr @log2>
587; GLOBALDEBUG:         ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1
588; GLOBALDEBUG:         f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1
589; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
590
591declare double @log2(double)
; Calls the external @log2 with the 'afn' flag on the call instruction.
; Both runs expect an ordinary library call (bl log2 + nop) wrapped in the
; link-register save/restore and stack-frame setup shown in the checks.
592define double @log2_approx(double %x) nounwind {
593; FMF-LABEL: log2_approx:
594; FMF:       # %bb.0:
595; FMF-NEXT:    mflr 0
596; FMF-NEXT:    stdu 1, -32(1)
597; FMF-NEXT:    std 0, 48(1)
598; FMF-NEXT:    bl log2
599; FMF-NEXT:    nop
600; FMF-NEXT:    addi 1, 1, 32
601; FMF-NEXT:    ld 0, 16(1)
602; FMF-NEXT:    mtlr 0
603; FMF-NEXT:    blr
604;
605; GLOBAL-LABEL: log2_approx:
606; GLOBAL:       # %bb.0:
607; GLOBAL-NEXT:    mflr 0
608; GLOBAL-NEXT:    stdu 1, -32(1)
609; GLOBAL-NEXT:    std 0, 48(1)
610; GLOBAL-NEXT:    bl log2
611; GLOBAL-NEXT:    nop
612; GLOBAL-NEXT:    addi 1, 1, 32
613; GLOBAL-NEXT:    ld 0, 16(1)
614; GLOBAL-NEXT:    mtlr 0
615; GLOBAL-NEXT:    blr
616  %r = call afn double @log2(double %x)
617  ret double %r
618}
619
620; -(X - Y) --> (Y - X)
621
622; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
623; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
624; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
625
626; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
627; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
628; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
629
; Computes 0.0 - (%x - %y), where only the outer fsub carries 'nsz'.
; Both runs expect a single xssubsp with the operands swapped (2, 1), i.e. the
; negation is folded into the subtraction. %z is not used by the body.
630define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
631; FMF-LABEL: fneg_fsub_nozeros_1:
632; FMF:       # %bb.0:
633; FMF-NEXT:    xssubsp 1, 2, 1
634; FMF-NEXT:    blr
635;
636; GLOBAL-LABEL: fneg_fsub_nozeros_1:
637; GLOBAL:       # %bb.0:
638; GLOBAL-NEXT:    xssubsp 1, 2, 1
639; GLOBAL-NEXT:    blr
640  %neg = fsub float %x, %y
641  %add = fsub nsz float 0.0, %neg
642  ret float %add
643}
644
645attributes #0 = { "denormal-fp-math"="ieee,ieee" }
646attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
647;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
648; FMFDEBUG: {{.*}}
649; GLOBALDEBUG: {{.*}}
650