; xref: /llvm-project/llvm/test/CodeGen/PowerPC/fp-strict.ll (revision aa91d90cb07d72b32176a966fe798ab71ecb0a76)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
3; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
5; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE
6
7declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
8declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
9declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
10declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
11
12declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
13declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
14declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
15declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
16
17declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
18declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
19declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
20declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
21
22declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
23declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
24declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
25declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
26
27declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
28declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
29declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
30declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
31
32declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
33declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
34declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
35declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
36
define float @fadd_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fadd_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsaddsp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fadd_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fadds f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fadd_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsadd r3, r3, r4
; SPE-NEXT:    blr
; Strict f32 add: xsaddsp with VSX, fadds without VSX, efsadd on SPE (float in GPR).
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %f1, float %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res
}
58
define double @fadd_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fadd_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsadddp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fadd_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fadd f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fadd_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    efdadd r4, r3, r5
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    blr
; Strict f64 add: xsadddp / fadd; SPE builds 64-bit values from GPR pairs with
; evmergelo, adds with efdadd, and splits the result back with evmergehi.
  %res = call double @llvm.experimental.constrained.fadd.f64(
                        double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res
}
83
define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvaddsp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fadd_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    lfs f1, -36(r1)
; NOVSX-NEXT:    fadds f0, f1, f0
; NOVSX-NEXT:    lfs f1, -40(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fadds f0, f1, f0
; NOVSX-NEXT:    lfs f1, -44(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fadds f0, f1, f0
; NOVSX-NEXT:    lfs f1, -48(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fadds f0, f1, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fadd_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsadd r6, r6, r10
; SPE-NEXT:    efsadd r5, r5, r9
; SPE-NEXT:    efsadd r4, r4, r8
; SPE-NEXT:    efsadd r3, r3, r7
; SPE-NEXT:    blr
; Strict v4f32 add: one xvaddsp with VSX; without VSX the vector is scalarized
; through the stack into four fadds; SPE scalarizes to four efsadd on GPRs.
  %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
                        <4 x float> %vf1, <4 x float> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res
}
129
define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvadddp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fadd_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fadd f2, f2, f4
; NOVSX-NEXT:    fadd f1, f1, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fadd_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evldd r4, 8(r1)
; SPE-NEXT:    evmergelo r7, r7, r8
; SPE-NEXT:    evmergelo r8, r9, r10
; SPE-NEXT:    li r9, 8
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    efdadd r4, r7, r4
; SPE-NEXT:    evstddx r4, r3, r9
; SPE-NEXT:    efdadd r4, r5, r8
; SPE-NEXT:    evstdd r4, 0(r3)
; SPE-NEXT:    blr
; Strict v2f64 add: xvadddp with VSX; two scalar fadd without VSX; on SPE the
; result is stored through the pointer in r3 (indirect <2 x double> return).
  %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
                        <2 x double> %vf1, <2 x double> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}
160
define float @fsub_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fsub_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xssubsp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsub_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsubs f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsub_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efssub r3, r3, r4
; SPE-NEXT:    blr

; Strict f32 subtract: xssubsp with VSX, fsubs without VSX, efssub on SPE.
  %res = call float @llvm.experimental.constrained.fsub.f32(
                        float %f1, float %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res;
}
183
define double @fsub_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fsub_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xssubdp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsub_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsub f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsub_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    efdsub r4, r3, r5
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    blr

; Strict f64 subtract: xssubdp / fsub; SPE merges GPR pairs, efdsub, then splits.
  %res = call double @llvm.experimental.constrained.fsub.f64(
                        double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res;
}
209
define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvsubsp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsub_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    lfs f1, -36(r1)
; NOVSX-NEXT:    fsubs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -40(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fsubs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -44(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fsubs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -48(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fsubs f0, f1, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsub_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efssub r6, r6, r10
; SPE-NEXT:    efssub r5, r5, r9
; SPE-NEXT:    efssub r4, r4, r8
; SPE-NEXT:    efssub r3, r3, r7
; SPE-NEXT:    blr
; Strict v4f32 subtract: xvsubsp with VSX; stack-scalarized fsubs without VSX;
; four efssub on SPE.
  %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
                        <4 x float> %vf1, <4 x float> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res;
}
255
define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fsub_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvsubdp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsub_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsub f2, f2, f4
; NOVSX-NEXT:    fsub f1, f1, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsub_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evldd r4, 8(r1)
; SPE-NEXT:    evmergelo r7, r7, r8
; SPE-NEXT:    evmergelo r8, r9, r10
; SPE-NEXT:    li r9, 8
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    efdsub r4, r7, r4
; SPE-NEXT:    evstddx r4, r3, r9
; SPE-NEXT:    efdsub r4, r5, r8
; SPE-NEXT:    evstdd r4, 0(r3)
; SPE-NEXT:    blr
; Strict v2f64 subtract: xvsubdp with VSX; two fsub without VSX; SPE stores the
; result through the pointer in r3.
  %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
                        <2 x double> %vf1, <2 x double> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res;
}
286
define float @fmul_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fmul_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmulsp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmul_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmuls f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmul_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsmul r3, r3, r4
; SPE-NEXT:    blr

; Strict f32 multiply: xsmulsp with VSX, fmuls without VSX, efsmul on SPE.
  %res = call float @llvm.experimental.constrained.fmul.f32(
                        float %f1, float %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res;
}
309
define double @fmul_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fmul_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmuldp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmul_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmul f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmul_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    efdmul r4, r3, r5
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    blr

; Strict f64 multiply: xsmuldp / fmul; SPE merges GPR pairs, efdmul, then splits.
  %res = call double @llvm.experimental.constrained.fmul.f64(
                        double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res;
}
335
define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvmulsp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmul_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    lfs f1, -36(r1)
; NOVSX-NEXT:    fmuls f0, f1, f0
; NOVSX-NEXT:    lfs f1, -40(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fmuls f0, f1, f0
; NOVSX-NEXT:    lfs f1, -44(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fmuls f0, f1, f0
; NOVSX-NEXT:    lfs f1, -48(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fmuls f0, f1, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmul_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsmul r6, r6, r10
; SPE-NEXT:    efsmul r5, r5, r9
; SPE-NEXT:    efsmul r4, r4, r8
; SPE-NEXT:    efsmul r3, r3, r7
; SPE-NEXT:    blr
; Strict v4f32 multiply: xvmulsp with VSX; stack-scalarized fmuls without VSX;
; four efsmul on SPE.
  %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
                        <4 x float> %vf1, <4 x float> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res;
}
381
define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fmul_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvmuldp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmul_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmul f2, f2, f4
; NOVSX-NEXT:    fmul f1, f1, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmul_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evldd r4, 8(r1)
; SPE-NEXT:    evmergelo r7, r7, r8
; SPE-NEXT:    evmergelo r8, r9, r10
; SPE-NEXT:    li r9, 8
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    efdmul r4, r7, r4
; SPE-NEXT:    evstddx r4, r3, r9
; SPE-NEXT:    efdmul r4, r5, r8
; SPE-NEXT:    evstdd r4, 0(r3)
; SPE-NEXT:    blr
; Strict v2f64 multiply: xvmuldp with VSX; two fmul without VSX; SPE stores the
; result through the pointer in r3.
  %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
                        <2 x double> %vf1, <2 x double> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res;
}
412
define float @fdiv_f32(float %f1, float %f2) #0 {
; CHECK-LABEL: fdiv_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsdivsp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fdiv_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fdivs f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fdiv_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsdiv r3, r3, r4
; SPE-NEXT:    blr

; Strict f32 divide: xsdivsp with VSX, fdivs without VSX, efsdiv on SPE.
  %res = call float @llvm.experimental.constrained.fdiv.f32(
                        float %f1, float %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res;
}
435
define double @fdiv_f64(double %f1, double %f2) #0 {
; CHECK-LABEL: fdiv_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsdivdp f1, f1, f2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fdiv_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fdiv f1, f1, f2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fdiv_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    efddiv r4, r3, r5
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    blr

; Strict f64 divide: xsdivdp / fdiv; SPE merges GPR pairs, efddiv, then splits.
  %res = call double @llvm.experimental.constrained.fdiv.f64(
                        double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res;
}
461
define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvdivsp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fdiv_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    lfs f1, -36(r1)
; NOVSX-NEXT:    fdivs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -40(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fdivs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -44(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fdivs f0, f1, f0
; NOVSX-NEXT:    lfs f1, -48(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fdivs f0, f1, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fdiv_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    efsdiv r6, r6, r10
; SPE-NEXT:    efsdiv r5, r5, r9
; SPE-NEXT:    efsdiv r4, r4, r8
; SPE-NEXT:    efsdiv r3, r3, r7
; SPE-NEXT:    blr
; Strict v4f32 divide: xvdivsp with VSX; stack-scalarized fdivs without VSX;
; four efsdiv on SPE.
  %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
                        <4 x float> %vf1, <4 x float> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res
}
507
define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fdiv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvdivdp v2, v2, v3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fdiv_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fdiv f2, f2, f4
; NOVSX-NEXT:    fdiv f1, f1, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fdiv_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    evldd r4, 8(r1)
; SPE-NEXT:    evmergelo r7, r7, r8
; SPE-NEXT:    evmergelo r8, r9, r10
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    efddiv r4, r7, r4
; SPE-NEXT:    li r7, 8
; SPE-NEXT:    evstddx r4, r3, r7
; SPE-NEXT:    efddiv r4, r5, r8
; SPE-NEXT:    evstdd r4, 0(r3)
; SPE-NEXT:    blr
; Strict v2f64 divide: xvdivdp with VSX; two fdiv without VSX; SPE stores the
; result through the pointer in r3.
  %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
                        <2 x double> %vf1, <2 x double> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}
538
define double @no_fma_fold(double %f1, double %f2, double %f3) #0 {
; CHECK-LABEL: no_fma_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmuldp f0, f1, f2
; CHECK-NEXT:    xsadddp f1, f0, f3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: no_fma_fold:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmul f0, f1, f2
; NOVSX-NEXT:    fadd f1, f0, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: no_fma_fold:
; SPE:       # %bb.0:
; SPE-NEXT:    evmergelo r7, r7, r8
; SPE-NEXT:    evmergelo r5, r5, r6
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    efdmul r3, r3, r5
; SPE-NEXT:    efdadd r4, r3, r7
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    blr
; A strict multiply followed by a strict add must stay as two instructions on
; every run line; the backend may not fuse them into a single multiply-add.
  %mul = call double @llvm.experimental.constrained.fmul.f64(
                        double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %add = call double @llvm.experimental.constrained.fadd.f64(
                        double %mul, double %f3,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %add
}
571
define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fmadd_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmaddasp f3, f1, f2
; CHECK-NEXT:    fmr f1, f3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmadd_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmadds f1, f1, f2, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmadd_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    stw r0, 20(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; Strict f32 fma: xsmaddasp with VSX, fmadds without VSX; SPE has no fused
; multiply-add instruction, so it falls back to the fmaf libcall.
  %res = call float @llvm.experimental.constrained.fma.f32(
                        float %f0, float %f1, float %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res
}
602
define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fmadd_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmaddadp f3, f1, f2
; CHECK-NEXT:    fmr f1, f3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmadd_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmadd f1, f1, f2, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmadd_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    stw r0, 20(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    evmergelo r8, r7, r8
; SPE-NEXT:    evmergelo r6, r5, r6
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    evmergehi r5, r6, r6
; SPE-NEXT:    evmergehi r7, r8, r8
; SPE-NEXT:    bl fma
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; Strict f64 fma: xsmaddadp with VSX, fmadd without VSX; SPE marshals the GPR
; pairs and calls the fma libcall.
  %res = call double @llvm.experimental.constrained.fma.f64(
                        double %f0, double %f1, double %f2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res
}
641
define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fmadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvmaddasp v4, v2, v3
; CHECK-NEXT:    vmr v2, v4
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmadd_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v4, 0, r3
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -64
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    lfs f1, -36(r1)
; NOVSX-NEXT:    lfs f2, -52(r1)
; NOVSX-NEXT:    fmadds f0, f2, f1, f0
; NOVSX-NEXT:    lfs f1, -40(r1)
; NOVSX-NEXT:    lfs f2, -56(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fmadds f0, f2, f1, f0
; NOVSX-NEXT:    lfs f1, -44(r1)
; NOVSX-NEXT:    lfs f2, -60(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fmadds f0, f2, f1, f0
; NOVSX-NEXT:    lfs f1, -48(r1)
; NOVSX-NEXT:    lfs f2, -64(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fmadds f0, f2, f1, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmadd_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -64(r1)
; SPE-NEXT:    stw r0, 68(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 64
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r21, -44
; SPE-NEXT:    .cfi_offset r22, -40
; SPE-NEXT:    .cfi_offset r23, -36
; SPE-NEXT:    .cfi_offset r24, -32
; SPE-NEXT:    .cfi_offset r25, -28
; SPE-NEXT:    .cfi_offset r26, -24
; SPE-NEXT:    .cfi_offset r27, -20
; SPE-NEXT:    .cfi_offset r28, -16
; SPE-NEXT:    .cfi_offset r29, -12
; SPE-NEXT:    .cfi_offset r30, -8
; SPE-NEXT:    stw r27, 44(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r27, r5
; SPE-NEXT:    lwz r5, 84(r1)
; SPE-NEXT:    stw r25, 36(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r25, r3
; SPE-NEXT:    stw r26, 40(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r26, r4
; SPE-NEXT:    mr r3, r6
; SPE-NEXT:    mr r4, r10
; SPE-NEXT:    stw r21, 20(r1) # 4-byte Folded Spill
; SPE-NEXT:    stw r22, 24(r1) # 4-byte Folded Spill
; SPE-NEXT:    stw r23, 28(r1) # 4-byte Folded Spill
; SPE-NEXT:    stw r24, 32(r1) # 4-byte Folded Spill
; SPE-NEXT:    stw r28, 48(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r28, r7
; SPE-NEXT:    stw r29, 52(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r29, r8
; SPE-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
; SPE-NEXT:    mr r30, r9
; SPE-NEXT:    lwz r24, 72(r1)
; SPE-NEXT:    lwz r23, 76(r1)
; SPE-NEXT:    lwz r22, 80(r1)
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r21, r3
; SPE-NEXT:    mr r3, r27
; SPE-NEXT:    mr r4, r30
; SPE-NEXT:    mr r5, r22
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    mr r3, r26
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    mr r5, r23
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r29, r3
; SPE-NEXT:    mr r3, r25
; SPE-NEXT:    mr r4, r28
; SPE-NEXT:    mr r5, r24
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    mr r5, r30
; SPE-NEXT:    mr r6, r21
; SPE-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r29, 52(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r28, 48(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r27, 44(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r26, 40(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r25, 36(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r24, 32(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r23, 28(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r22, 24(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r21, 20(r1) # 4-byte Folded Reload
; SPE-NEXT:    lwz r0, 68(r1)
; SPE-NEXT:    addi r1, r1, 64
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; Strict v4f32 fma: one xvmaddasp with VSX; stack-scalarized fmadds without
; VSX; SPE scalarizes to four fmaf libcalls, keeping lanes live in CSRs.
  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res
}
759
define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fmadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvmaddadp v4, v2, v3
; CHECK-NEXT:    vmr v2, v4
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmadd_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmadd f2, f2, f4, f6
; NOVSX-NEXT:    fmadd f1, f1, f3, f5
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmadd_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -80(r1)
; SPE-NEXT:    stw r0, 84(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 80
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r26, -64
; SPE-NEXT:    .cfi_offset r27, -56
; SPE-NEXT:    .cfi_offset r28, -48
; SPE-NEXT:    .cfi_offset r29, -40
; SPE-NEXT:    .cfi_offset r30, -8
; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
; SPE-NEXT:    evmergelo r27, r7, r8
; SPE-NEXT:    evmergelo r9, r9, r10
; SPE-NEXT:    evmergelo r4, r5, r6
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    evldd r8, 96(r1)
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    evmergehi r5, r9, r9
; SPE-NEXT:    mr r6, r9
; SPE-NEXT:    evldd r29, 104(r1)
; SPE-NEXT:    evmergehi r7, r8, r8
; SPE-NEXT:    evldd r28, 88(r1)
; SPE-NEXT:    bl fma
; SPE-NEXT:    evmergelo r26, r3, r4
; SPE-NEXT:    evmergehi r3, r27, r27
; SPE-NEXT:    evmergehi r5, r28, r28
; SPE-NEXT:    evmergehi r7, r29, r29
; SPE-NEXT:    mr r4, r27
; SPE-NEXT:    mr r6, r28
; SPE-NEXT:    mr r8, r29
; SPE-NEXT:    bl fma
; SPE-NEXT:    li r5, 8
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    evstddx r3, r30, r5
; SPE-NEXT:    evstdd r26, 0(r30)
; SPE-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 84(r1)
; SPE-NEXT:    addi r1, r1, 80
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; Strict v2f64 fma: one xvmaddadp with VSX; two fmadd without VSX; SPE makes
; two fma libcalls and stores the result through the pointer in r3.
  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}
829
define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
; CHECK-LABEL: fmsub_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmsubasp f3, f1, f2
; CHECK-NEXT:    fmr f1, f3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmsub_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmsubs f1, f1, f2, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmsub_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    stw r0, 20(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    efsneg r5, r5
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; fneg of the addend folds into the strict fma: xsmsubasp / fmsubs; SPE negates
; with efsneg and calls fmaf.
  %neg = fneg float %f2
  %res = call float @llvm.experimental.constrained.fma.f32(
                        float %f0, float %f1, float %neg,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res
}
862
define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fmsub_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmsubadp f3, f1, f2
; CHECK-NEXT:    fmr f1, f3
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fmsub_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fmsub f1, f1, f2, f3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fmsub_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    stw r0, 20(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    evmergelo r6, r5, r6
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergelo r3, r7, r8
; SPE-NEXT:    efdneg r8, r3
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    evmergehi r5, r6, r6
; SPE-NEXT:    evmergehi r7, r8, r8
; SPE-NEXT:    bl fma
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
; fneg of the addend folds into the strict fma: xsmsubadp / fmsub; SPE negates
; with efdneg and calls fma.
  %neg = fneg double %f2
  %res = call double @llvm.experimental.constrained.fma.f64(
                        double %f0, double %f1, double %neg,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res
}
903
; Strict v4f32 fused multiply-subtract. VSX: a single xvmsubasp. Non-VSX has no
; vector fma, so it flips the sign of %vf2 with a vxor against an all-sign-bits
; mask and scalarizes through the stack with four fmadds. SPE makes four fmaf
; libcalls, negating each addend lane (efsneg) beforehand.
904define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
905; CHECK-LABEL: fmsub_v4f32:
906; CHECK:       # %bb.0:
907; CHECK-NEXT:    xvmsubasp v4, v2, v3
908; CHECK-NEXT:    vmr v2, v4
909; CHECK-NEXT:    blr
910;
911; NOVSX-LABEL: fmsub_v4f32:
912; NOVSX:       # %bb.0:
913; NOVSX-NEXT:    vspltisb v5, -1
914; NOVSX-NEXT:    addi r3, r1, -48
915; NOVSX-NEXT:    vslw v5, v5, v5
916; NOVSX-NEXT:    stvx v3, 0, r3
917; NOVSX-NEXT:    addi r3, r1, -64
918; NOVSX-NEXT:    vxor v4, v4, v5
919; NOVSX-NEXT:    stvx v2, 0, r3
920; NOVSX-NEXT:    addi r3, r1, -32
921; NOVSX-NEXT:    stvx v4, 0, r3
922; NOVSX-NEXT:    addi r3, r1, -16
923; NOVSX-NEXT:    lfs f0, -36(r1)
924; NOVSX-NEXT:    lfs f1, -52(r1)
925; NOVSX-NEXT:    lfs f2, -20(r1)
926; NOVSX-NEXT:    fmadds f0, f1, f0, f2
927; NOVSX-NEXT:    lfs f1, -56(r1)
928; NOVSX-NEXT:    lfs f2, -24(r1)
929; NOVSX-NEXT:    stfs f0, -4(r1)
930; NOVSX-NEXT:    lfs f0, -40(r1)
931; NOVSX-NEXT:    fmadds f0, f1, f0, f2
932; NOVSX-NEXT:    lfs f1, -60(r1)
933; NOVSX-NEXT:    lfs f2, -28(r1)
934; NOVSX-NEXT:    stfs f0, -8(r1)
935; NOVSX-NEXT:    lfs f0, -44(r1)
936; NOVSX-NEXT:    fmadds f0, f1, f0, f2
937; NOVSX-NEXT:    lfs f1, -64(r1)
938; NOVSX-NEXT:    lfs f2, -32(r1)
939; NOVSX-NEXT:    stfs f0, -12(r1)
940; NOVSX-NEXT:    lfs f0, -48(r1)
941; NOVSX-NEXT:    fmadds f0, f1, f0, f2
942; NOVSX-NEXT:    stfs f0, -16(r1)
943; NOVSX-NEXT:    lvx v2, 0, r3
944; NOVSX-NEXT:    blr
945;
946; SPE-LABEL: fmsub_v4f32:
947; SPE:       # %bb.0:
948; SPE-NEXT:    mflr r0
949; SPE-NEXT:    stwu r1, -64(r1)
950; SPE-NEXT:    stw r0, 68(r1)
951; SPE-NEXT:    .cfi_def_cfa_offset 64
952; SPE-NEXT:    .cfi_offset lr, 4
953; SPE-NEXT:    .cfi_offset r21, -44
954; SPE-NEXT:    .cfi_offset r22, -40
955; SPE-NEXT:    .cfi_offset r23, -36
956; SPE-NEXT:    .cfi_offset r24, -32
957; SPE-NEXT:    .cfi_offset r25, -28
958; SPE-NEXT:    .cfi_offset r26, -24
959; SPE-NEXT:    .cfi_offset r27, -20
960; SPE-NEXT:    .cfi_offset r28, -16
961; SPE-NEXT:    .cfi_offset r29, -12
962; SPE-NEXT:    .cfi_offset r30, -8
963; SPE-NEXT:    stw r25, 36(r1) # 4-byte Folded Spill
964; SPE-NEXT:    mr r25, r3
965; SPE-NEXT:    stw r26, 40(r1) # 4-byte Folded Spill
966; SPE-NEXT:    mr r26, r4
967; SPE-NEXT:    stw r27, 44(r1) # 4-byte Folded Spill
968; SPE-NEXT:    mr r27, r5
969; SPE-NEXT:    stw r28, 48(r1) # 4-byte Folded Spill
970; SPE-NEXT:    mr r28, r7
971; SPE-NEXT:    lwz r3, 80(r1)
972; SPE-NEXT:    lwz r4, 72(r1)
973; SPE-NEXT:    lwz r5, 76(r1)
974; SPE-NEXT:    lwz r7, 84(r1)
975; SPE-NEXT:    stw r22, 24(r1) # 4-byte Folded Spill
976; SPE-NEXT:    efsneg r22, r3
977; SPE-NEXT:    stw r23, 28(r1) # 4-byte Folded Spill
978; SPE-NEXT:    efsneg r23, r5
979; SPE-NEXT:    stw r24, 32(r1) # 4-byte Folded Spill
980; SPE-NEXT:    efsneg r24, r4
981; SPE-NEXT:    efsneg r5, r7
982; SPE-NEXT:    mr r3, r6
983; SPE-NEXT:    mr r4, r10
984; SPE-NEXT:    stw r21, 20(r1) # 4-byte Folded Spill
985; SPE-NEXT:    stw r29, 52(r1) # 4-byte Folded Spill
986; SPE-NEXT:    mr r29, r8
987; SPE-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
988; SPE-NEXT:    mr r30, r9
989; SPE-NEXT:    bl fmaf
990; SPE-NEXT:    mr r21, r3
991; SPE-NEXT:    mr r3, r27
992; SPE-NEXT:    mr r4, r30
993; SPE-NEXT:    mr r5, r22
994; SPE-NEXT:    bl fmaf
995; SPE-NEXT:    mr r30, r3
996; SPE-NEXT:    mr r3, r26
997; SPE-NEXT:    mr r4, r29
998; SPE-NEXT:    mr r5, r23
999; SPE-NEXT:    bl fmaf
1000; SPE-NEXT:    mr r29, r3
1001; SPE-NEXT:    mr r3, r25
1002; SPE-NEXT:    mr r4, r28
1003; SPE-NEXT:    mr r5, r24
1004; SPE-NEXT:    bl fmaf
1005; SPE-NEXT:    mr r4, r29
1006; SPE-NEXT:    mr r5, r30
1007; SPE-NEXT:    mr r6, r21
1008; SPE-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
1009; SPE-NEXT:    lwz r29, 52(r1) # 4-byte Folded Reload
1010; SPE-NEXT:    lwz r28, 48(r1) # 4-byte Folded Reload
1011; SPE-NEXT:    lwz r27, 44(r1) # 4-byte Folded Reload
1012; SPE-NEXT:    lwz r26, 40(r1) # 4-byte Folded Reload
1013; SPE-NEXT:    lwz r25, 36(r1) # 4-byte Folded Reload
1014; SPE-NEXT:    lwz r24, 32(r1) # 4-byte Folded Reload
1015; SPE-NEXT:    lwz r23, 28(r1) # 4-byte Folded Reload
1016; SPE-NEXT:    lwz r22, 24(r1) # 4-byte Folded Reload
1017; SPE-NEXT:    lwz r21, 20(r1) # 4-byte Folded Reload
1018; SPE-NEXT:    lwz r0, 68(r1)
1019; SPE-NEXT:    addi r1, r1, 64
1020; SPE-NEXT:    mtlr r0
1021; SPE-NEXT:    blr
1022  %neg = fneg <4 x float> %vf2
1023  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1024                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1025                        metadata !"round.dynamic",
1026                        metadata !"fpexcept.strict") #0
1027  ret <4 x float> %res
1028}
1029
; Strict v2f64 fused multiply-subtract. VSX: xvmsubadp. Non-VSX scalarizes to
; two fmsub. SPE makes two fma libcalls on negated addends and returns the
; vector indirectly through the pointer in r3 (evstdd/evstddx stores).
1030define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1031; CHECK-LABEL: fmsub_v2f64:
1032; CHECK:       # %bb.0:
1033; CHECK-NEXT:    xvmsubadp v4, v2, v3
1034; CHECK-NEXT:    vmr v2, v4
1035; CHECK-NEXT:    blr
1036;
1037; NOVSX-LABEL: fmsub_v2f64:
1038; NOVSX:       # %bb.0:
1039; NOVSX-NEXT:    fmsub f2, f2, f4, f6
1040; NOVSX-NEXT:    fmsub f1, f1, f3, f5
1041; NOVSX-NEXT:    blr
1042;
1043; SPE-LABEL: fmsub_v2f64:
1044; SPE:       # %bb.0:
1045; SPE-NEXT:    mflr r0
1046; SPE-NEXT:    stwu r1, -80(r1)
1047; SPE-NEXT:    stw r0, 84(r1)
1048; SPE-NEXT:    .cfi_def_cfa_offset 80
1049; SPE-NEXT:    .cfi_offset lr, 4
1050; SPE-NEXT:    .cfi_offset r26, -64
1051; SPE-NEXT:    .cfi_offset r27, -56
1052; SPE-NEXT:    .cfi_offset r28, -48
1053; SPE-NEXT:    .cfi_offset r29, -40
1054; SPE-NEXT:    .cfi_offset r30, -8
1055; SPE-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
1056; SPE-NEXT:    mr r30, r3
1057; SPE-NEXT:    evldd r3, 96(r1)
1058; SPE-NEXT:    evldd r11, 104(r1)
1059; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
1060; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
1061; SPE-NEXT:    efdneg r27, r11
1062; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
1063; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
1064; SPE-NEXT:    evmergelo r29, r7, r8
1065; SPE-NEXT:    evmergelo r9, r9, r10
1066; SPE-NEXT:    evmergelo r4, r5, r6
1067; SPE-NEXT:    efdneg r8, r3
1068; SPE-NEXT:    evmergehi r3, r4, r4
1069; SPE-NEXT:    evmergehi r5, r9, r9
1070; SPE-NEXT:    evmergehi r7, r8, r8
1071; SPE-NEXT:    mr r6, r9
1072; SPE-NEXT:    evldd r28, 88(r1)
1073; SPE-NEXT:    bl fma
1074; SPE-NEXT:    evmergelo r26, r3, r4
1075; SPE-NEXT:    evmergehi r3, r29, r29
1076; SPE-NEXT:    evmergehi r5, r28, r28
1077; SPE-NEXT:    evmergehi r7, r27, r27
1078; SPE-NEXT:    mr r4, r29
1079; SPE-NEXT:    mr r6, r28
1080; SPE-NEXT:    mr r8, r27
1081; SPE-NEXT:    bl fma
1082; SPE-NEXT:    li r5, 8
1083; SPE-NEXT:    evmergelo r3, r3, r4
1084; SPE-NEXT:    evstddx r3, r30, r5
1085; SPE-NEXT:    evstdd r26, 0(r30)
1086; SPE-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
1087; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
1088; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
1089; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
1090; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
1091; SPE-NEXT:    lwz r0, 84(r1)
1092; SPE-NEXT:    addi r1, r1, 80
1093; SPE-NEXT:    mtlr r0
1094; SPE-NEXT:    blr
1095  %neg = fneg <2 x double> %vf2
1096  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1097                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1098                        metadata !"round.dynamic",
1099                        metadata !"fpexcept.strict") #0
1100  ret <2 x double> %res
1101}
1102
; Strict f32 negated fused multiply-add, -(a*b + c). VSX fuses the outer fneg
; into xsnmaddasp; non-VSX into fnmadds. SPE calls fmaf and negates the result
; afterwards with efsneg.
1103define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
1104; CHECK-LABEL: fnmadd_f32:
1105; CHECK:       # %bb.0:
1106; CHECK-NEXT:    xsnmaddasp f3, f1, f2
1107; CHECK-NEXT:    fmr f1, f3
1108; CHECK-NEXT:    blr
1109;
1110; NOVSX-LABEL: fnmadd_f32:
1111; NOVSX:       # %bb.0:
1112; NOVSX-NEXT:    fnmadds f1, f1, f2, f3
1113; NOVSX-NEXT:    blr
1114;
1115; SPE-LABEL: fnmadd_f32:
1116; SPE:       # %bb.0:
1117; SPE-NEXT:    mflr r0
1118; SPE-NEXT:    stwu r1, -16(r1)
1119; SPE-NEXT:    stw r0, 20(r1)
1120; SPE-NEXT:    .cfi_def_cfa_offset 16
1121; SPE-NEXT:    .cfi_offset lr, 4
1122; SPE-NEXT:    bl fmaf
1123; SPE-NEXT:    efsneg r3, r3
1124; SPE-NEXT:    lwz r0, 20(r1)
1125; SPE-NEXT:    addi r1, r1, 16
1126; SPE-NEXT:    mtlr r0
1127; SPE-NEXT:    blr
1128  %fma = call float @llvm.experimental.constrained.fma.f32(
1129                        float %f0, float %f1, float %f2,
1130                        metadata !"round.dynamic",
1131                        metadata !"fpexcept.strict") #0
1132  %res = fneg float %fma
1133  ret float %res
1134}
1135
; Strict f64 negated fused multiply-add. VSX: xsnmaddadp; non-VSX: fnmadd.
; SPE reassembles GPR-pair doubles, calls the fma libcall, then negates the
; 64-bit result with efdneg.
1136define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
1137; CHECK-LABEL: fnmadd_f64:
1138; CHECK:       # %bb.0:
1139; CHECK-NEXT:    xsnmaddadp f3, f1, f2
1140; CHECK-NEXT:    fmr f1, f3
1141; CHECK-NEXT:    blr
1142;
1143; NOVSX-LABEL: fnmadd_f64:
1144; NOVSX:       # %bb.0:
1145; NOVSX-NEXT:    fnmadd f1, f1, f2, f3
1146; NOVSX-NEXT:    blr
1147;
1148; SPE-LABEL: fnmadd_f64:
1149; SPE:       # %bb.0:
1150; SPE-NEXT:    mflr r0
1151; SPE-NEXT:    stwu r1, -16(r1)
1152; SPE-NEXT:    stw r0, 20(r1)
1153; SPE-NEXT:    .cfi_def_cfa_offset 16
1154; SPE-NEXT:    .cfi_offset lr, 4
1155; SPE-NEXT:    evmergelo r8, r7, r8
1156; SPE-NEXT:    evmergelo r6, r5, r6
1157; SPE-NEXT:    evmergelo r4, r3, r4
1158; SPE-NEXT:    evmergehi r3, r4, r4
1159; SPE-NEXT:    evmergehi r5, r6, r6
1160; SPE-NEXT:    evmergehi r7, r8, r8
1161; SPE-NEXT:    bl fma
1162; SPE-NEXT:    evmergelo r3, r3, r4
1163; SPE-NEXT:    efdneg r4, r3
1164; SPE-NEXT:    evmergehi r3, r4, r4
1165; SPE-NEXT:    lwz r0, 20(r1)
1166; SPE-NEXT:    addi r1, r1, 16
1167; SPE-NEXT:    mtlr r0
1168; SPE-NEXT:    blr
1169  %fma = call double @llvm.experimental.constrained.fma.f64(
1170                        double %f0, double %f1, double %f2,
1171                        metadata !"round.dynamic",
1172                        metadata !"fpexcept.strict") #0
1173  %res = fneg double %fma
1174  ret double %res
1175}
1176
; Strict v4f32 negated fused multiply-add. Note the VSX codegen keeps the fneg
; as a separate xvnegsp after xvmaddasp instead of emitting a single fused
; negate-multiply-add (contrast with fnmadd_v2f64, which uses xvnmaddadp).
; Non-VSX scalarizes through the stack and flips signs with a final vxor;
; SPE makes four fmaf libcalls and negates each lane of the result.
1177define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1178; CHECK-LABEL: fnmadd_v4f32:
1179; CHECK:       # %bb.0:
1180; CHECK-NEXT:    xvmaddasp v4, v2, v3
1181; CHECK-NEXT:    xvnegsp v2, v4
1182; CHECK-NEXT:    blr
1183;
1184; NOVSX-LABEL: fnmadd_v4f32:
1185; NOVSX:       # %bb.0:
1186; NOVSX-NEXT:    addi r3, r1, -32
1187; NOVSX-NEXT:    vspltisb v5, -1
1188; NOVSX-NEXT:    stvx v4, 0, r3
1189; NOVSX-NEXT:    addi r3, r1, -48
1190; NOVSX-NEXT:    stvx v3, 0, r3
1191; NOVSX-NEXT:    addi r3, r1, -64
1192; NOVSX-NEXT:    vslw v3, v5, v5
1193; NOVSX-NEXT:    stvx v2, 0, r3
1194; NOVSX-NEXT:    addi r3, r1, -16
1195; NOVSX-NEXT:    lfs f0, -20(r1)
1196; NOVSX-NEXT:    lfs f1, -36(r1)
1197; NOVSX-NEXT:    lfs f2, -52(r1)
1198; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1199; NOVSX-NEXT:    lfs f1, -40(r1)
1200; NOVSX-NEXT:    lfs f2, -56(r1)
1201; NOVSX-NEXT:    stfs f0, -4(r1)
1202; NOVSX-NEXT:    lfs f0, -24(r1)
1203; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1204; NOVSX-NEXT:    lfs f1, -44(r1)
1205; NOVSX-NEXT:    lfs f2, -60(r1)
1206; NOVSX-NEXT:    stfs f0, -8(r1)
1207; NOVSX-NEXT:    lfs f0, -28(r1)
1208; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1209; NOVSX-NEXT:    lfs f1, -48(r1)
1210; NOVSX-NEXT:    lfs f2, -64(r1)
1211; NOVSX-NEXT:    stfs f0, -12(r1)
1212; NOVSX-NEXT:    lfs f0, -32(r1)
1213; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1214; NOVSX-NEXT:    stfs f0, -16(r1)
1215; NOVSX-NEXT:    lvx v2, 0, r3
1216; NOVSX-NEXT:    vxor v2, v2, v3
1217; NOVSX-NEXT:    blr
1218;
1219; SPE-LABEL: fnmadd_v4f32:
1220; SPE:       # %bb.0:
1221; SPE-NEXT:    mflr r0
1222; SPE-NEXT:    stwu r1, -64(r1)
1223; SPE-NEXT:    stw r0, 68(r1)
1224; SPE-NEXT:    .cfi_def_cfa_offset 64
1225; SPE-NEXT:    .cfi_offset lr, 4
1226; SPE-NEXT:    .cfi_offset r21, -44
1227; SPE-NEXT:    .cfi_offset r22, -40
1228; SPE-NEXT:    .cfi_offset r23, -36
1229; SPE-NEXT:    .cfi_offset r24, -32
1230; SPE-NEXT:    .cfi_offset r25, -28
1231; SPE-NEXT:    .cfi_offset r26, -24
1232; SPE-NEXT:    .cfi_offset r27, -20
1233; SPE-NEXT:    .cfi_offset r28, -16
1234; SPE-NEXT:    .cfi_offset r29, -12
1235; SPE-NEXT:    .cfi_offset r30, -8
1236; SPE-NEXT:    stw r27, 44(r1) # 4-byte Folded Spill
1237; SPE-NEXT:    mr r27, r5
1238; SPE-NEXT:    lwz r5, 84(r1)
1239; SPE-NEXT:    stw r25, 36(r1) # 4-byte Folded Spill
1240; SPE-NEXT:    mr r25, r3
1241; SPE-NEXT:    stw r26, 40(r1) # 4-byte Folded Spill
1242; SPE-NEXT:    mr r26, r4
1243; SPE-NEXT:    mr r3, r6
1244; SPE-NEXT:    mr r4, r10
1245; SPE-NEXT:    stw r21, 20(r1) # 4-byte Folded Spill
1246; SPE-NEXT:    stw r22, 24(r1) # 4-byte Folded Spill
1247; SPE-NEXT:    stw r23, 28(r1) # 4-byte Folded Spill
1248; SPE-NEXT:    stw r24, 32(r1) # 4-byte Folded Spill
1249; SPE-NEXT:    stw r28, 48(r1) # 4-byte Folded Spill
1250; SPE-NEXT:    mr r28, r7
1251; SPE-NEXT:    stw r29, 52(r1) # 4-byte Folded Spill
1252; SPE-NEXT:    mr r29, r8
1253; SPE-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
1254; SPE-NEXT:    mr r30, r9
1255; SPE-NEXT:    lwz r24, 72(r1)
1256; SPE-NEXT:    lwz r23, 76(r1)
1257; SPE-NEXT:    lwz r22, 80(r1)
1258; SPE-NEXT:    bl fmaf
1259; SPE-NEXT:    mr r21, r3
1260; SPE-NEXT:    mr r3, r27
1261; SPE-NEXT:    mr r4, r30
1262; SPE-NEXT:    mr r5, r22
1263; SPE-NEXT:    bl fmaf
1264; SPE-NEXT:    mr r30, r3
1265; SPE-NEXT:    mr r3, r26
1266; SPE-NEXT:    mr r4, r29
1267; SPE-NEXT:    mr r5, r23
1268; SPE-NEXT:    bl fmaf
1269; SPE-NEXT:    mr r29, r3
1270; SPE-NEXT:    mr r3, r25
1271; SPE-NEXT:    mr r4, r28
1272; SPE-NEXT:    mr r5, r24
1273; SPE-NEXT:    bl fmaf
1274; SPE-NEXT:    efsneg r4, r29
1275; SPE-NEXT:    efsneg r5, r30
1276; SPE-NEXT:    efsneg r3, r3
1277; SPE-NEXT:    efsneg r6, r21
1278; SPE-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
1279; SPE-NEXT:    lwz r29, 52(r1) # 4-byte Folded Reload
1280; SPE-NEXT:    lwz r28, 48(r1) # 4-byte Folded Reload
1281; SPE-NEXT:    lwz r27, 44(r1) # 4-byte Folded Reload
1282; SPE-NEXT:    lwz r26, 40(r1) # 4-byte Folded Reload
1283; SPE-NEXT:    lwz r25, 36(r1) # 4-byte Folded Reload
1284; SPE-NEXT:    lwz r24, 32(r1) # 4-byte Folded Reload
1285; SPE-NEXT:    lwz r23, 28(r1) # 4-byte Folded Reload
1286; SPE-NEXT:    lwz r22, 24(r1) # 4-byte Folded Reload
1287; SPE-NEXT:    lwz r21, 20(r1) # 4-byte Folded Reload
1288; SPE-NEXT:    lwz r0, 68(r1)
1289; SPE-NEXT:    addi r1, r1, 64
1290; SPE-NEXT:    mtlr r0
1291; SPE-NEXT:    blr
1292  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1293                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
1294                        metadata !"round.dynamic",
1295                        metadata !"fpexcept.strict") #0
1296  %res = fneg <4 x float> %fma
1297  ret <4 x float> %res
1298}
1299
1300define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1301; CHECK-LABEL: fnmadd_v2f64:
1302; CHECK:       # %bb.0:
1303; CHECK-NEXT:    xvnmaddadp v4, v2, v3
1304; CHECK-NEXT:    vmr v2, v4
1305; CHECK-NEXT:    blr
1306;
1307; NOVSX-LABEL: fnmadd_v2f64:
1308; NOVSX:       # %bb.0:
1309; NOVSX-NEXT:    fnmadd f2, f2, f4, f6
1310; NOVSX-NEXT:    fnmadd f1, f1, f3, f5
1311; NOVSX-NEXT:    blr
1312;
1313; SPE-LABEL: fnmadd_v2f64:
1314; SPE:       # %bb.0:
1315; SPE-NEXT:    mflr r0
1316; SPE-NEXT:    stwu r1, -80(r1)
1317; SPE-NEXT:    stw r0, 84(r1)
1318; SPE-NEXT:    .cfi_def_cfa_offset 80
1319; SPE-NEXT:    .cfi_offset lr, 4
1320; SPE-NEXT:    .cfi_offset r26, -64
1321; SPE-NEXT:    .cfi_offset r27, -56
1322; SPE-NEXT:    .cfi_offset r28, -48
1323; SPE-NEXT:    .cfi_offset r29, -40
1324; SPE-NEXT:    .cfi_offset r30, -8
1325; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
1326; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
1327; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
1328; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
1329; SPE-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
1330; SPE-NEXT:    evmergelo r27, r7, r8
1331; SPE-NEXT:    evmergelo r9, r9, r10
1332; SPE-NEXT:    evmergelo r4, r5, r6
1333; SPE-NEXT:    mr r30, r3
1334; SPE-NEXT:    evldd r8, 96(r1)
1335; SPE-NEXT:    evmergehi r3, r4, r4
1336; SPE-NEXT:    evmergehi r5, r9, r9
1337; SPE-NEXT:    mr r6, r9
1338; SPE-NEXT:    evldd r29, 104(r1)
1339; SPE-NEXT:    evmergehi r7, r8, r8
1340; SPE-NEXT:    evldd r28, 88(r1)
1341; SPE-NEXT:    bl fma
1342; SPE-NEXT:    evmergelo r26, r3, r4
1343; SPE-NEXT:    evmergehi r3, r27, r27
1344; SPE-NEXT:    evmergehi r5, r28, r28
1345; SPE-NEXT:    evmergehi r7, r29, r29
1346; SPE-NEXT:    mr r4, r27
1347; SPE-NEXT:    mr r6, r28
1348; SPE-NEXT:    mr r8, r29
1349; SPE-NEXT:    bl fma
1350; SPE-NEXT:    evmergelo r3, r3, r4
1351; SPE-NEXT:    li r5, 8
1352; SPE-NEXT:    efdneg r3, r3
1353; SPE-NEXT:    evstddx r3, r30, r5
1354; SPE-NEXT:    efdneg r3, r26
1355; SPE-NEXT:    evstdd r3, 0(r30)
1356; SPE-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
1357; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
1358; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
1359; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
1360; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
1361; SPE-NEXT:    lwz r0, 84(r1)
1362; SPE-NEXT:    addi r1, r1, 80
1363; SPE-NEXT:    mtlr r0
1364; SPE-NEXT:    blr
1365  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1366                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
1367                        metadata !"round.dynamic",
1368                        metadata !"fpexcept.strict") #0
1369  %res = fneg <2 x double> %fma
1370  ret <2 x double> %res
1371}
1372
; Strict f32 negated fused multiply-subtract, -(a*b - c). VSX: xsnmsubasp;
; non-VSX: fnmsubs. SPE negates the addend before the fmaf libcall and negates
; the result after it.
1373define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
1374; CHECK-LABEL: fnmsub_f32:
1375; CHECK:       # %bb.0:
1376; CHECK-NEXT:    xsnmsubasp f3, f1, f2
1377; CHECK-NEXT:    fmr f1, f3
1378; CHECK-NEXT:    blr
1379;
1380; NOVSX-LABEL: fnmsub_f32:
1381; NOVSX:       # %bb.0:
1382; NOVSX-NEXT:    fnmsubs f1, f1, f2, f3
1383; NOVSX-NEXT:    blr
1384;
1385; SPE-LABEL: fnmsub_f32:
1386; SPE:       # %bb.0:
1387; SPE-NEXT:    mflr r0
1388; SPE-NEXT:    stwu r1, -16(r1)
1389; SPE-NEXT:    stw r0, 20(r1)
1390; SPE-NEXT:    .cfi_def_cfa_offset 16
1391; SPE-NEXT:    .cfi_offset lr, 4
1392; SPE-NEXT:    efsneg r5, r5
1393; SPE-NEXT:    bl fmaf
1394; SPE-NEXT:    efsneg r3, r3
1395; SPE-NEXT:    lwz r0, 20(r1)
1396; SPE-NEXT:    addi r1, r1, 16
1397; SPE-NEXT:    mtlr r0
1398; SPE-NEXT:    blr
1399  %neg = fneg float %f2
1400  %fma = call float @llvm.experimental.constrained.fma.f32(
1401                        float %f0, float %f1, float %neg,
1402                        metadata !"round.dynamic",
1403                        metadata !"fpexcept.strict") #0
1404  %res = fneg float %fma
1405  ret float %res
1406}
1407
; Strict f64 negated fused multiply-subtract. VSX: xsnmsubadp; non-VSX: fnmsub.
; SPE negates the addend (efdneg) before the fma libcall and the result after,
; with evmergelo/evmergehi shuffling the GPR-pair doubles around the call.
1408define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
1409; CHECK-LABEL: fnmsub_f64:
1410; CHECK:       # %bb.0:
1411; CHECK-NEXT:    xsnmsubadp f3, f1, f2
1412; CHECK-NEXT:    fmr f1, f3
1413; CHECK-NEXT:    blr
1414;
1415; NOVSX-LABEL: fnmsub_f64:
1416; NOVSX:       # %bb.0:
1417; NOVSX-NEXT:    fnmsub f1, f1, f2, f3
1418; NOVSX-NEXT:    blr
1419;
1420; SPE-LABEL: fnmsub_f64:
1421; SPE:       # %bb.0:
1422; SPE-NEXT:    mflr r0
1423; SPE-NEXT:    stwu r1, -16(r1)
1424; SPE-NEXT:    stw r0, 20(r1)
1425; SPE-NEXT:    .cfi_def_cfa_offset 16
1426; SPE-NEXT:    .cfi_offset lr, 4
1427; SPE-NEXT:    evmergelo r6, r5, r6
1428; SPE-NEXT:    evmergelo r4, r3, r4
1429; SPE-NEXT:    evmergelo r3, r7, r8
1430; SPE-NEXT:    efdneg r8, r3
1431; SPE-NEXT:    evmergehi r3, r4, r4
1432; SPE-NEXT:    evmergehi r5, r6, r6
1433; SPE-NEXT:    evmergehi r7, r8, r8
1434; SPE-NEXT:    bl fma
1435; SPE-NEXT:    evmergelo r3, r3, r4
1436; SPE-NEXT:    efdneg r4, r3
1437; SPE-NEXT:    evmergehi r3, r4, r4
1438; SPE-NEXT:    lwz r0, 20(r1)
1439; SPE-NEXT:    addi r1, r1, 16
1440; SPE-NEXT:    mtlr r0
1441; SPE-NEXT:    blr
1442  %neg = fneg double %f2
1443  %fma = call double @llvm.experimental.constrained.fma.f64(
1444                        double %f0, double %f1, double %neg,
1445                        metadata !"round.dynamic",
1446                        metadata !"fpexcept.strict") #0
1447  %res = fneg double %fma
1448  ret double %res
1449}
1450
; Strict v4f32 negated fused multiply-subtract. VSX fuses both fnegs into a
; single xvnmsubasp. Non-VSX scalarizes: it vxors the addend's sign bits, does
; four fmadds through the stack, then vxors the reassembled result. SPE makes
; four fmaf libcalls with negated addends and negates each result lane.
1451define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1452; CHECK-LABEL: fnmsub_v4f32:
1453; CHECK:       # %bb.0:
1454; CHECK-NEXT:    xvnmsubasp v4, v2, v3
1455; CHECK-NEXT:    vmr v2, v4
1456; CHECK-NEXT:    blr
1457;
1458; NOVSX-LABEL: fnmsub_v4f32:
1459; NOVSX:       # %bb.0:
1460; NOVSX-NEXT:    vspltisb v5, -1
1461; NOVSX-NEXT:    addi r3, r1, -48
1462; NOVSX-NEXT:    vslw v5, v5, v5
1463; NOVSX-NEXT:    stvx v3, 0, r3
1464; NOVSX-NEXT:    addi r3, r1, -64
1465; NOVSX-NEXT:    vxor v4, v4, v5
1466; NOVSX-NEXT:    stvx v2, 0, r3
1467; NOVSX-NEXT:    addi r3, r1, -32
1468; NOVSX-NEXT:    stvx v4, 0, r3
1469; NOVSX-NEXT:    addi r3, r1, -16
1470; NOVSX-NEXT:    lfs f0, -36(r1)
1471; NOVSX-NEXT:    lfs f1, -52(r1)
1472; NOVSX-NEXT:    lfs f2, -20(r1)
1473; NOVSX-NEXT:    fmadds f0, f1, f0, f2
1474; NOVSX-NEXT:    lfs f1, -56(r1)
1475; NOVSX-NEXT:    lfs f2, -24(r1)
1476; NOVSX-NEXT:    stfs f0, -4(r1)
1477; NOVSX-NEXT:    lfs f0, -40(r1)
1478; NOVSX-NEXT:    fmadds f0, f1, f0, f2
1479; NOVSX-NEXT:    lfs f1, -60(r1)
1480; NOVSX-NEXT:    lfs f2, -28(r1)
1481; NOVSX-NEXT:    stfs f0, -8(r1)
1482; NOVSX-NEXT:    lfs f0, -44(r1)
1483; NOVSX-NEXT:    fmadds f0, f1, f0, f2
1484; NOVSX-NEXT:    lfs f1, -64(r1)
1485; NOVSX-NEXT:    lfs f2, -32(r1)
1486; NOVSX-NEXT:    stfs f0, -12(r1)
1487; NOVSX-NEXT:    lfs f0, -48(r1)
1488; NOVSX-NEXT:    fmadds f0, f1, f0, f2
1489; NOVSX-NEXT:    stfs f0, -16(r1)
1490; NOVSX-NEXT:    lvx v2, 0, r3
1491; NOVSX-NEXT:    vxor v2, v2, v5
1492; NOVSX-NEXT:    blr
1493;
1494; SPE-LABEL: fnmsub_v4f32:
1495; SPE:       # %bb.0:
1496; SPE-NEXT:    mflr r0
1497; SPE-NEXT:    stwu r1, -64(r1)
1498; SPE-NEXT:    stw r0, 68(r1)
1499; SPE-NEXT:    .cfi_def_cfa_offset 64
1500; SPE-NEXT:    .cfi_offset lr, 4
1501; SPE-NEXT:    .cfi_offset r21, -44
1502; SPE-NEXT:    .cfi_offset r22, -40
1503; SPE-NEXT:    .cfi_offset r23, -36
1504; SPE-NEXT:    .cfi_offset r24, -32
1505; SPE-NEXT:    .cfi_offset r25, -28
1506; SPE-NEXT:    .cfi_offset r26, -24
1507; SPE-NEXT:    .cfi_offset r27, -20
1508; SPE-NEXT:    .cfi_offset r28, -16
1509; SPE-NEXT:    .cfi_offset r29, -12
1510; SPE-NEXT:    .cfi_offset r30, -8
1511; SPE-NEXT:    stw r25, 36(r1) # 4-byte Folded Spill
1512; SPE-NEXT:    mr r25, r3
1513; SPE-NEXT:    stw r26, 40(r1) # 4-byte Folded Spill
1514; SPE-NEXT:    mr r26, r4
1515; SPE-NEXT:    stw r27, 44(r1) # 4-byte Folded Spill
1516; SPE-NEXT:    mr r27, r5
1517; SPE-NEXT:    stw r28, 48(r1) # 4-byte Folded Spill
1518; SPE-NEXT:    mr r28, r7
1519; SPE-NEXT:    lwz r3, 80(r1)
1520; SPE-NEXT:    lwz r4, 72(r1)
1521; SPE-NEXT:    lwz r5, 76(r1)
1522; SPE-NEXT:    lwz r7, 84(r1)
1523; SPE-NEXT:    stw r22, 24(r1) # 4-byte Folded Spill
1524; SPE-NEXT:    efsneg r22, r3
1525; SPE-NEXT:    stw r23, 28(r1) # 4-byte Folded Spill
1526; SPE-NEXT:    efsneg r23, r5
1527; SPE-NEXT:    stw r24, 32(r1) # 4-byte Folded Spill
1528; SPE-NEXT:    efsneg r24, r4
1529; SPE-NEXT:    efsneg r5, r7
1530; SPE-NEXT:    mr r3, r6
1531; SPE-NEXT:    mr r4, r10
1532; SPE-NEXT:    stw r21, 20(r1) # 4-byte Folded Spill
1533; SPE-NEXT:    stw r29, 52(r1) # 4-byte Folded Spill
1534; SPE-NEXT:    mr r29, r8
1535; SPE-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
1536; SPE-NEXT:    mr r30, r9
1537; SPE-NEXT:    bl fmaf
1538; SPE-NEXT:    mr r21, r3
1539; SPE-NEXT:    mr r3, r27
1540; SPE-NEXT:    mr r4, r30
1541; SPE-NEXT:    mr r5, r22
1542; SPE-NEXT:    bl fmaf
1543; SPE-NEXT:    mr r30, r3
1544; SPE-NEXT:    mr r3, r26
1545; SPE-NEXT:    mr r4, r29
1546; SPE-NEXT:    mr r5, r23
1547; SPE-NEXT:    bl fmaf
1548; SPE-NEXT:    mr r29, r3
1549; SPE-NEXT:    mr r3, r25
1550; SPE-NEXT:    mr r4, r28
1551; SPE-NEXT:    mr r5, r24
1552; SPE-NEXT:    bl fmaf
1553; SPE-NEXT:    efsneg r4, r29
1554; SPE-NEXT:    efsneg r5, r30
1555; SPE-NEXT:    efsneg r3, r3
1556; SPE-NEXT:    efsneg r6, r21
1557; SPE-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
1558; SPE-NEXT:    lwz r29, 52(r1) # 4-byte Folded Reload
1559; SPE-NEXT:    lwz r28, 48(r1) # 4-byte Folded Reload
1560; SPE-NEXT:    lwz r27, 44(r1) # 4-byte Folded Reload
1561; SPE-NEXT:    lwz r26, 40(r1) # 4-byte Folded Reload
1562; SPE-NEXT:    lwz r25, 36(r1) # 4-byte Folded Reload
1563; SPE-NEXT:    lwz r24, 32(r1) # 4-byte Folded Reload
1564; SPE-NEXT:    lwz r23, 28(r1) # 4-byte Folded Reload
1565; SPE-NEXT:    lwz r22, 24(r1) # 4-byte Folded Reload
1566; SPE-NEXT:    lwz r21, 20(r1) # 4-byte Folded Reload
1567; SPE-NEXT:    lwz r0, 68(r1)
1568; SPE-NEXT:    addi r1, r1, 64
1569; SPE-NEXT:    mtlr r0
1570; SPE-NEXT:    blr
1571  %neg = fneg <4 x float> %vf2
1572  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1573                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1574                        metadata !"round.dynamic",
1575                        metadata !"fpexcept.strict") #0
1576  %res = fneg <4 x float> %fma
1577  ret <4 x float> %res
1578}
1579
; Strict v2f64 negated fused multiply-subtract. VSX: xvnmsubadp; non-VSX: two
; fnmsub. SPE makes two fma libcalls with efdneg-negated addends, negates each
; result element, and stores the vector through the sret pointer in r30.
1580define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1581; CHECK-LABEL: fnmsub_v2f64:
1582; CHECK:       # %bb.0:
1583; CHECK-NEXT:    xvnmsubadp v4, v2, v3
1584; CHECK-NEXT:    vmr v2, v4
1585; CHECK-NEXT:    blr
1586;
1587; NOVSX-LABEL: fnmsub_v2f64:
1588; NOVSX:       # %bb.0:
1589; NOVSX-NEXT:    fnmsub f2, f2, f4, f6
1590; NOVSX-NEXT:    fnmsub f1, f1, f3, f5
1591; NOVSX-NEXT:    blr
1592;
1593; SPE-LABEL: fnmsub_v2f64:
1594; SPE:       # %bb.0:
1595; SPE-NEXT:    mflr r0
1596; SPE-NEXT:    stwu r1, -80(r1)
1597; SPE-NEXT:    stw r0, 84(r1)
1598; SPE-NEXT:    .cfi_def_cfa_offset 80
1599; SPE-NEXT:    .cfi_offset lr, 4
1600; SPE-NEXT:    .cfi_offset r26, -64
1601; SPE-NEXT:    .cfi_offset r27, -56
1602; SPE-NEXT:    .cfi_offset r28, -48
1603; SPE-NEXT:    .cfi_offset r29, -40
1604; SPE-NEXT:    .cfi_offset r30, -8
1605; SPE-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
1606; SPE-NEXT:    mr r30, r3
1607; SPE-NEXT:    evldd r3, 96(r1)
1608; SPE-NEXT:    evldd r11, 104(r1)
1609; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
1610; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
1611; SPE-NEXT:    efdneg r27, r11
1612; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
1613; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
1614; SPE-NEXT:    evmergelo r29, r7, r8
1615; SPE-NEXT:    evmergelo r9, r9, r10
1616; SPE-NEXT:    evmergelo r4, r5, r6
1617; SPE-NEXT:    efdneg r8, r3
1618; SPE-NEXT:    evmergehi r3, r4, r4
1619; SPE-NEXT:    evmergehi r5, r9, r9
1620; SPE-NEXT:    evmergehi r7, r8, r8
1621; SPE-NEXT:    mr r6, r9
1622; SPE-NEXT:    evldd r28, 88(r1)
1623; SPE-NEXT:    bl fma
1624; SPE-NEXT:    evmergelo r26, r3, r4
1625; SPE-NEXT:    evmergehi r3, r29, r29
1626; SPE-NEXT:    evmergehi r5, r28, r28
1627; SPE-NEXT:    evmergehi r7, r27, r27
1628; SPE-NEXT:    mr r4, r29
1629; SPE-NEXT:    mr r6, r28
1630; SPE-NEXT:    mr r8, r27
1631; SPE-NEXT:    bl fma
1632; SPE-NEXT:    evmergelo r3, r3, r4
1633; SPE-NEXT:    li r5, 8
1634; SPE-NEXT:    efdneg r3, r3
1635; SPE-NEXT:    evstddx r3, r30, r5
1636; SPE-NEXT:    efdneg r3, r26
1637; SPE-NEXT:    evstdd r3, 0(r30)
1638; SPE-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
1639; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
1640; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
1641; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
1642; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
1643; SPE-NEXT:    lwz r0, 84(r1)
1644; SPE-NEXT:    addi r1, r1, 80
1645; SPE-NEXT:    mtlr r0
1646; SPE-NEXT:    blr
1647  %neg = fneg <2 x double> %vf2
1648  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1649                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1650                        metadata !"round.dynamic",
1651                        metadata !"fpexcept.strict") #0
1652  %res = fneg <2 x double> %fma
1653  ret <2 x double> %res
1654}
1655
; Strict f32 square root. VSX: xssqrtsp; non-VSX: fsqrts; SPE has no sqrt
; instruction and calls the sqrtf libcall.
1656define float @fsqrt_f32(float %f1) #0 {
1657; CHECK-LABEL: fsqrt_f32:
1658; CHECK:       # %bb.0:
1659; CHECK-NEXT:    xssqrtsp f1, f1
1660; CHECK-NEXT:    blr
1661;
1662; NOVSX-LABEL: fsqrt_f32:
1663; NOVSX:       # %bb.0:
1664; NOVSX-NEXT:    fsqrts f1, f1
1665; NOVSX-NEXT:    blr
1666;
1667; SPE-LABEL: fsqrt_f32:
1668; SPE:       # %bb.0:
1669; SPE-NEXT:    mflr r0
1670; SPE-NEXT:    stwu r1, -16(r1)
1671; SPE-NEXT:    stw r0, 20(r1)
1672; SPE-NEXT:    .cfi_def_cfa_offset 16
1673; SPE-NEXT:    .cfi_offset lr, 4
1674; SPE-NEXT:    bl sqrtf
1675; SPE-NEXT:    lwz r0, 20(r1)
1676; SPE-NEXT:    addi r1, r1, 16
1677; SPE-NEXT:    mtlr r0
1678; SPE-NEXT:    blr
1679  %res = call float @llvm.experimental.constrained.sqrt.f32(
1680                        float %f1,
1681                        metadata !"round.dynamic",
1682                        metadata !"fpexcept.strict") #0
1683  ret float %res
1684}
1685
; Strict f64 square root. VSX: xssqrtdp; non-VSX: fsqrt; SPE calls the sqrt
; libcall, marshalling the GPR-pair double with evmergelo/evmergehi.
1686define double @fsqrt_f64(double %f1) #0 {
1687; CHECK-LABEL: fsqrt_f64:
1688; CHECK:       # %bb.0:
1689; CHECK-NEXT:    xssqrtdp f1, f1
1690; CHECK-NEXT:    blr
1691;
1692; NOVSX-LABEL: fsqrt_f64:
1693; NOVSX:       # %bb.0:
1694; NOVSX-NEXT:    fsqrt f1, f1
1695; NOVSX-NEXT:    blr
1696;
1697; SPE-LABEL: fsqrt_f64:
1698; SPE:       # %bb.0:
1699; SPE-NEXT:    mflr r0
1700; SPE-NEXT:    stwu r1, -16(r1)
1701; SPE-NEXT:    stw r0, 20(r1)
1702; SPE-NEXT:    .cfi_def_cfa_offset 16
1703; SPE-NEXT:    .cfi_offset lr, 4
1704; SPE-NEXT:    evmergelo r4, r3, r4
1705; SPE-NEXT:    evmergehi r3, r4, r4
1706; SPE-NEXT:    bl sqrt
1707; SPE-NEXT:    evmergelo r4, r3, r4
1708; SPE-NEXT:    evmergehi r3, r4, r4
1709; SPE-NEXT:    lwz r0, 20(r1)
1710; SPE-NEXT:    addi r1, r1, 16
1711; SPE-NEXT:    mtlr r0
1712; SPE-NEXT:    blr
1713  %res = call double @llvm.experimental.constrained.sqrt.f64(
1714                        double %f1,
1715                        metadata !"round.dynamic",
1716                        metadata !"fpexcept.strict") #0
1717  ret double %res
1718}
1719
; Strict v4f32 square root. VSX: one xvsqrtsp. Non-VSX scalarizes through the
; stack with four fsqrts. SPE makes four sqrtf libcalls, one per lane.
1720define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
1721; CHECK-LABEL: fsqrt_v4f32:
1722; CHECK:       # %bb.0:
1723; CHECK-NEXT:    xvsqrtsp v2, v2
1724; CHECK-NEXT:    blr
1725;
1726; NOVSX-LABEL: fsqrt_v4f32:
1727; NOVSX:       # %bb.0:
1728; NOVSX-NEXT:    addi r3, r1, -32
1729; NOVSX-NEXT:    stvx v2, 0, r3
1730; NOVSX-NEXT:    addi r3, r1, -16
1731; NOVSX-NEXT:    lfs f0, -20(r1)
1732; NOVSX-NEXT:    fsqrts f0, f0
1733; NOVSX-NEXT:    stfs f0, -4(r1)
1734; NOVSX-NEXT:    lfs f0, -24(r1)
1735; NOVSX-NEXT:    fsqrts f0, f0
1736; NOVSX-NEXT:    stfs f0, -8(r1)
1737; NOVSX-NEXT:    lfs f0, -28(r1)
1738; NOVSX-NEXT:    fsqrts f0, f0
1739; NOVSX-NEXT:    stfs f0, -12(r1)
1740; NOVSX-NEXT:    lfs f0, -32(r1)
1741; NOVSX-NEXT:    fsqrts f0, f0
1742; NOVSX-NEXT:    stfs f0, -16(r1)
1743; NOVSX-NEXT:    lvx v2, 0, r3
1744; NOVSX-NEXT:    blr
1745;
1746; SPE-LABEL: fsqrt_v4f32:
1747; SPE:       # %bb.0:
1748; SPE-NEXT:    mflr r0
1749; SPE-NEXT:    stwu r1, -32(r1)
1750; SPE-NEXT:    stw r0, 36(r1)
1751; SPE-NEXT:    .cfi_def_cfa_offset 32
1752; SPE-NEXT:    .cfi_offset lr, 4
1753; SPE-NEXT:    .cfi_offset r27, -20
1754; SPE-NEXT:    .cfi_offset r28, -16
1755; SPE-NEXT:    .cfi_offset r29, -12
1756; SPE-NEXT:    .cfi_offset r30, -8
1757; SPE-NEXT:    stw r28, 16(r1) # 4-byte Folded Spill
1758; SPE-NEXT:    mr r28, r3
1759; SPE-NEXT:    mr r3, r6
1760; SPE-NEXT:    stw r27, 12(r1) # 4-byte Folded Spill
1761; SPE-NEXT:    stw r29, 20(r1) # 4-byte Folded Spill
1762; SPE-NEXT:    mr r29, r4
1763; SPE-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
1764; SPE-NEXT:    mr r30, r5
1765; SPE-NEXT:    bl sqrtf
1766; SPE-NEXT:    mr r27, r3
1767; SPE-NEXT:    mr r3, r30
1768; SPE-NEXT:    bl sqrtf
1769; SPE-NEXT:    mr r30, r3
1770; SPE-NEXT:    mr r3, r29
1771; SPE-NEXT:    bl sqrtf
1772; SPE-NEXT:    mr r29, r3
1773; SPE-NEXT:    mr r3, r28
1774; SPE-NEXT:    bl sqrtf
1775; SPE-NEXT:    mr r4, r29
1776; SPE-NEXT:    mr r5, r30
1777; SPE-NEXT:    mr r6, r27
1778; SPE-NEXT:    lwz r30, 24(r1) # 4-byte Folded Reload
1779; SPE-NEXT:    lwz r29, 20(r1) # 4-byte Folded Reload
1780; SPE-NEXT:    lwz r28, 16(r1) # 4-byte Folded Reload
1781; SPE-NEXT:    lwz r27, 12(r1) # 4-byte Folded Reload
1782; SPE-NEXT:    lwz r0, 36(r1)
1783; SPE-NEXT:    addi r1, r1, 32
1784; SPE-NEXT:    mtlr r0
1785; SPE-NEXT:    blr
1786  %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
1787                        <4 x float> %vf1,
1788                        metadata !"round.dynamic",
1789                        metadata !"fpexcept.strict") #0
1790  ret <4 x float> %res
1791}
1792
; Strict-FP vector sqrt, v2f64. With VSX this is a single xvsqrtdp. Without
; VSX the two doubles arrive in f1/f2 and each gets a scalar fsqrt. SPE lowers
; each element to a sqrt libcall: evmergelo/evmergehi reassemble the 64-bit
; values from GPR pairs around the calls, and the result is returned via the
; sret pointer in r30 (evstdd/evstddx stores).
define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
; CHECK-LABEL: fsqrt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvsqrtdp v2, v2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsqrt_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsqrt f2, f2
; NOVSX-NEXT:    fsqrt f1, f1
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsqrt_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stwu r1, -64(r1)
; SPE-NEXT:    stw r0, 68(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 64
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r28, -48
; SPE-NEXT:    .cfi_offset r29, -40
; SPE-NEXT:    .cfi_offset r30, -8
; SPE-NEXT:    evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
; SPE-NEXT:    evmergelo r29, r7, r8
; SPE-NEXT:    evmergelo r4, r5, r6
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    bl sqrt
; SPE-NEXT:    evmergelo r28, r3, r4
; SPE-NEXT:    evmergehi r3, r29, r29
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    bl sqrt
; SPE-NEXT:    li r5, 8
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    evstddx r3, r30, r5
; SPE-NEXT:    evstdd r28, 0(r30)
; SPE-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
; SPE-NEXT:    evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 68(r1)
; SPE-NEXT:    addi r1, r1, 64
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
                        <2 x double> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}
1844
; All test functions and intrinsic call sites carry #0 (strictfp), as required
; for the experimental constrained FP intrinsics used throughout this file.
attributes #0 = { strictfp }
1846