xref: /llvm-project/llvm/test/CodeGen/PowerPC/f128-fma.ll (revision 032014ef103157bfd8403418538e25f3f58efa9d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
4; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
5; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
6; RUN:   -check-prefix=CHECK-P8
7
; qpFmadd: loads fp128 values from %a, %b and %c, combines them with
; @llvm.fmuladd.f128(%a, %b, %c) and stores the result to %res.
;   CHECK    (llc -mcpu=pwr9 RUN line): the three loads feed one xsmaddqp.
;   CHECK-P8 (llc -mcpu=pwr8 RUN line): calls to __mulkf3, then __addkf3.
; The CHECK lines below are autogenerated (see the NOTE at the top of the
; file), so regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
8define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
9; CHECK-LABEL: qpFmadd:
10; CHECK:       # %bb.0: # %entry
11; CHECK-NEXT:    lxv v2, 0(r3)
12; CHECK-NEXT:    lxv v3, 0(r4)
13; CHECK-NEXT:    lxv v4, 0(r5)
14; CHECK-NEXT:    xsmaddqp v4, v2, v3
15; CHECK-NEXT:    stxv v4, 0(r6)
16; CHECK-NEXT:    blr
17;
18; CHECK-P8-LABEL: qpFmadd:
19; CHECK-P8:       # %bb.0: # %entry
20; CHECK-P8-NEXT:    mflr r0
21; CHECK-P8-NEXT:    stdu r1, -80(r1)
22; CHECK-P8-NEXT:    std r0, 96(r1)
23; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
24; CHECK-P8-NEXT:    .cfi_offset lr, 16
25; CHECK-P8-NEXT:    .cfi_offset r30, -16
26; CHECK-P8-NEXT:    .cfi_offset v31, -32
27; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
28; CHECK-P8-NEXT:    li r7, 48
29; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
30; CHECK-P8-NEXT:    mr r30, r6
31; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
32; CHECK-P8-NEXT:    xxswapd v2, vs0
33; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
34; CHECK-P8-NEXT:    xxswapd v3, vs0
35; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
36; CHECK-P8-NEXT:    xxswapd v31, vs0
37; CHECK-P8-NEXT:    bl __mulkf3
38; CHECK-P8-NEXT:    nop
39; CHECK-P8-NEXT:    vmr v3, v31
40; CHECK-P8-NEXT:    bl __addkf3
41; CHECK-P8-NEXT:    nop
42; CHECK-P8-NEXT:    li r3, 48
43; CHECK-P8-NEXT:    xxswapd vs0, v2
44; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
45; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
46; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
47; CHECK-P8-NEXT:    addi r1, r1, 80
48; CHECK-P8-NEXT:    ld r0, 16(r1)
49; CHECK-P8-NEXT:    mtlr r0
50; CHECK-P8-NEXT:    blr
51                   ptr nocapture readonly %c, ptr nocapture %res) {
52entry:
53  %0 = load fp128, ptr %a, align 16
54  %1 = load fp128, ptr %b, align 16
55  %2 = load fp128, ptr %c, align 16
56  %madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
57  store fp128 %madd, ptr %res, align 16
58  ret void
59}
; Declaration of the fp128 fmuladd intrinsic called by @qpFmadd.
60declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
61
62; Function Attrs: norecurse nounwind
; qpFmadd_02: computes %a + (%b * %c) on fp128 values using a separate
; `fmul contract` and `fadd contract`, with the product as the second fadd
; operand, and stores the sum to %res.
;   CHECK    (pwr9): the pair is expected to become xsmaddqp v2, v3, v4.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __addkf3; the value loaded from
;                    %a is held in v31 across the first call.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
63define void @qpFmadd_02(ptr nocapture readonly %a,
64; CHECK-LABEL: qpFmadd_02:
65; CHECK:       # %bb.0: # %entry
66; CHECK-NEXT:    lxv v2, 0(r3)
67; CHECK-NEXT:    lxv v3, 0(r4)
68; CHECK-NEXT:    lxv v4, 0(r5)
69; CHECK-NEXT:    xsmaddqp v2, v3, v4
70; CHECK-NEXT:    stxv v2, 0(r6)
71; CHECK-NEXT:    blr
72;
73; CHECK-P8-LABEL: qpFmadd_02:
74; CHECK-P8:       # %bb.0: # %entry
75; CHECK-P8-NEXT:    mflr r0
76; CHECK-P8-NEXT:    stdu r1, -80(r1)
77; CHECK-P8-NEXT:    std r0, 96(r1)
78; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
79; CHECK-P8-NEXT:    .cfi_offset lr, 16
80; CHECK-P8-NEXT:    .cfi_offset r30, -16
81; CHECK-P8-NEXT:    .cfi_offset v31, -32
82; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
83; CHECK-P8-NEXT:    li r7, 48
84; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
85; CHECK-P8-NEXT:    mr r30, r6
86; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
87; CHECK-P8-NEXT:    xxswapd v31, vs0
88; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
89; CHECK-P8-NEXT:    xxswapd v2, vs0
90; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
91; CHECK-P8-NEXT:    xxswapd v3, vs0
92; CHECK-P8-NEXT:    bl __mulkf3
93; CHECK-P8-NEXT:    nop
94; CHECK-P8-NEXT:    vmr v3, v2
95; CHECK-P8-NEXT:    vmr v2, v31
96; CHECK-P8-NEXT:    bl __addkf3
97; CHECK-P8-NEXT:    nop
98; CHECK-P8-NEXT:    li r3, 48
99; CHECK-P8-NEXT:    xxswapd vs0, v2
100; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
101; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
102; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
103; CHECK-P8-NEXT:    addi r1, r1, 80
104; CHECK-P8-NEXT:    ld r0, 16(r1)
105; CHECK-P8-NEXT:    mtlr r0
106; CHECK-P8-NEXT:    blr
107                        ptr nocapture readonly %b,
108                        ptr nocapture readonly %c, ptr nocapture %res) {
109entry:
110  %0 = load fp128, ptr %a, align 16
111  %1 = load fp128, ptr %b, align 16
112  %2 = load fp128, ptr %c, align 16
113  %mul = fmul contract fp128 %1, %2
114  %add = fadd contract fp128 %0, %mul
115  store fp128 %add, ptr %res, align 16
116  ret void
117}
118
119; Function Attrs: norecurse nounwind
; qpFmadd_03: computes (%a * %b) + %c on fp128 values using `fmul contract`
; and `fadd contract`, with the product as the first fadd operand and the
; load of %c placed after the multiply in the IR. The sum is stored to %res.
;   CHECK    (pwr9): expected to become xsmaddqp v4, v2, v3.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __addkf3; the %c pointer is kept
;                    in r29 and loaded only after the first call.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
120define void @qpFmadd_03(ptr nocapture readonly %a,
121; CHECK-LABEL: qpFmadd_03:
122; CHECK:       # %bb.0: # %entry
123; CHECK-NEXT:    lxv v2, 0(r3)
124; CHECK-NEXT:    lxv v3, 0(r4)
125; CHECK-NEXT:    lxv v4, 0(r5)
126; CHECK-NEXT:    xsmaddqp v4, v2, v3
127; CHECK-NEXT:    stxv v4, 0(r6)
128; CHECK-NEXT:    blr
129;
130; CHECK-P8-LABEL: qpFmadd_03:
131; CHECK-P8:       # %bb.0: # %entry
132; CHECK-P8-NEXT:    mflr r0
133; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
134; CHECK-P8-NEXT:    .cfi_offset lr, 16
135; CHECK-P8-NEXT:    .cfi_offset r29, -24
136; CHECK-P8-NEXT:    .cfi_offset r30, -16
137; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
138; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
139; CHECK-P8-NEXT:    stdu r1, -64(r1)
140; CHECK-P8-NEXT:    std r0, 80(r1)
141; CHECK-P8-NEXT:    mr r30, r6
142; CHECK-P8-NEXT:    mr r29, r5
143; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
144; CHECK-P8-NEXT:    xxswapd v2, vs0
145; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
146; CHECK-P8-NEXT:    xxswapd v3, vs0
147; CHECK-P8-NEXT:    bl __mulkf3
148; CHECK-P8-NEXT:    nop
149; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
150; CHECK-P8-NEXT:    xxswapd v3, vs0
151; CHECK-P8-NEXT:    bl __addkf3
152; CHECK-P8-NEXT:    nop
153; CHECK-P8-NEXT:    xxswapd vs0, v2
154; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
155; CHECK-P8-NEXT:    addi r1, r1, 64
156; CHECK-P8-NEXT:    ld r0, 16(r1)
157; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
158; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
159; CHECK-P8-NEXT:    mtlr r0
160; CHECK-P8-NEXT:    blr
161                        ptr nocapture readonly %b,
162                        ptr nocapture readonly %c, ptr nocapture %res) {
163entry:
164  %0 = load fp128, ptr %a, align 16
165  %1 = load fp128, ptr %b, align 16
166  %mul = fmul contract fp128 %0, %1
167  %2 = load fp128, ptr %c, align 16
168  %add = fadd contract fp128 %mul, %2
169  store fp128 %add, ptr %res, align 16
170  ret void
171}
172
173; Function Attrs: norecurse nounwind
; qpFnmadd: computes -(%a + (%b * %c)) on fp128 values and stores it to %res.
; The multiply and add carry `contract`; the negation is written as an fsub
; whose first operand is the constant 0xL00000000000000008000000000000000
; (only the sign bit set, i.e. -0.0).
;   CHECK    (pwr9): expected to become xsnmaddqp v2, v3, v4.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __addkf3; the sign is then
;                    flipped through a stack slot by XORing the byte at
;                    63(r1) with 128.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
174define void @qpFnmadd(ptr nocapture readonly %a,
175; CHECK-LABEL: qpFnmadd:
176; CHECK:       # %bb.0: # %entry
177; CHECK-NEXT:    lxv v2, 0(r3)
178; CHECK-NEXT:    lxv v3, 0(r4)
179; CHECK-NEXT:    lxv v4, 0(r5)
180; CHECK-NEXT:    xsnmaddqp v2, v3, v4
181; CHECK-NEXT:    stxv v2, 0(r6)
182; CHECK-NEXT:    blr
183;
184; CHECK-P8-LABEL: qpFnmadd:
185; CHECK-P8:       # %bb.0: # %entry
186; CHECK-P8-NEXT:    mflr r0
187; CHECK-P8-NEXT:    stdu r1, -96(r1)
188; CHECK-P8-NEXT:    std r0, 112(r1)
189; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
190; CHECK-P8-NEXT:    .cfi_offset lr, 16
191; CHECK-P8-NEXT:    .cfi_offset r30, -16
192; CHECK-P8-NEXT:    .cfi_offset v31, -32
193; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
194; CHECK-P8-NEXT:    li r7, 64
195; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
196; CHECK-P8-NEXT:    mr r30, r6
197; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
198; CHECK-P8-NEXT:    xxswapd v31, vs0
199; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
200; CHECK-P8-NEXT:    xxswapd v2, vs0
201; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
202; CHECK-P8-NEXT:    xxswapd v3, vs0
203; CHECK-P8-NEXT:    bl __mulkf3
204; CHECK-P8-NEXT:    nop
205; CHECK-P8-NEXT:    vmr v3, v2
206; CHECK-P8-NEXT:    vmr v2, v31
207; CHECK-P8-NEXT:    bl __addkf3
208; CHECK-P8-NEXT:    nop
209; CHECK-P8-NEXT:    xxswapd vs0, v2
210; CHECK-P8-NEXT:    addi r3, r1, 48
211; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
212; CHECK-P8-NEXT:    lbz r4, 63(r1)
213; CHECK-P8-NEXT:    xori r4, r4, 128
214; CHECK-P8-NEXT:    stb r4, 63(r1)
215; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
216; CHECK-P8-NEXT:    li r3, 64
217; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
218; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
219; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
220; CHECK-P8-NEXT:    addi r1, r1, 96
221; CHECK-P8-NEXT:    ld r0, 16(r1)
222; CHECK-P8-NEXT:    mtlr r0
223; CHECK-P8-NEXT:    blr
224                      ptr nocapture readonly %b,
225                      ptr nocapture readonly %c, ptr nocapture %res) {
226entry:
227  %0 = load fp128, ptr %a, align 16
228  %1 = load fp128, ptr %b, align 16
229  %2 = load fp128, ptr %c, align 16
230  %mul = fmul contract fp128 %1, %2
231  %add = fadd contract fp128 %0, %mul
232  %sub = fsub fp128 0xL00000000000000008000000000000000, %add
233  store fp128 %sub, ptr %res, align 16
234  ret void
235}
236
237; Function Attrs: norecurse nounwind
; qpFnmadd_02: computes -((%a * %b) + %c) on fp128 values and stores it to
; %res. The product is the first fadd operand and %c is loaded after the
; multiply; the negation is an fsub from the constant
; 0xL00000000000000008000000000000000 (only the sign bit set, i.e. -0.0).
;   CHECK    (pwr9): expected to become xsnmaddqp v4, v2, v3.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __addkf3; the sign is then
;                    flipped through a stack slot by XORing the byte at
;                    47(r1) with 128.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
238define void @qpFnmadd_02(ptr nocapture readonly %a,
239; CHECK-LABEL: qpFnmadd_02:
240; CHECK:       # %bb.0: # %entry
241; CHECK-NEXT:    lxv v2, 0(r3)
242; CHECK-NEXT:    lxv v3, 0(r4)
243; CHECK-NEXT:    lxv v4, 0(r5)
244; CHECK-NEXT:    xsnmaddqp v4, v2, v3
245; CHECK-NEXT:    stxv v4, 0(r6)
246; CHECK-NEXT:    blr
247;
248; CHECK-P8-LABEL: qpFnmadd_02:
249; CHECK-P8:       # %bb.0: # %entry
250; CHECK-P8-NEXT:    mflr r0
251; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
252; CHECK-P8-NEXT:    .cfi_offset lr, 16
253; CHECK-P8-NEXT:    .cfi_offset r29, -24
254; CHECK-P8-NEXT:    .cfi_offset r30, -16
255; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
256; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
257; CHECK-P8-NEXT:    stdu r1, -80(r1)
258; CHECK-P8-NEXT:    std r0, 96(r1)
259; CHECK-P8-NEXT:    mr r30, r6
260; CHECK-P8-NEXT:    mr r29, r5
261; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
262; CHECK-P8-NEXT:    xxswapd v2, vs0
263; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
264; CHECK-P8-NEXT:    xxswapd v3, vs0
265; CHECK-P8-NEXT:    bl __mulkf3
266; CHECK-P8-NEXT:    nop
267; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
268; CHECK-P8-NEXT:    xxswapd v3, vs0
269; CHECK-P8-NEXT:    bl __addkf3
270; CHECK-P8-NEXT:    nop
271; CHECK-P8-NEXT:    xxswapd vs0, v2
272; CHECK-P8-NEXT:    addi r3, r1, 32
273; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
274; CHECK-P8-NEXT:    lbz r4, 47(r1)
275; CHECK-P8-NEXT:    xori r4, r4, 128
276; CHECK-P8-NEXT:    stb r4, 47(r1)
277; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
278; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
279; CHECK-P8-NEXT:    addi r1, r1, 80
280; CHECK-P8-NEXT:    ld r0, 16(r1)
281; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
282; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
283; CHECK-P8-NEXT:    mtlr r0
284; CHECK-P8-NEXT:    blr
285                      ptr nocapture readonly %b,
286                      ptr nocapture readonly %c, ptr nocapture %res) {
287entry:
288  %0 = load fp128, ptr %a, align 16
289  %1 = load fp128, ptr %b, align 16
290  %mul = fmul contract fp128 %0, %1
291  %2 = load fp128, ptr %c, align 16
292  %add = fadd contract fp128 %mul, %2
293  %sub = fsub fp128 0xL00000000000000008000000000000000, %add
294  store fp128 %sub, ptr %res, align 16
295  ret void
296}
297
298; Function Attrs: norecurse nounwind
; qpFmsub: computes %a - (%b * %c) on fp128 values and stores it to %res.
; The multiply carries `contract`; the fsub carries `contract nsz`, and this
; is the only test in the visible part of the file whose fsub has `nsz`.
;   CHECK    (pwr9): expected to become xsnmsubqp v2, v3, v4.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __subkf3; the value loaded from
;                    %a is held in v31 across the first call.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
299define void @qpFmsub(ptr nocapture readonly %a,
300; CHECK-LABEL: qpFmsub:
301; CHECK:       # %bb.0: # %entry
302; CHECK-NEXT:    lxv v2, 0(r3)
303; CHECK-NEXT:    lxv v3, 0(r4)
304; CHECK-NEXT:    lxv v4, 0(r5)
305; CHECK-NEXT:    xsnmsubqp v2, v3, v4
306; CHECK-NEXT:    stxv v2, 0(r6)
307; CHECK-NEXT:    blr
308;
309; CHECK-P8-LABEL: qpFmsub:
310; CHECK-P8:       # %bb.0: # %entry
311; CHECK-P8-NEXT:    mflr r0
312; CHECK-P8-NEXT:    stdu r1, -80(r1)
313; CHECK-P8-NEXT:    std r0, 96(r1)
314; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
315; CHECK-P8-NEXT:    .cfi_offset lr, 16
316; CHECK-P8-NEXT:    .cfi_offset r30, -16
317; CHECK-P8-NEXT:    .cfi_offset v31, -32
318; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
319; CHECK-P8-NEXT:    li r7, 48
320; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
321; CHECK-P8-NEXT:    mr r30, r6
322; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
323; CHECK-P8-NEXT:    xxswapd v31, vs0
324; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
325; CHECK-P8-NEXT:    xxswapd v2, vs0
326; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
327; CHECK-P8-NEXT:    xxswapd v3, vs0
328; CHECK-P8-NEXT:    bl __mulkf3
329; CHECK-P8-NEXT:    nop
330; CHECK-P8-NEXT:    vmr v3, v2
331; CHECK-P8-NEXT:    vmr v2, v31
332; CHECK-P8-NEXT:    bl __subkf3
333; CHECK-P8-NEXT:    nop
334; CHECK-P8-NEXT:    li r3, 48
335; CHECK-P8-NEXT:    xxswapd vs0, v2
336; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
337; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
338; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
339; CHECK-P8-NEXT:    addi r1, r1, 80
340; CHECK-P8-NEXT:    ld r0, 16(r1)
341; CHECK-P8-NEXT:    mtlr r0
342; CHECK-P8-NEXT:    blr
343                      ptr nocapture readonly %b,
344                      ptr nocapture readonly %c, ptr nocapture %res) {
345entry:
346  %0 = load fp128, ptr %a, align 16
347  %1 = load fp128, ptr %b, align 16
348  %2 = load fp128, ptr %c, align 16
349  %mul = fmul contract fp128 %1, %2
350  %sub = fsub contract nsz fp128 %0, %mul
351  store fp128 %sub, ptr %res, align 16
352  ret void
353}
354
355; Function Attrs: norecurse nounwind
; qpFmsub_02: computes (%a * %b) - %c on fp128 values using `fmul contract`
; and `fsub contract`, with %c loaded after the multiply in the IR. The
; difference is stored to %res.
;   CHECK    (pwr9): expected to become xsmsubqp v4, v2, v3.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __subkf3; the %c pointer is kept
;                    in r29 and loaded only after the first call.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
356define void @qpFmsub_02(ptr nocapture readonly %a,
357; CHECK-LABEL: qpFmsub_02:
358; CHECK:       # %bb.0: # %entry
359; CHECK-NEXT:    lxv v2, 0(r3)
360; CHECK-NEXT:    lxv v3, 0(r4)
361; CHECK-NEXT:    lxv v4, 0(r5)
362; CHECK-NEXT:    xsmsubqp v4, v2, v3
363; CHECK-NEXT:    stxv v4, 0(r6)
364; CHECK-NEXT:    blr
365;
366; CHECK-P8-LABEL: qpFmsub_02:
367; CHECK-P8:       # %bb.0: # %entry
368; CHECK-P8-NEXT:    mflr r0
369; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
370; CHECK-P8-NEXT:    .cfi_offset lr, 16
371; CHECK-P8-NEXT:    .cfi_offset r29, -24
372; CHECK-P8-NEXT:    .cfi_offset r30, -16
373; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
374; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
375; CHECK-P8-NEXT:    stdu r1, -64(r1)
376; CHECK-P8-NEXT:    std r0, 80(r1)
377; CHECK-P8-NEXT:    mr r30, r6
378; CHECK-P8-NEXT:    mr r29, r5
379; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
380; CHECK-P8-NEXT:    xxswapd v2, vs0
381; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
382; CHECK-P8-NEXT:    xxswapd v3, vs0
383; CHECK-P8-NEXT:    bl __mulkf3
384; CHECK-P8-NEXT:    nop
385; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
386; CHECK-P8-NEXT:    xxswapd v3, vs0
387; CHECK-P8-NEXT:    bl __subkf3
388; CHECK-P8-NEXT:    nop
389; CHECK-P8-NEXT:    xxswapd vs0, v2
390; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
391; CHECK-P8-NEXT:    addi r1, r1, 64
392; CHECK-P8-NEXT:    ld r0, 16(r1)
393; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
394; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
395; CHECK-P8-NEXT:    mtlr r0
396; CHECK-P8-NEXT:    blr
397                      ptr nocapture readonly %b,
398                      ptr nocapture readonly %c, ptr nocapture %res) {
399entry:
400  %0 = load fp128, ptr %a, align 16
401  %1 = load fp128, ptr %b, align 16
402  %mul = fmul contract fp128 %0, %1
403  %2 = load fp128, ptr %c, align 16
404  %sub = fsub contract fp128 %mul, %2
405  store fp128 %sub, ptr %res, align 16
406  ret void
407}
408
409; Function Attrs: norecurse nounwind
; qpFnmsub: computes -(%a - (%b * %c)) on fp128 values and stores it to %res.
; The inner fsub carries `contract` but, unlike @qpFmsub, no `nsz`; the
; negation is an fsub from the constant 0xL00000000000000008000000000000000
; (only the sign bit set, i.e. -0.0).
;   CHECK    (pwr9): expects xsnegqp v3, v3 on the value loaded from %b,
;                    followed by xsnmaddqp v2, v3, v4.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __subkf3; the sign is then
;                    flipped through a stack slot by XORing the byte at
;                    63(r1) with 128.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
410define void @qpFnmsub(ptr nocapture readonly %a,
411; CHECK-LABEL: qpFnmsub:
412; CHECK:       # %bb.0: # %entry
413; CHECK-NEXT:    lxv v3, 0(r4)
414; CHECK-NEXT:    lxv v2, 0(r3)
415; CHECK-NEXT:    lxv v4, 0(r5)
416; CHECK-NEXT:    xsnegqp v3, v3
417; CHECK-NEXT:    xsnmaddqp v2, v3, v4
418; CHECK-NEXT:    stxv v2, 0(r6)
419; CHECK-NEXT:    blr
420;
421; CHECK-P8-LABEL: qpFnmsub:
422; CHECK-P8:       # %bb.0: # %entry
423; CHECK-P8-NEXT:    mflr r0
424; CHECK-P8-NEXT:    stdu r1, -96(r1)
425; CHECK-P8-NEXT:    std r0, 112(r1)
426; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
427; CHECK-P8-NEXT:    .cfi_offset lr, 16
428; CHECK-P8-NEXT:    .cfi_offset r30, -16
429; CHECK-P8-NEXT:    .cfi_offset v31, -32
430; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
431; CHECK-P8-NEXT:    li r7, 64
432; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
433; CHECK-P8-NEXT:    mr r30, r6
434; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
435; CHECK-P8-NEXT:    xxswapd v31, vs0
436; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
437; CHECK-P8-NEXT:    xxswapd v2, vs0
438; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
439; CHECK-P8-NEXT:    xxswapd v3, vs0
440; CHECK-P8-NEXT:    bl __mulkf3
441; CHECK-P8-NEXT:    nop
442; CHECK-P8-NEXT:    vmr v3, v2
443; CHECK-P8-NEXT:    vmr v2, v31
444; CHECK-P8-NEXT:    bl __subkf3
445; CHECK-P8-NEXT:    nop
446; CHECK-P8-NEXT:    xxswapd vs0, v2
447; CHECK-P8-NEXT:    addi r3, r1, 48
448; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
449; CHECK-P8-NEXT:    lbz r4, 63(r1)
450; CHECK-P8-NEXT:    xori r4, r4, 128
451; CHECK-P8-NEXT:    stb r4, 63(r1)
452; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
453; CHECK-P8-NEXT:    li r3, 64
454; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
455; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
456; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
457; CHECK-P8-NEXT:    addi r1, r1, 96
458; CHECK-P8-NEXT:    ld r0, 16(r1)
459; CHECK-P8-NEXT:    mtlr r0
460; CHECK-P8-NEXT:    blr
461                      ptr nocapture readonly %b,
462                      ptr nocapture readonly %c, ptr nocapture %res) {
463entry:
464  %0 = load fp128, ptr %a, align 16
465  %1 = load fp128, ptr %b, align 16
466  %2 = load fp128, ptr %c, align 16
467  %mul = fmul contract fp128 %1, %2
468  %sub = fsub contract fp128 %0, %mul
469  %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
470  store fp128 %sub1, ptr %res, align 16
471  ret void
472}
473
474; Function Attrs: norecurse nounwind
; qpFnmsub_02: computes -((%a * %b) - %c) on fp128 values and stores it to
; %res. The multiply and inner fsub carry `contract`, %c is loaded after the
; multiply, and the negation is an fsub from the constant
; 0xL00000000000000008000000000000000 (only the sign bit set, i.e. -0.0).
;   CHECK    (pwr9): expected to become xsnmsubqp v4, v2, v3.
;   CHECK-P8 (pwr8): calls to __mulkf3, then __subkf3; the sign is then
;                    flipped through a stack slot by XORing the byte at
;                    47(r1) with 128.
; The CHECK lines are autogenerated by utils/update_llc_test_checks.py.
475define void @qpFnmsub_02(ptr nocapture readonly %a,
476; CHECK-LABEL: qpFnmsub_02:
477; CHECK:       # %bb.0: # %entry
478; CHECK-NEXT:    lxv v2, 0(r3)
479; CHECK-NEXT:    lxv v3, 0(r4)
480; CHECK-NEXT:    lxv v4, 0(r5)
481; CHECK-NEXT:    xsnmsubqp v4, v2, v3
482; CHECK-NEXT:    stxv v4, 0(r6)
483; CHECK-NEXT:    blr
484;
485; CHECK-P8-LABEL: qpFnmsub_02:
486; CHECK-P8:       # %bb.0: # %entry
487; CHECK-P8-NEXT:    mflr r0
488; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
489; CHECK-P8-NEXT:    .cfi_offset lr, 16
490; CHECK-P8-NEXT:    .cfi_offset r29, -24
491; CHECK-P8-NEXT:    .cfi_offset r30, -16
492; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
493; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
494; CHECK-P8-NEXT:    stdu r1, -80(r1)
495; CHECK-P8-NEXT:    std r0, 96(r1)
496; CHECK-P8-NEXT:    mr r30, r6
497; CHECK-P8-NEXT:    mr r29, r5
498; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
499; CHECK-P8-NEXT:    xxswapd v2, vs0
500; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
501; CHECK-P8-NEXT:    xxswapd v3, vs0
502; CHECK-P8-NEXT:    bl __mulkf3
503; CHECK-P8-NEXT:    nop
504; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
505; CHECK-P8-NEXT:    xxswapd v3, vs0
506; CHECK-P8-NEXT:    bl __subkf3
507; CHECK-P8-NEXT:    nop
508; CHECK-P8-NEXT:    xxswapd vs0, v2
509; CHECK-P8-NEXT:    addi r3, r1, 32
510; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
511; CHECK-P8-NEXT:    lbz r4, 47(r1)
512; CHECK-P8-NEXT:    xori r4, r4, 128
513; CHECK-P8-NEXT:    stb r4, 47(r1)
514; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
515; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
516; CHECK-P8-NEXT:    addi r1, r1, 80
517; CHECK-P8-NEXT:    ld r0, 16(r1)
518; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
519; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
520; CHECK-P8-NEXT:    mtlr r0
521; CHECK-P8-NEXT:    blr
522                      ptr nocapture readonly %b,
523                      ptr nocapture readonly %c, ptr nocapture %res) {
524entry:
525  %0 = load fp128, ptr %a, align 16
526  %1 = load fp128, ptr %b, align 16
527  %mul = fmul contract fp128 %0, %1
528  %2 = load fp128, ptr %c, align 16
529  %sub = fsub contract fp128 %mul, %2
530  %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
531  store fp128 %sub1, ptr %res, align 16
532  ret void
533}
534