xref: /llvm-project/llvm/test/CodeGen/PowerPC/f128-arith.ll (revision a7dafea384a519342b2fbe210ed101c1e67f3be7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
3; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
4; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
5; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
6; RUN:   -check-prefix=CHECK-P8
7
; qpAdd: loads an fp128 from %a, adds it to itself with fadd, and stores the
; sum to %res. The pwr9 run expects a single xsaddqp between an lxv/stxv pair;
; the pwr8 run expects a call to __addkf3 with the value copied into both v2
; and v3.
; NOTE(review): the "Function Attrs" line below lists norecurse nounwind, but
; the define line itself carries no attributes - confirm whether it is stale.
8; Function Attrs: norecurse nounwind
9define dso_local void @qpAdd(ptr nocapture readonly %a, ptr nocapture %res) {
10; CHECK-LABEL: qpAdd:
11; CHECK:       # %bb.0: # %entry
12; CHECK-NEXT:    lxv v2, 0(r3)
13; CHECK-NEXT:    xsaddqp v2, v2, v2
14; CHECK-NEXT:    stxv v2, 0(r4)
15; CHECK-NEXT:    blr
16;
17; CHECK-P8-LABEL: qpAdd:
18; CHECK-P8:       # %bb.0: # %entry
19; CHECK-P8-NEXT:    mflr r0
20; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
21; CHECK-P8-NEXT:    .cfi_offset lr, 16
22; CHECK-P8-NEXT:    .cfi_offset r30, -16
23; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
24; CHECK-P8-NEXT:    stdu r1, -48(r1)
25; CHECK-P8-NEXT:    std r0, 64(r1)
26; CHECK-P8-NEXT:    mr r30, r4
27; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
28; CHECK-P8-NEXT:    xxswapd v2, vs0
29; CHECK-P8-NEXT:    vmr v3, v2
30; CHECK-P8-NEXT:    bl __addkf3
31; CHECK-P8-NEXT:    nop
32; CHECK-P8-NEXT:    xxswapd vs0, v2
33; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
34; CHECK-P8-NEXT:    addi r1, r1, 48
35; CHECK-P8-NEXT:    ld r0, 16(r1)
36; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
37; CHECK-P8-NEXT:    mtlr r0
38; CHECK-P8-NEXT:    blr
39entry:
40  %0 = load fp128, ptr %a, align 16
41  %add = fadd fp128 %0, %0
42  store fp128 %add, ptr %res, align 16
43  ret void
44}
45
; qpSub: loads an fp128 from %a, subtracts it from itself with fsub, and
; stores the result to %res. The pwr9 run expects a single xssubqp; the pwr8
; run expects a call to __subkf3 with the value copied into both v2 and v3.
; NOTE(review): the "Function Attrs" line below lists norecurse nounwind, but
; the define line itself carries no attributes - confirm whether it is stale.
46; Function Attrs: norecurse nounwind
47define dso_local void @qpSub(ptr nocapture readonly %a, ptr nocapture %res) {
48; CHECK-LABEL: qpSub:
49; CHECK:       # %bb.0: # %entry
50; CHECK-NEXT:    lxv v2, 0(r3)
51; CHECK-NEXT:    xssubqp v2, v2, v2
52; CHECK-NEXT:    stxv v2, 0(r4)
53; CHECK-NEXT:    blr
54;
55; CHECK-P8-LABEL: qpSub:
56; CHECK-P8:       # %bb.0: # %entry
57; CHECK-P8-NEXT:    mflr r0
58; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
59; CHECK-P8-NEXT:    .cfi_offset lr, 16
60; CHECK-P8-NEXT:    .cfi_offset r30, -16
61; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
62; CHECK-P8-NEXT:    stdu r1, -48(r1)
63; CHECK-P8-NEXT:    std r0, 64(r1)
64; CHECK-P8-NEXT:    mr r30, r4
65; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
66; CHECK-P8-NEXT:    xxswapd v2, vs0
67; CHECK-P8-NEXT:    vmr v3, v2
68; CHECK-P8-NEXT:    bl __subkf3
69; CHECK-P8-NEXT:    nop
70; CHECK-P8-NEXT:    xxswapd vs0, v2
71; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
72; CHECK-P8-NEXT:    addi r1, r1, 48
73; CHECK-P8-NEXT:    ld r0, 16(r1)
74; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
75; CHECK-P8-NEXT:    mtlr r0
76; CHECK-P8-NEXT:    blr
77entry:
78  %0 = load fp128, ptr %a, align 16
79  %sub = fsub fp128 %0, %0
80  store fp128 %sub, ptr %res, align 16
81  ret void
82}
83
; qpMul: loads an fp128 from %a, squares it with fmul, and stores the product
; to %res. The pwr9 run expects a single xsmulqp; the pwr8 run expects a call
; to __mulkf3 with the value copied into both v2 and v3.
; NOTE(review): the "Function Attrs" line below lists norecurse nounwind, but
; the define line itself carries no attributes - confirm whether it is stale.
84; Function Attrs: norecurse nounwind
85define dso_local void @qpMul(ptr nocapture readonly %a, ptr nocapture %res) {
86; CHECK-LABEL: qpMul:
87; CHECK:       # %bb.0: # %entry
88; CHECK-NEXT:    lxv v2, 0(r3)
89; CHECK-NEXT:    xsmulqp v2, v2, v2
90; CHECK-NEXT:    stxv v2, 0(r4)
91; CHECK-NEXT:    blr
92;
93; CHECK-P8-LABEL: qpMul:
94; CHECK-P8:       # %bb.0: # %entry
95; CHECK-P8-NEXT:    mflr r0
96; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
97; CHECK-P8-NEXT:    .cfi_offset lr, 16
98; CHECK-P8-NEXT:    .cfi_offset r30, -16
99; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
100; CHECK-P8-NEXT:    stdu r1, -48(r1)
101; CHECK-P8-NEXT:    std r0, 64(r1)
102; CHECK-P8-NEXT:    mr r30, r4
103; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
104; CHECK-P8-NEXT:    xxswapd v2, vs0
105; CHECK-P8-NEXT:    vmr v3, v2
106; CHECK-P8-NEXT:    bl __mulkf3
107; CHECK-P8-NEXT:    nop
108; CHECK-P8-NEXT:    xxswapd vs0, v2
109; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
110; CHECK-P8-NEXT:    addi r1, r1, 48
111; CHECK-P8-NEXT:    ld r0, 16(r1)
112; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
113; CHECK-P8-NEXT:    mtlr r0
114; CHECK-P8-NEXT:    blr
115entry:
116  %0 = load fp128, ptr %a, align 16
117  %mul = fmul fp128 %0, %0
118  store fp128 %mul, ptr %res, align 16
119  ret void
120}
121
; qpDiv: loads an fp128 from %a, divides it by itself with fdiv, and stores
; the quotient to %res. The pwr9 run expects a single xsdivqp; the pwr8 run
; expects a call to __divkf3 with the value copied into both v2 and v3.
; NOTE(review): the "Function Attrs" line below lists norecurse nounwind, but
; the define line itself carries no attributes - confirm whether it is stale.
122; Function Attrs: norecurse nounwind
123define dso_local void @qpDiv(ptr nocapture readonly %a, ptr nocapture %res) {
124; CHECK-LABEL: qpDiv:
125; CHECK:       # %bb.0: # %entry
126; CHECK-NEXT:    lxv v2, 0(r3)
127; CHECK-NEXT:    xsdivqp v2, v2, v2
128; CHECK-NEXT:    stxv v2, 0(r4)
129; CHECK-NEXT:    blr
130;
131; CHECK-P8-LABEL: qpDiv:
132; CHECK-P8:       # %bb.0: # %entry
133; CHECK-P8-NEXT:    mflr r0
134; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
135; CHECK-P8-NEXT:    .cfi_offset lr, 16
136; CHECK-P8-NEXT:    .cfi_offset r30, -16
137; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
138; CHECK-P8-NEXT:    stdu r1, -48(r1)
139; CHECK-P8-NEXT:    std r0, 64(r1)
140; CHECK-P8-NEXT:    mr r30, r4
141; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
142; CHECK-P8-NEXT:    xxswapd v2, vs0
143; CHECK-P8-NEXT:    vmr v3, v2
144; CHECK-P8-NEXT:    bl __divkf3
145; CHECK-P8-NEXT:    nop
146; CHECK-P8-NEXT:    xxswapd vs0, v2
147; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
148; CHECK-P8-NEXT:    addi r1, r1, 48
149; CHECK-P8-NEXT:    ld r0, 16(r1)
150; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
151; CHECK-P8-NEXT:    mtlr r0
152; CHECK-P8-NEXT:    blr
153entry:
154  %0 = load fp128, ptr %a, align 16
155  %div = fdiv fp128 %0, %0
156  store fp128 %div, ptr %res, align 16
157  ret void
158}
159
; testLdNSt: copies an fp128 from byte offset 4 of %PtrC to byte offset 8 of
; %PtrF (both offsets formed with an i8 getelementptr; both accesses are
; marked align 16). The pwr9 run expects the offsets to be materialised in
; registers with li and used by the indexed lxvx/stxvx forms; the pwr8 run
; expects addi on the base pointer followed by lxvd2x/stxvd2x.
160define dso_local void @testLdNSt(ptr nocapture readonly %PtrC, ptr nocapture %PtrF) {
161; CHECK-LABEL: testLdNSt:
162; CHECK:       # %bb.0: # %entry
163; CHECK-NEXT:    li r5, 4
164; CHECK-NEXT:    lxvx vs0, r3, r5
165; CHECK-NEXT:    li r3, 8
166; CHECK-NEXT:    stxvx vs0, r4, r3
167; CHECK-NEXT:    blr
168;
169; CHECK-P8-LABEL: testLdNSt:
170; CHECK-P8:       # %bb.0: # %entry
171; CHECK-P8-NEXT:    addi r3, r3, 4
172; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
173; CHECK-P8-NEXT:    addi r3, r4, 8
174; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
175; CHECK-P8-NEXT:    blr
176entry:
177  %add.ptr = getelementptr inbounds i8, ptr %PtrC, i64 4
178  %0 = load fp128, ptr %add.ptr, align 16
179  %add.ptr1 = getelementptr inbounds i8, ptr %PtrF, i64 8
180  store fp128 %0, ptr %add.ptr1, align 16
181  ret void
182}
183
; qpSqrt: loads an fp128 from %a, applies the llvm.sqrt.f128 intrinsic, and
; stores the result to %res. The pwr9 run expects a single xssqrtqp; the pwr8
; run expects a call to sqrtf128.
184define dso_local void @qpSqrt(ptr nocapture readonly %a, ptr nocapture %res) {
185; CHECK-LABEL: qpSqrt:
186; CHECK:       # %bb.0: # %entry
187; CHECK-NEXT:    lxv v2, 0(r3)
188; CHECK-NEXT:    xssqrtqp v2, v2
189; CHECK-NEXT:    stxv v2, 0(r4)
190; CHECK-NEXT:    blr
191;
192; CHECK-P8-LABEL: qpSqrt:
193; CHECK-P8:       # %bb.0: # %entry
194; CHECK-P8-NEXT:    mflr r0
195; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
196; CHECK-P8-NEXT:    .cfi_offset lr, 16
197; CHECK-P8-NEXT:    .cfi_offset r30, -16
198; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
199; CHECK-P8-NEXT:    stdu r1, -48(r1)
200; CHECK-P8-NEXT:    std r0, 64(r1)
201; CHECK-P8-NEXT:    mr r30, r4
202; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
203; CHECK-P8-NEXT:    xxswapd v2, vs0
204; CHECK-P8-NEXT:    bl sqrtf128
205; CHECK-P8-NEXT:    nop
206; CHECK-P8-NEXT:    xxswapd vs0, v2
207; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
208; CHECK-P8-NEXT:    addi r1, r1, 48
209; CHECK-P8-NEXT:    ld r0, 16(r1)
210; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
211; CHECK-P8-NEXT:    mtlr r0
212; CHECK-P8-NEXT:    blr
213entry:
214  %0 = load fp128, ptr %a, align 16
215  %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
216  store fp128 %1, ptr %res, align 16
217  ret void
218
219}
220declare fp128 @llvm.sqrt.f128(fp128 %Val)
221
; qpCpsgn: loads fp128 values from %a (magnitude) and %b (sign), combines them
; with llvm.copysign.f128, and stores the result to %res. The pwr9 run expects
; a single xscpsgnqp. The pwr8 run expects no library call: both values are
; spilled to stack slots below r1 and one byte is merged with
; lbz/rlwimi/stb before the result is reloaded.
; The define's parameter list continues after the generated check lines.
222define dso_local void @qpCpsgn(ptr nocapture readonly %a, ptr nocapture readonly %b,
223; CHECK-LABEL: qpCpsgn:
224; CHECK:       # %bb.0: # %entry
225; CHECK-NEXT:    lxv v2, 0(r3)
226; CHECK-NEXT:    lxv v3, 0(r4)
227; CHECK-NEXT:    xscpsgnqp v2, v3, v2
228; CHECK-NEXT:    stxv v2, 0(r5)
229; CHECK-NEXT:    blr
230;
231; CHECK-P8-LABEL: qpCpsgn:
232; CHECK-P8:       # %bb.0: # %entry
233; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
234; CHECK-P8-NEXT:    addi r4, r1, -16
235; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
236; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
237; CHECK-P8-NEXT:    addi r3, r1, -32
238; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
239; CHECK-P8-NEXT:    lbz r4, -1(r1)
240; CHECK-P8-NEXT:    lbz r6, -17(r1)
241; CHECK-P8-NEXT:    rlwimi r6, r4, 0, 0, 24
242; CHECK-P8-NEXT:    stb r6, -17(r1)
243; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
244; CHECK-P8-NEXT:    stxvd2x vs0, 0, r5
245; CHECK-P8-NEXT:    blr
246                     ptr nocapture %res) {
247entry:
248  %0 = load fp128, ptr %a, align 16
249  %1 = load fp128, ptr %b, align 16
250  %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
251  store fp128 %2, ptr %res, align 16
252  ret void
253
254}
255declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
256
; qpAbs: loads an fp128 from %a, applies llvm.fabs.f128, and stores the result
; to %res. The pwr9 run expects a single xsabsqp. The pwr8 run expects no
; library call: the value is spilled to a stack slot below r1, one byte is
; masked with clrlwi, and the slot is reloaded.
257define dso_local void @qpAbs(ptr nocapture readonly %a, ptr nocapture %res) {
258; CHECK-LABEL: qpAbs:
259; CHECK:       # %bb.0: # %entry
260; CHECK-NEXT:    lxv v2, 0(r3)
261; CHECK-NEXT:    xsabsqp v2, v2
262; CHECK-NEXT:    stxv v2, 0(r4)
263; CHECK-NEXT:    blr
264;
265; CHECK-P8-LABEL: qpAbs:
266; CHECK-P8:       # %bb.0: # %entry
267; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
268; CHECK-P8-NEXT:    addi r3, r1, -16
269; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
270; CHECK-P8-NEXT:    lbz r5, -1(r1)
271; CHECK-P8-NEXT:    clrlwi r5, r5, 25
272; CHECK-P8-NEXT:    stb r5, -1(r1)
273; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
274; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
275; CHECK-P8-NEXT:    blr
276entry:
277  %0 = load fp128, ptr %a, align 16
278  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
279  store fp128 %1, ptr %res, align 16
280  ret void
281
282}
283declare fp128 @llvm.fabs.f128(fp128 %Val)
284
; qpNAbs: loads an fp128 from %a, applies llvm.fabs.f128, then subtracts the
; result from the constant 0xL00000000000000008000000000000000 and stores it
; to %res. The pwr9 run expects the fabs + fsub pair to become a single
; xsnabsqp. The pwr8 run expects two stack round trips with no library call:
; one byte patched with clrlwi, then one byte patched with xori 128.
285define dso_local void @qpNAbs(ptr nocapture readonly %a, ptr nocapture %res) {
286; CHECK-LABEL: qpNAbs:
287; CHECK:       # %bb.0: # %entry
288; CHECK-NEXT:    lxv v2, 0(r3)
289; CHECK-NEXT:    xsnabsqp v2, v2
290; CHECK-NEXT:    stxv v2, 0(r4)
291; CHECK-NEXT:    blr
292;
293; CHECK-P8-LABEL: qpNAbs:
294; CHECK-P8:       # %bb.0: # %entry
295; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
296; CHECK-P8-NEXT:    addi r3, r1, -32
297; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
298; CHECK-P8-NEXT:    lbz r5, -17(r1)
299; CHECK-P8-NEXT:    clrlwi r5, r5, 25
300; CHECK-P8-NEXT:    stb r5, -17(r1)
301; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
302; CHECK-P8-NEXT:    addi r3, r1, -16
303; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
304; CHECK-P8-NEXT:    lbz r5, -1(r1)
305; CHECK-P8-NEXT:    xori r5, r5, 128
306; CHECK-P8-NEXT:    stb r5, -1(r1)
307; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
308; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
309; CHECK-P8-NEXT:    blr
310entry:
311  %0 = load fp128, ptr %a, align 16
312  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
313  %neg = fsub fp128 0xL00000000000000008000000000000000, %1
314  store fp128 %neg, ptr %res, align 16
315  ret void
316
317}
318
; qpNeg: loads an fp128 from %a, subtracts it from the constant
; 0xL00000000000000008000000000000000, and stores the result to %res. The
; pwr9 run expects the fsub to become a single xsnegqp. The pwr8 run expects
; no library call: the value is spilled to a stack slot below r1, one byte is
; flipped with xori 128, and the slot is reloaded.
319define dso_local void @qpNeg(ptr nocapture readonly %a, ptr nocapture %res) {
320; CHECK-LABEL: qpNeg:
321; CHECK:       # %bb.0: # %entry
322; CHECK-NEXT:    lxv v2, 0(r3)
323; CHECK-NEXT:    xsnegqp v2, v2
324; CHECK-NEXT:    stxv v2, 0(r4)
325; CHECK-NEXT:    blr
326;
327; CHECK-P8-LABEL: qpNeg:
328; CHECK-P8:       # %bb.0: # %entry
329; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
330; CHECK-P8-NEXT:    addi r3, r1, -16
331; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
332; CHECK-P8-NEXT:    lbz r5, -1(r1)
333; CHECK-P8-NEXT:    xori r5, r5, 128
334; CHECK-P8-NEXT:    stb r5, -1(r1)
335; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
336; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
337; CHECK-P8-NEXT:    blr
338entry:
339  %0 = load fp128, ptr %a, align 16
340  %sub = fsub fp128 0xL00000000000000008000000000000000, %0
341  store fp128 %sub, ptr %res, align 16
342  ret void
343
344}
345
; qp_sin: loads an fp128 from %a and returns llvm.sin.f128 of it. Both runs
; expect a call to sinf128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
346define fp128 @qp_sin(ptr nocapture readonly %a) {
347; CHECK-LABEL: qp_sin:
348; CHECK:       # %bb.0: # %entry
349; CHECK-NEXT:    mflr r0
350; CHECK-NEXT:    stdu r1, -32(r1)
351; CHECK-NEXT:    std r0, 48(r1)
352; CHECK-NEXT:    .cfi_def_cfa_offset 32
353; CHECK-NEXT:    .cfi_offset lr, 16
354; CHECK-NEXT:    lxv v2, 0(r3)
355; CHECK-NEXT:    bl sinf128
356; CHECK-NEXT:    nop
357; CHECK-NEXT:    addi r1, r1, 32
358; CHECK-NEXT:    ld r0, 16(r1)
359; CHECK-NEXT:    mtlr r0
360; CHECK-NEXT:    blr
361;
362; CHECK-P8-LABEL: qp_sin:
363; CHECK-P8:       # %bb.0: # %entry
364; CHECK-P8-NEXT:    mflr r0
365; CHECK-P8-NEXT:    stdu r1, -32(r1)
366; CHECK-P8-NEXT:    std r0, 48(r1)
367; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
368; CHECK-P8-NEXT:    .cfi_offset lr, 16
369; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
370; CHECK-P8-NEXT:    xxswapd v2, vs0
371; CHECK-P8-NEXT:    bl sinf128
372; CHECK-P8-NEXT:    nop
373; CHECK-P8-NEXT:    addi r1, r1, 32
374; CHECK-P8-NEXT:    ld r0, 16(r1)
375; CHECK-P8-NEXT:    mtlr r0
376; CHECK-P8-NEXT:    blr
377entry:
378  %0 = load fp128, ptr %a, align 16
379  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
380  ret fp128 %1
381}
382declare fp128 @llvm.sin.f128(fp128 %Val)
383
; qp_cos: loads an fp128 from %a and returns llvm.cos.f128 of it. Both runs
; expect a call to cosf128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
384define fp128 @qp_cos(ptr nocapture readonly %a) {
385; CHECK-LABEL: qp_cos:
386; CHECK:       # %bb.0: # %entry
387; CHECK-NEXT:    mflr r0
388; CHECK-NEXT:    stdu r1, -32(r1)
389; CHECK-NEXT:    std r0, 48(r1)
390; CHECK-NEXT:    .cfi_def_cfa_offset 32
391; CHECK-NEXT:    .cfi_offset lr, 16
392; CHECK-NEXT:    lxv v2, 0(r3)
393; CHECK-NEXT:    bl cosf128
394; CHECK-NEXT:    nop
395; CHECK-NEXT:    addi r1, r1, 32
396; CHECK-NEXT:    ld r0, 16(r1)
397; CHECK-NEXT:    mtlr r0
398; CHECK-NEXT:    blr
399;
400; CHECK-P8-LABEL: qp_cos:
401; CHECK-P8:       # %bb.0: # %entry
402; CHECK-P8-NEXT:    mflr r0
403; CHECK-P8-NEXT:    stdu r1, -32(r1)
404; CHECK-P8-NEXT:    std r0, 48(r1)
405; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
406; CHECK-P8-NEXT:    .cfi_offset lr, 16
407; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
408; CHECK-P8-NEXT:    xxswapd v2, vs0
409; CHECK-P8-NEXT:    bl cosf128
410; CHECK-P8-NEXT:    nop
411; CHECK-P8-NEXT:    addi r1, r1, 32
412; CHECK-P8-NEXT:    ld r0, 16(r1)
413; CHECK-P8-NEXT:    mtlr r0
414; CHECK-P8-NEXT:    blr
415entry:
416  %0 = load fp128, ptr %a, align 16
417  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
418  ret fp128 %1
419}
420declare fp128 @llvm.cos.f128(fp128 %Val)
421
; qp_sincos: loads an fp128 from %a, calls both llvm.cos.f128 and
; llvm.sin.f128 on it, and returns their product. Both runs expect the two
; intrinsic calls to be emitted as one call to sincosf128, with two stack
; addresses passed in r5/r6 and the results reloaded from those slots
; afterwards. The multiply is xsmulqp on pwr9 and a call to __mulkf3 on pwr8.
; The function is declared nounwind, and the expected output contains no
; .cfi directives.
422define fp128 @qp_sincos(ptr nocapture readonly %a) nounwind {
423; CHECK-LABEL: qp_sincos:
424; CHECK:       # %bb.0: # %entry
425; CHECK-NEXT:    mflr r0
426; CHECK-NEXT:    stdu r1, -64(r1)
427; CHECK-NEXT:    std r0, 80(r1)
428; CHECK-NEXT:    addi r5, r1, 48
429; CHECK-NEXT:    addi r6, r1, 32
430; CHECK-NEXT:    lxv v2, 0(r3)
431; CHECK-NEXT:    bl sincosf128
432; CHECK-NEXT:    nop
433; CHECK-NEXT:    lxv v2, 48(r1)
434; CHECK-NEXT:    lxv v3, 32(r1)
435; CHECK-NEXT:    xsmulqp v2, v3, v2
436; CHECK-NEXT:    addi r1, r1, 64
437; CHECK-NEXT:    ld r0, 16(r1)
438; CHECK-NEXT:    mtlr r0
439; CHECK-NEXT:    blr
440;
441; CHECK-P8-LABEL: qp_sincos:
442; CHECK-P8:       # %bb.0: # %entry
443; CHECK-P8-NEXT:    mflr r0
444; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
445; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
446; CHECK-P8-NEXT:    stdu r1, -96(r1)
447; CHECK-P8-NEXT:    std r0, 112(r1)
448; CHECK-P8-NEXT:    addi r30, r1, 48
449; CHECK-P8-NEXT:    addi r29, r1, 32
450; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
451; CHECK-P8-NEXT:    mr r5, r30
452; CHECK-P8-NEXT:    mr r6, r29
453; CHECK-P8-NEXT:    xxswapd v2, vs0
454; CHECK-P8-NEXT:    bl sincosf128
455; CHECK-P8-NEXT:    nop
456; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
457; CHECK-P8-NEXT:    xxswapd v2, vs0
458; CHECK-P8-NEXT:    lxvd2x vs0, 0, r30
459; CHECK-P8-NEXT:    xxswapd v3, vs0
460; CHECK-P8-NEXT:    bl __mulkf3
461; CHECK-P8-NEXT:    nop
462; CHECK-P8-NEXT:    addi r1, r1, 96
463; CHECK-P8-NEXT:    ld r0, 16(r1)
464; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
465; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
466; CHECK-P8-NEXT:    mtlr r0
467; CHECK-P8-NEXT:    blr
468entry:
469  %0 = load fp128, ptr %a, align 16
470  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
471  %2 = tail call fp128 @llvm.sin.f128(fp128 %0)
472  %3 = fmul fp128 %1, %2
473  ret fp128 %3
474}
475
; qp_log: loads an fp128 from %a and returns llvm.log.f128 of it. Both runs
; expect a call to logf128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
476define fp128 @qp_log(ptr nocapture readonly %a) {
477; CHECK-LABEL: qp_log:
478; CHECK:       # %bb.0: # %entry
479; CHECK-NEXT:    mflr r0
480; CHECK-NEXT:    stdu r1, -32(r1)
481; CHECK-NEXT:    std r0, 48(r1)
482; CHECK-NEXT:    .cfi_def_cfa_offset 32
483; CHECK-NEXT:    .cfi_offset lr, 16
484; CHECK-NEXT:    lxv v2, 0(r3)
485; CHECK-NEXT:    bl logf128
486; CHECK-NEXT:    nop
487; CHECK-NEXT:    addi r1, r1, 32
488; CHECK-NEXT:    ld r0, 16(r1)
489; CHECK-NEXT:    mtlr r0
490; CHECK-NEXT:    blr
491;
492; CHECK-P8-LABEL: qp_log:
493; CHECK-P8:       # %bb.0: # %entry
494; CHECK-P8-NEXT:    mflr r0
495; CHECK-P8-NEXT:    stdu r1, -32(r1)
496; CHECK-P8-NEXT:    std r0, 48(r1)
497; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
498; CHECK-P8-NEXT:    .cfi_offset lr, 16
499; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
500; CHECK-P8-NEXT:    xxswapd v2, vs0
501; CHECK-P8-NEXT:    bl logf128
502; CHECK-P8-NEXT:    nop
503; CHECK-P8-NEXT:    addi r1, r1, 32
504; CHECK-P8-NEXT:    ld r0, 16(r1)
505; CHECK-P8-NEXT:    mtlr r0
506; CHECK-P8-NEXT:    blr
507entry:
508  %0 = load fp128, ptr %a, align 16
509  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
510  ret fp128 %1
511}
512declare fp128     @llvm.log.f128(fp128 %Val)
513
; qp_log10: loads an fp128 from %a and returns llvm.log10.f128 of it. Both
; runs expect a call to log10f128; they differ only in how the argument
; reaches v2 (lxv on pwr9, lxvd2x + xxswapd on pwr8).
514define fp128 @qp_log10(ptr nocapture readonly %a) {
515; CHECK-LABEL: qp_log10:
516; CHECK:       # %bb.0: # %entry
517; CHECK-NEXT:    mflr r0
518; CHECK-NEXT:    stdu r1, -32(r1)
519; CHECK-NEXT:    std r0, 48(r1)
520; CHECK-NEXT:    .cfi_def_cfa_offset 32
521; CHECK-NEXT:    .cfi_offset lr, 16
522; CHECK-NEXT:    lxv v2, 0(r3)
523; CHECK-NEXT:    bl log10f128
524; CHECK-NEXT:    nop
525; CHECK-NEXT:    addi r1, r1, 32
526; CHECK-NEXT:    ld r0, 16(r1)
527; CHECK-NEXT:    mtlr r0
528; CHECK-NEXT:    blr
529;
530; CHECK-P8-LABEL: qp_log10:
531; CHECK-P8:       # %bb.0: # %entry
532; CHECK-P8-NEXT:    mflr r0
533; CHECK-P8-NEXT:    stdu r1, -32(r1)
534; CHECK-P8-NEXT:    std r0, 48(r1)
535; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
536; CHECK-P8-NEXT:    .cfi_offset lr, 16
537; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
538; CHECK-P8-NEXT:    xxswapd v2, vs0
539; CHECK-P8-NEXT:    bl log10f128
540; CHECK-P8-NEXT:    nop
541; CHECK-P8-NEXT:    addi r1, r1, 32
542; CHECK-P8-NEXT:    ld r0, 16(r1)
543; CHECK-P8-NEXT:    mtlr r0
544; CHECK-P8-NEXT:    blr
545entry:
546  %0 = load fp128, ptr %a, align 16
547  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
548  ret fp128 %1
549}
550declare fp128     @llvm.log10.f128(fp128 %Val)
551
; qp_log2: loads an fp128 from %a and returns llvm.log2.f128 of it. Both runs
; expect a call to log2f128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
552define fp128 @qp_log2(ptr nocapture readonly %a) {
553; CHECK-LABEL: qp_log2:
554; CHECK:       # %bb.0: # %entry
555; CHECK-NEXT:    mflr r0
556; CHECK-NEXT:    stdu r1, -32(r1)
557; CHECK-NEXT:    std r0, 48(r1)
558; CHECK-NEXT:    .cfi_def_cfa_offset 32
559; CHECK-NEXT:    .cfi_offset lr, 16
560; CHECK-NEXT:    lxv v2, 0(r3)
561; CHECK-NEXT:    bl log2f128
562; CHECK-NEXT:    nop
563; CHECK-NEXT:    addi r1, r1, 32
564; CHECK-NEXT:    ld r0, 16(r1)
565; CHECK-NEXT:    mtlr r0
566; CHECK-NEXT:    blr
567;
568; CHECK-P8-LABEL: qp_log2:
569; CHECK-P8:       # %bb.0: # %entry
570; CHECK-P8-NEXT:    mflr r0
571; CHECK-P8-NEXT:    stdu r1, -32(r1)
572; CHECK-P8-NEXT:    std r0, 48(r1)
573; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
574; CHECK-P8-NEXT:    .cfi_offset lr, 16
575; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
576; CHECK-P8-NEXT:    xxswapd v2, vs0
577; CHECK-P8-NEXT:    bl log2f128
578; CHECK-P8-NEXT:    nop
579; CHECK-P8-NEXT:    addi r1, r1, 32
580; CHECK-P8-NEXT:    ld r0, 16(r1)
581; CHECK-P8-NEXT:    mtlr r0
582; CHECK-P8-NEXT:    blr
583entry:
584  %0 = load fp128, ptr %a, align 16
585  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
586  ret fp128 %1
587}
588declare fp128     @llvm.log2.f128(fp128 %Val)
589
; qp_minnum: loads fp128 values from %a and %b and returns
; llvm.minnum.f128 of the pair. Both runs expect a call to fminf128 with the
; operands in v2 and v3; they differ only in the load sequence (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; The define's parameter list continues after the generated check lines.
590define fp128 @qp_minnum(ptr nocapture readonly %a,
591; CHECK-LABEL: qp_minnum:
592; CHECK:       # %bb.0: # %entry
593; CHECK-NEXT:    mflr r0
594; CHECK-NEXT:    stdu r1, -32(r1)
595; CHECK-NEXT:    std r0, 48(r1)
596; CHECK-NEXT:    .cfi_def_cfa_offset 32
597; CHECK-NEXT:    .cfi_offset lr, 16
598; CHECK-NEXT:    lxv v2, 0(r3)
599; CHECK-NEXT:    lxv v3, 0(r4)
600; CHECK-NEXT:    bl fminf128
601; CHECK-NEXT:    nop
602; CHECK-NEXT:    addi r1, r1, 32
603; CHECK-NEXT:    ld r0, 16(r1)
604; CHECK-NEXT:    mtlr r0
605; CHECK-NEXT:    blr
606;
607; CHECK-P8-LABEL: qp_minnum:
608; CHECK-P8:       # %bb.0: # %entry
609; CHECK-P8-NEXT:    mflr r0
610; CHECK-P8-NEXT:    stdu r1, -32(r1)
611; CHECK-P8-NEXT:    std r0, 48(r1)
612; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
613; CHECK-P8-NEXT:    .cfi_offset lr, 16
614; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
615; CHECK-P8-NEXT:    xxswapd v2, vs0
616; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
617; CHECK-P8-NEXT:    xxswapd v3, vs0
618; CHECK-P8-NEXT:    bl fminf128
619; CHECK-P8-NEXT:    nop
620; CHECK-P8-NEXT:    addi r1, r1, 32
621; CHECK-P8-NEXT:    ld r0, 16(r1)
622; CHECK-P8-NEXT:    mtlr r0
623; CHECK-P8-NEXT:    blr
624                        ptr nocapture readonly %b) {
625entry:
626  %0 = load fp128, ptr %a, align 16
627  %1 = load fp128, ptr %b, align 16
628  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
629  ret fp128 %2
630}
631declare fp128     @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)
632
; qp_maxnum: loads fp128 values from %a and %b and returns
; llvm.maxnum.f128 of the pair. Both runs expect a call to fmaxf128 with the
; operands in v2 and v3; they differ only in the load sequence (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; The define's parameter list continues after the generated check lines.
633define fp128 @qp_maxnum(ptr nocapture readonly %a,
634; CHECK-LABEL: qp_maxnum:
635; CHECK:       # %bb.0: # %entry
636; CHECK-NEXT:    mflr r0
637; CHECK-NEXT:    stdu r1, -32(r1)
638; CHECK-NEXT:    std r0, 48(r1)
639; CHECK-NEXT:    .cfi_def_cfa_offset 32
640; CHECK-NEXT:    .cfi_offset lr, 16
641; CHECK-NEXT:    lxv v2, 0(r3)
642; CHECK-NEXT:    lxv v3, 0(r4)
643; CHECK-NEXT:    bl fmaxf128
644; CHECK-NEXT:    nop
645; CHECK-NEXT:    addi r1, r1, 32
646; CHECK-NEXT:    ld r0, 16(r1)
647; CHECK-NEXT:    mtlr r0
648; CHECK-NEXT:    blr
649;
650; CHECK-P8-LABEL: qp_maxnum:
651; CHECK-P8:       # %bb.0: # %entry
652; CHECK-P8-NEXT:    mflr r0
653; CHECK-P8-NEXT:    stdu r1, -32(r1)
654; CHECK-P8-NEXT:    std r0, 48(r1)
655; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
656; CHECK-P8-NEXT:    .cfi_offset lr, 16
657; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
658; CHECK-P8-NEXT:    xxswapd v2, vs0
659; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
660; CHECK-P8-NEXT:    xxswapd v3, vs0
661; CHECK-P8-NEXT:    bl fmaxf128
662; CHECK-P8-NEXT:    nop
663; CHECK-P8-NEXT:    addi r1, r1, 32
664; CHECK-P8-NEXT:    ld r0, 16(r1)
665; CHECK-P8-NEXT:    mtlr r0
666; CHECK-P8-NEXT:    blr
667                        ptr nocapture readonly %b) {
668entry:
669  %0 = load fp128, ptr %a, align 16
670  %1 = load fp128, ptr %b, align 16
671  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
672  ret fp128 %2
673}
674declare fp128     @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)
675
; qp_pow: loads fp128 values from %a (base) and %b (exponent) and returns
; llvm.pow.f128 of the pair. Both runs expect a call to powf128 with the
; operands in v2 and v3; they differ only in the load sequence (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; The define's parameter list continues after the generated check lines.
676define fp128 @qp_pow(ptr nocapture readonly %a,
677; CHECK-LABEL: qp_pow:
678; CHECK:       # %bb.0: # %entry
679; CHECK-NEXT:    mflr r0
680; CHECK-NEXT:    stdu r1, -32(r1)
681; CHECK-NEXT:    std r0, 48(r1)
682; CHECK-NEXT:    .cfi_def_cfa_offset 32
683; CHECK-NEXT:    .cfi_offset lr, 16
684; CHECK-NEXT:    lxv v2, 0(r3)
685; CHECK-NEXT:    lxv v3, 0(r4)
686; CHECK-NEXT:    bl powf128
687; CHECK-NEXT:    nop
688; CHECK-NEXT:    addi r1, r1, 32
689; CHECK-NEXT:    ld r0, 16(r1)
690; CHECK-NEXT:    mtlr r0
691; CHECK-NEXT:    blr
692;
693; CHECK-P8-LABEL: qp_pow:
694; CHECK-P8:       # %bb.0: # %entry
695; CHECK-P8-NEXT:    mflr r0
696; CHECK-P8-NEXT:    stdu r1, -32(r1)
697; CHECK-P8-NEXT:    std r0, 48(r1)
698; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
699; CHECK-P8-NEXT:    .cfi_offset lr, 16
700; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
701; CHECK-P8-NEXT:    xxswapd v2, vs0
702; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
703; CHECK-P8-NEXT:    xxswapd v3, vs0
704; CHECK-P8-NEXT:    bl powf128
705; CHECK-P8-NEXT:    nop
706; CHECK-P8-NEXT:    addi r1, r1, 32
707; CHECK-P8-NEXT:    ld r0, 16(r1)
708; CHECK-P8-NEXT:    mtlr r0
709; CHECK-P8-NEXT:    blr
710                     ptr nocapture readonly %b) {
711entry:
712  %0 = load fp128, ptr %a, align 16
713  %1 = load fp128, ptr %b, align 16
714  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
715  ret fp128 %2
716}
717declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
718
; qp_exp: loads an fp128 from %a and returns llvm.exp.f128 of it. Both runs
; expect a call to expf128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
719define fp128 @qp_exp(ptr nocapture readonly %a) {
720; CHECK-LABEL: qp_exp:
721; CHECK:       # %bb.0: # %entry
722; CHECK-NEXT:    mflr r0
723; CHECK-NEXT:    stdu r1, -32(r1)
724; CHECK-NEXT:    std r0, 48(r1)
725; CHECK-NEXT:    .cfi_def_cfa_offset 32
726; CHECK-NEXT:    .cfi_offset lr, 16
727; CHECK-NEXT:    lxv v2, 0(r3)
728; CHECK-NEXT:    bl expf128
729; CHECK-NEXT:    nop
730; CHECK-NEXT:    addi r1, r1, 32
731; CHECK-NEXT:    ld r0, 16(r1)
732; CHECK-NEXT:    mtlr r0
733; CHECK-NEXT:    blr
734;
735; CHECK-P8-LABEL: qp_exp:
736; CHECK-P8:       # %bb.0: # %entry
737; CHECK-P8-NEXT:    mflr r0
738; CHECK-P8-NEXT:    stdu r1, -32(r1)
739; CHECK-P8-NEXT:    std r0, 48(r1)
740; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
741; CHECK-P8-NEXT:    .cfi_offset lr, 16
742; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
743; CHECK-P8-NEXT:    xxswapd v2, vs0
744; CHECK-P8-NEXT:    bl expf128
745; CHECK-P8-NEXT:    nop
746; CHECK-P8-NEXT:    addi r1, r1, 32
747; CHECK-P8-NEXT:    ld r0, 16(r1)
748; CHECK-P8-NEXT:    mtlr r0
749; CHECK-P8-NEXT:    blr
750entry:
751  %0 = load fp128, ptr %a, align 16
752  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
753  ret fp128 %1
754}
755declare fp128     @llvm.exp.f128(fp128 %Val)
756
; qp_exp2: loads an fp128 from %a and returns llvm.exp2.f128 of it. Both runs
; expect a call to exp2f128; they differ only in how the argument reaches v2
; (lxv on pwr9, lxvd2x + xxswapd on pwr8).
757define fp128 @qp_exp2(ptr nocapture readonly %a) {
758; CHECK-LABEL: qp_exp2:
759; CHECK:       # %bb.0: # %entry
760; CHECK-NEXT:    mflr r0
761; CHECK-NEXT:    stdu r1, -32(r1)
762; CHECK-NEXT:    std r0, 48(r1)
763; CHECK-NEXT:    .cfi_def_cfa_offset 32
764; CHECK-NEXT:    .cfi_offset lr, 16
765; CHECK-NEXT:    lxv v2, 0(r3)
766; CHECK-NEXT:    bl exp2f128
767; CHECK-NEXT:    nop
768; CHECK-NEXT:    addi r1, r1, 32
769; CHECK-NEXT:    ld r0, 16(r1)
770; CHECK-NEXT:    mtlr r0
771; CHECK-NEXT:    blr
772;
773; CHECK-P8-LABEL: qp_exp2:
774; CHECK-P8:       # %bb.0: # %entry
775; CHECK-P8-NEXT:    mflr r0
776; CHECK-P8-NEXT:    stdu r1, -32(r1)
777; CHECK-P8-NEXT:    std r0, 48(r1)
778; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
779; CHECK-P8-NEXT:    .cfi_offset lr, 16
780; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
781; CHECK-P8-NEXT:    xxswapd v2, vs0
782; CHECK-P8-NEXT:    bl exp2f128
783; CHECK-P8-NEXT:    nop
784; CHECK-P8-NEXT:    addi r1, r1, 32
785; CHECK-P8-NEXT:    ld r0, 16(r1)
786; CHECK-P8-NEXT:    mtlr r0
787; CHECK-P8-NEXT:    blr
788entry:
789  %0 = load fp128, ptr %a, align 16
790  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
791  ret fp128 %1
792}
793declare fp128     @llvm.exp2.f128(fp128 %Val)
794
; qp_powi: loads an fp128 from %a and an i32 exponent from %b, raises the
; former to the latter with llvm.powi.f128.i32, and stores the result to
; %res. Both runs expect a call to __powikf2 with the exponent loaded into r5
; by lwz and the %res pointer kept in r30 across the call; they differ in the
; vector load/store sequences (lxv/stxv on pwr9, lxvd2x/stxvd2x with xxswapd
; on pwr8).
; The define's parameter list continues after the generated check lines.
795define dso_local void @qp_powi(ptr nocapture readonly %a, ptr nocapture readonly %b,
796; CHECK-LABEL: qp_powi:
797; CHECK:       # %bb.0: # %entry
798; CHECK-NEXT:    mflr r0
799; CHECK-NEXT:    .cfi_def_cfa_offset 48
800; CHECK-NEXT:    .cfi_offset lr, 16
801; CHECK-NEXT:    .cfi_offset r30, -16
802; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
803; CHECK-NEXT:    stdu r1, -48(r1)
804; CHECK-NEXT:    std r0, 64(r1)
805; CHECK-NEXT:    mr r30, r5
806; CHECK-NEXT:    lxv v2, 0(r3)
807; CHECK-NEXT:    lwz r5, 0(r4)
808; CHECK-NEXT:    bl __powikf2
809; CHECK-NEXT:    nop
810; CHECK-NEXT:    stxv v2, 0(r30)
811; CHECK-NEXT:    addi r1, r1, 48
812; CHECK-NEXT:    ld r0, 16(r1)
813; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
814; CHECK-NEXT:    mtlr r0
815; CHECK-NEXT:    blr
816;
817; CHECK-P8-LABEL: qp_powi:
818; CHECK-P8:       # %bb.0: # %entry
819; CHECK-P8-NEXT:    mflr r0
820; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
821; CHECK-P8-NEXT:    .cfi_offset lr, 16
822; CHECK-P8-NEXT:    .cfi_offset r30, -16
823; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
824; CHECK-P8-NEXT:    stdu r1, -48(r1)
825; CHECK-P8-NEXT:    std r0, 64(r1)
826; CHECK-P8-NEXT:    mr r30, r5
827; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
828; CHECK-P8-NEXT:    lwz r5, 0(r4)
829; CHECK-P8-NEXT:    xxswapd v2, vs0
830; CHECK-P8-NEXT:    bl __powikf2
831; CHECK-P8-NEXT:    nop
832; CHECK-P8-NEXT:    xxswapd vs0, v2
833; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
834; CHECK-P8-NEXT:    addi r1, r1, 48
835; CHECK-P8-NEXT:    ld r0, 16(r1)
836; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
837; CHECK-P8-NEXT:    mtlr r0
838; CHECK-P8-NEXT:    blr
839                     ptr nocapture %res) {
840entry:
841  %0 = load fp128, ptr %a, align 16
842  %1 = load i32, ptr %b, align 8
843  %2 = tail call fp128 @llvm.powi.f128.i32(fp128 %0, i32 %1)
844  store fp128 %2, ptr %res, align 16
845  ret void
846}
847declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)
848
; Zero-initialised fp128 globals read by qp_frem below.
849@a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
850@b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
851
; qp_frem: loads the globals @a and @b and returns their frem. Both runs
; expect the global addresses to be formed with TOC-relative addis/addi pairs
; and the remainder to be computed by a call to fmodf128.
; NOTE(review): attribute group #0 is referenced here but its definition is
; not in this part of the file - presumably it appears further down.
852define fp128 @qp_frem() #0 {
853; CHECK-LABEL: qp_frem:
854; CHECK:       # %bb.0: # %entry
855; CHECK-NEXT:    mflr r0
856; CHECK-NEXT:    stdu r1, -32(r1)
857; CHECK-NEXT:    std r0, 48(r1)
858; CHECK-NEXT:    .cfi_def_cfa_offset 32
859; CHECK-NEXT:    .cfi_offset lr, 16
860; CHECK-NEXT:    addis r3, r2, a@toc@ha
861; CHECK-NEXT:    addi r3, r3, a@toc@l
862; CHECK-NEXT:    lxv v2, 0(r3)
863; CHECK-NEXT:    addis r3, r2, b@toc@ha
864; CHECK-NEXT:    addi r3, r3, b@toc@l
865; CHECK-NEXT:    lxv v3, 0(r3)
866; CHECK-NEXT:    bl fmodf128
867; CHECK-NEXT:    nop
868; CHECK-NEXT:    addi r1, r1, 32
869; CHECK-NEXT:    ld r0, 16(r1)
870; CHECK-NEXT:    mtlr r0
871; CHECK-NEXT:    blr
872;
873; CHECK-P8-LABEL: qp_frem:
874; CHECK-P8:       # %bb.0: # %entry
875; CHECK-P8-NEXT:    mflr r0
876; CHECK-P8-NEXT:    stdu r1, -32(r1)
877; CHECK-P8-NEXT:    std r0, 48(r1)
878; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
879; CHECK-P8-NEXT:    .cfi_offset lr, 16
880; CHECK-P8-NEXT:    addis r3, r2, a@toc@ha
881; CHECK-P8-NEXT:    addi r3, r3, a@toc@l
882; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
883; CHECK-P8-NEXT:    addis r3, r2, b@toc@ha
884; CHECK-P8-NEXT:    addi r3, r3, b@toc@l
885; CHECK-P8-NEXT:    xxswapd v2, vs0
886; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
887; CHECK-P8-NEXT:    xxswapd v3, vs0
888; CHECK-P8-NEXT:    bl fmodf128
889; CHECK-P8-NEXT:    nop
890; CHECK-P8-NEXT:    addi r1, r1, 32
891; CHECK-P8-NEXT:    ld r0, 16(r1)
892; CHECK-P8-NEXT:    mtlr r0
893; CHECK-P8-NEXT:    blr
894entry:
895  %0 = load fp128, ptr @a, align 16
896  %1 = load fp128, ptr @b, align 16
897  %rem = frem fp128 %0, %1
898  ret fp128 %rem
899}
900
; qpCeil: loads an fp128 from %a, applies llvm.ceil.f128, and stores the
; result to %res. The pwr9 run expects a single "xsrqpi 1, v2, v2, 2"; the
; pwr8 run expects a call to ceilf128.
901define dso_local void @qpCeil(ptr nocapture readonly %a, ptr nocapture %res) {
902; CHECK-LABEL: qpCeil:
903; CHECK:       # %bb.0: # %entry
904; CHECK-NEXT:    lxv v2, 0(r3)
905; CHECK-NEXT:    xsrqpi 1, v2, v2, 2
906; CHECK-NEXT:    stxv v2, 0(r4)
907; CHECK-NEXT:    blr
908;
909; CHECK-P8-LABEL: qpCeil:
910; CHECK-P8:       # %bb.0: # %entry
911; CHECK-P8-NEXT:    mflr r0
912; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
913; CHECK-P8-NEXT:    .cfi_offset lr, 16
914; CHECK-P8-NEXT:    .cfi_offset r30, -16
915; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
916; CHECK-P8-NEXT:    stdu r1, -48(r1)
917; CHECK-P8-NEXT:    std r0, 64(r1)
918; CHECK-P8-NEXT:    mr r30, r4
919; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
920; CHECK-P8-NEXT:    xxswapd v2, vs0
921; CHECK-P8-NEXT:    bl ceilf128
922; CHECK-P8-NEXT:    nop
923; CHECK-P8-NEXT:    xxswapd vs0, v2
924; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
925; CHECK-P8-NEXT:    addi r1, r1, 48
926; CHECK-P8-NEXT:    ld r0, 16(r1)
927; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
928; CHECK-P8-NEXT:    mtlr r0
929; CHECK-P8-NEXT:    blr
930entry:
931  %0 = load fp128, ptr %a, align 16
932  %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
933  store fp128 %1, ptr %res, align 16
934  ret void
935}
936declare fp128 @llvm.ceil.f128(fp128 %Val)
937
; qpFloor: load an fp128 from %a, apply llvm.floor.f128, store the result to
; %res.
;   * pwr9 run: expected inline as `xsrqpi 1, v2, v2, 3` (same instruction as
;     the ceil test, last operand 3 instead of 2).
;   * pwr8 run: expected to call floorf128, with r4 (%res) preserved in r30
;     around the call and lxvd2x/stxvd2x paired with xxswapd for the vector
;     load and store.
938define dso_local void @qpFloor(ptr nocapture readonly %a, ptr nocapture %res) {
939; CHECK-LABEL: qpFloor:
940; CHECK:       # %bb.0: # %entry
941; CHECK-NEXT:    lxv v2, 0(r3)
942; CHECK-NEXT:    xsrqpi 1, v2, v2, 3
943; CHECK-NEXT:    stxv v2, 0(r4)
944; CHECK-NEXT:    blr
945;
946; CHECK-P8-LABEL: qpFloor:
947; CHECK-P8:       # %bb.0: # %entry
948; CHECK-P8-NEXT:    mflr r0
949; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
950; CHECK-P8-NEXT:    .cfi_offset lr, 16
951; CHECK-P8-NEXT:    .cfi_offset r30, -16
952; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
953; CHECK-P8-NEXT:    stdu r1, -48(r1)
954; CHECK-P8-NEXT:    std r0, 64(r1)
955; CHECK-P8-NEXT:    mr r30, r4
956; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
957; CHECK-P8-NEXT:    xxswapd v2, vs0
958; CHECK-P8-NEXT:    bl floorf128
959; CHECK-P8-NEXT:    nop
960; CHECK-P8-NEXT:    xxswapd vs0, v2
961; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
962; CHECK-P8-NEXT:    addi r1, r1, 48
963; CHECK-P8-NEXT:    ld r0, 16(r1)
964; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
965; CHECK-P8-NEXT:    mtlr r0
966; CHECK-P8-NEXT:    blr
967entry:
968  %0 = load fp128, ptr %a, align 16
969  %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
970  store fp128 %1, ptr %res, align 16
971  ret void
972}
973declare fp128 @llvm.floor.f128(fp128 %Val)
974
; qpTrunc: load an fp128 from %a, apply llvm.trunc.f128, store the result to
; %res.
;   * pwr9 run: expected inline as `xsrqpi 1, v2, v2, 1`.
;   * pwr8 run: expected to call truncf128; r30 carries %res (copied from r4)
;     across the call and the result in v2 is stored through it.
975define dso_local void @qpTrunc(ptr nocapture readonly %a, ptr nocapture %res) {
976; CHECK-LABEL: qpTrunc:
977; CHECK:       # %bb.0: # %entry
978; CHECK-NEXT:    lxv v2, 0(r3)
979; CHECK-NEXT:    xsrqpi 1, v2, v2, 1
980; CHECK-NEXT:    stxv v2, 0(r4)
981; CHECK-NEXT:    blr
982;
983; CHECK-P8-LABEL: qpTrunc:
984; CHECK-P8:       # %bb.0: # %entry
985; CHECK-P8-NEXT:    mflr r0
986; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
987; CHECK-P8-NEXT:    .cfi_offset lr, 16
988; CHECK-P8-NEXT:    .cfi_offset r30, -16
989; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
990; CHECK-P8-NEXT:    stdu r1, -48(r1)
991; CHECK-P8-NEXT:    std r0, 64(r1)
992; CHECK-P8-NEXT:    mr r30, r4
993; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
994; CHECK-P8-NEXT:    xxswapd v2, vs0
995; CHECK-P8-NEXT:    bl truncf128
996; CHECK-P8-NEXT:    nop
997; CHECK-P8-NEXT:    xxswapd vs0, v2
998; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
999; CHECK-P8-NEXT:    addi r1, r1, 48
1000; CHECK-P8-NEXT:    ld r0, 16(r1)
1001; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1002; CHECK-P8-NEXT:    mtlr r0
1003; CHECK-P8-NEXT:    blr
1004entry:
1005  %0 = load fp128, ptr %a, align 16
1006  %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
1007  store fp128 %1, ptr %res, align 16
1008  ret void
1009}
1010declare fp128 @llvm.trunc.f128(fp128 %Val)
1011
; qpRound: load an fp128 from %a, apply llvm.round.f128, store the result to
; %res.
;   * pwr9 run: expected inline as `xsrqpi 0, v2, v2, 0`; unlike the
;     ceil/floor/trunc tests, the first operand here is 0.
;   * pwr8 run: expected to call roundf128, keeping %res in r30 over the call
;     and storing the returned v2 through it.
1012define dso_local void @qpRound(ptr nocapture readonly %a, ptr nocapture %res) {
1013; CHECK-LABEL: qpRound:
1014; CHECK:       # %bb.0: # %entry
1015; CHECK-NEXT:    lxv v2, 0(r3)
1016; CHECK-NEXT:    xsrqpi 0, v2, v2, 0
1017; CHECK-NEXT:    stxv v2, 0(r4)
1018; CHECK-NEXT:    blr
1019;
1020; CHECK-P8-LABEL: qpRound:
1021; CHECK-P8:       # %bb.0: # %entry
1022; CHECK-P8-NEXT:    mflr r0
1023; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1024; CHECK-P8-NEXT:    .cfi_offset lr, 16
1025; CHECK-P8-NEXT:    .cfi_offset r30, -16
1026; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1027; CHECK-P8-NEXT:    stdu r1, -48(r1)
1028; CHECK-P8-NEXT:    std r0, 64(r1)
1029; CHECK-P8-NEXT:    mr r30, r4
1030; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1031; CHECK-P8-NEXT:    xxswapd v2, vs0
1032; CHECK-P8-NEXT:    bl roundf128
1033; CHECK-P8-NEXT:    nop
1034; CHECK-P8-NEXT:    xxswapd vs0, v2
1035; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
1036; CHECK-P8-NEXT:    addi r1, r1, 48
1037; CHECK-P8-NEXT:    ld r0, 16(r1)
1038; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1039; CHECK-P8-NEXT:    mtlr r0
1040; CHECK-P8-NEXT:    blr
1041entry:
1042  %0 = load fp128, ptr %a, align 16
1043  %1 = tail call fp128 @llvm.round.f128(fp128 %0)
1044  store fp128 %1, ptr %res, align 16
1045  ret void
1046}
1047declare fp128 @llvm.round.f128(fp128 %Val)
1048
; qpLRound: load an fp128 from %a, convert it with llvm.lround (i32 result),
; and store the i32 to %res.
; Both the pwr9 and the pwr8 run expect a call to lroundf128 followed by a
; 32-bit `stw r3, 0(r30)`; the two sequences differ only in how the operand
; reaches v2 (lxv on pwr9, lxvd2x + xxswapd on pwr8).
; NOTE(review): the intrinsic is spelled without a result-type suffix; the
; LangRef form is llvm.lround.i32.f128 - presumably accepted through intrinsic
; name remangling, confirm before relying on the short name elsewhere.
; NOTE(review): the i32 store is marked `align 16`; looks carried over from the
; fp128 tests - confirm it is intentional.
1049define dso_local void @qpLRound(ptr nocapture readonly %a, ptr nocapture %res) {
1050; CHECK-LABEL: qpLRound:
1051; CHECK:       # %bb.0: # %entry
1052; CHECK-NEXT:    mflr r0
1053; CHECK-NEXT:    .cfi_def_cfa_offset 48
1054; CHECK-NEXT:    .cfi_offset lr, 16
1055; CHECK-NEXT:    .cfi_offset r30, -16
1056; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1057; CHECK-NEXT:    stdu r1, -48(r1)
1058; CHECK-NEXT:    std r0, 64(r1)
1059; CHECK-NEXT:    mr r30, r4
1060; CHECK-NEXT:    lxv v2, 0(r3)
1061; CHECK-NEXT:    bl lroundf128
1062; CHECK-NEXT:    nop
1063; CHECK-NEXT:    stw r3, 0(r30)
1064; CHECK-NEXT:    addi r1, r1, 48
1065; CHECK-NEXT:    ld r0, 16(r1)
1066; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1067; CHECK-NEXT:    mtlr r0
1068; CHECK-NEXT:    blr
1069;
1070; CHECK-P8-LABEL: qpLRound:
1071; CHECK-P8:       # %bb.0: # %entry
1072; CHECK-P8-NEXT:    mflr r0
1073; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1074; CHECK-P8-NEXT:    .cfi_offset lr, 16
1075; CHECK-P8-NEXT:    .cfi_offset r30, -16
1076; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1077; CHECK-P8-NEXT:    stdu r1, -48(r1)
1078; CHECK-P8-NEXT:    std r0, 64(r1)
1079; CHECK-P8-NEXT:    mr r30, r4
1080; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1081; CHECK-P8-NEXT:    xxswapd v2, vs0
1082; CHECK-P8-NEXT:    bl lroundf128
1083; CHECK-P8-NEXT:    nop
1084; CHECK-P8-NEXT:    stw r3, 0(r30)
1085; CHECK-P8-NEXT:    addi r1, r1, 48
1086; CHECK-P8-NEXT:    ld r0, 16(r1)
1087; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1088; CHECK-P8-NEXT:    mtlr r0
1089; CHECK-P8-NEXT:    blr
1090entry:
1091  %0 = load fp128, ptr %a, align 16
1092  %1 = tail call i32 @llvm.lround.f128(fp128 %0)
1093  store i32 %1, ptr %res, align 16
1094  ret void
1095}
1096declare i32 @llvm.lround.f128(fp128 %Val)
1097
; qpLLRound: load an fp128 from %a, convert it with llvm.llround (i64 result),
; and store the i64 to %res.
; Both runs expect a call to llroundf128 followed by a 64-bit
; `std r3, 0(r30)`; only the operand load differs (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; NOTE(review): the intrinsic is spelled without a result-type suffix; the
; LangRef form is llvm.llround.i64.f128 - presumably accepted through
; intrinsic name remangling, confirm.
1098define dso_local void @qpLLRound(ptr nocapture readonly %a, ptr nocapture %res) {
1099; CHECK-LABEL: qpLLRound:
1100; CHECK:       # %bb.0: # %entry
1101; CHECK-NEXT:    mflr r0
1102; CHECK-NEXT:    .cfi_def_cfa_offset 48
1103; CHECK-NEXT:    .cfi_offset lr, 16
1104; CHECK-NEXT:    .cfi_offset r30, -16
1105; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1106; CHECK-NEXT:    stdu r1, -48(r1)
1107; CHECK-NEXT:    std r0, 64(r1)
1108; CHECK-NEXT:    mr r30, r4
1109; CHECK-NEXT:    lxv v2, 0(r3)
1110; CHECK-NEXT:    bl llroundf128
1111; CHECK-NEXT:    nop
1112; CHECK-NEXT:    std r3, 0(r30)
1113; CHECK-NEXT:    addi r1, r1, 48
1114; CHECK-NEXT:    ld r0, 16(r1)
1115; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1116; CHECK-NEXT:    mtlr r0
1117; CHECK-NEXT:    blr
1118;
1119; CHECK-P8-LABEL: qpLLRound:
1120; CHECK-P8:       # %bb.0: # %entry
1121; CHECK-P8-NEXT:    mflr r0
1122; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1123; CHECK-P8-NEXT:    .cfi_offset lr, 16
1124; CHECK-P8-NEXT:    .cfi_offset r30, -16
1125; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1126; CHECK-P8-NEXT:    stdu r1, -48(r1)
1127; CHECK-P8-NEXT:    std r0, 64(r1)
1128; CHECK-P8-NEXT:    mr r30, r4
1129; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1130; CHECK-P8-NEXT:    xxswapd v2, vs0
1131; CHECK-P8-NEXT:    bl llroundf128
1132; CHECK-P8-NEXT:    nop
1133; CHECK-P8-NEXT:    std r3, 0(r30)
1134; CHECK-P8-NEXT:    addi r1, r1, 48
1135; CHECK-P8-NEXT:    ld r0, 16(r1)
1136; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1137; CHECK-P8-NEXT:    mtlr r0
1138; CHECK-P8-NEXT:    blr
1139entry:
1140  %0 = load fp128, ptr %a, align 16
1141  %1 = tail call i64 @llvm.llround.f128(fp128 %0)
1142  store i64 %1, ptr %res, align 16
1143  ret void
1144}
1145declare i64 @llvm.llround.f128(fp128 %Val)
1146
; qpRint: load an fp128 from %a, apply llvm.rint.f128, store the result to
; %res.
;   * pwr9 run: expected inline as `xsrqpix 0, v2, v2, 3` - note the
;     xsrqpix mnemonic, where the other rounding tests in this file expect
;     xsrqpi.
;   * pwr8 run: expected to call rintf128, with %res kept in r30 across the
;     call and the returned v2 stored through it.
1147define dso_local void @qpRint(ptr nocapture readonly %a, ptr nocapture %res) {
1148; CHECK-LABEL: qpRint:
1149; CHECK:       # %bb.0: # %entry
1150; CHECK-NEXT:    lxv v2, 0(r3)
1151; CHECK-NEXT:    xsrqpix 0, v2, v2, 3
1152; CHECK-NEXT:    stxv v2, 0(r4)
1153; CHECK-NEXT:    blr
1154;
1155; CHECK-P8-LABEL: qpRint:
1156; CHECK-P8:       # %bb.0: # %entry
1157; CHECK-P8-NEXT:    mflr r0
1158; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1159; CHECK-P8-NEXT:    .cfi_offset lr, 16
1160; CHECK-P8-NEXT:    .cfi_offset r30, -16
1161; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1162; CHECK-P8-NEXT:    stdu r1, -48(r1)
1163; CHECK-P8-NEXT:    std r0, 64(r1)
1164; CHECK-P8-NEXT:    mr r30, r4
1165; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1166; CHECK-P8-NEXT:    xxswapd v2, vs0
1167; CHECK-P8-NEXT:    bl rintf128
1168; CHECK-P8-NEXT:    nop
1169; CHECK-P8-NEXT:    xxswapd vs0, v2
1170; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
1171; CHECK-P8-NEXT:    addi r1, r1, 48
1172; CHECK-P8-NEXT:    ld r0, 16(r1)
1173; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1174; CHECK-P8-NEXT:    mtlr r0
1175; CHECK-P8-NEXT:    blr
1176entry:
1177  %0 = load fp128, ptr %a, align 16
1178  %1 = tail call fp128 @llvm.rint.f128(fp128 %0)
1179  store fp128 %1, ptr %res, align 16
1180  ret void
1181}
1182declare fp128 @llvm.rint.f128(fp128 %Val)
1183
; qpLRint: load an fp128 from %a, convert it with llvm.lrint (i32 result), and
; store the i32 to %res.
; Both runs expect a call to lrintf128 followed by a 32-bit
; `stw r3, 0(r30)`; only the operand load differs (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; NOTE(review): the intrinsic is spelled without a result-type suffix; the
; LangRef form is llvm.lrint.i32.f128 - presumably accepted through intrinsic
; name remangling, confirm.
; NOTE(review): the i32 store is marked `align 16`; confirm it is intentional.
1184define dso_local void @qpLRint(ptr nocapture readonly %a, ptr nocapture %res) {
1185; CHECK-LABEL: qpLRint:
1186; CHECK:       # %bb.0: # %entry
1187; CHECK-NEXT:    mflr r0
1188; CHECK-NEXT:    .cfi_def_cfa_offset 48
1189; CHECK-NEXT:    .cfi_offset lr, 16
1190; CHECK-NEXT:    .cfi_offset r30, -16
1191; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1192; CHECK-NEXT:    stdu r1, -48(r1)
1193; CHECK-NEXT:    std r0, 64(r1)
1194; CHECK-NEXT:    mr r30, r4
1195; CHECK-NEXT:    lxv v2, 0(r3)
1196; CHECK-NEXT:    bl lrintf128
1197; CHECK-NEXT:    nop
1198; CHECK-NEXT:    stw r3, 0(r30)
1199; CHECK-NEXT:    addi r1, r1, 48
1200; CHECK-NEXT:    ld r0, 16(r1)
1201; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1202; CHECK-NEXT:    mtlr r0
1203; CHECK-NEXT:    blr
1204;
1205; CHECK-P8-LABEL: qpLRint:
1206; CHECK-P8:       # %bb.0: # %entry
1207; CHECK-P8-NEXT:    mflr r0
1208; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1209; CHECK-P8-NEXT:    .cfi_offset lr, 16
1210; CHECK-P8-NEXT:    .cfi_offset r30, -16
1211; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1212; CHECK-P8-NEXT:    stdu r1, -48(r1)
1213; CHECK-P8-NEXT:    std r0, 64(r1)
1214; CHECK-P8-NEXT:    mr r30, r4
1215; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1216; CHECK-P8-NEXT:    xxswapd v2, vs0
1217; CHECK-P8-NEXT:    bl lrintf128
1218; CHECK-P8-NEXT:    nop
1219; CHECK-P8-NEXT:    stw r3, 0(r30)
1220; CHECK-P8-NEXT:    addi r1, r1, 48
1221; CHECK-P8-NEXT:    ld r0, 16(r1)
1222; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1223; CHECK-P8-NEXT:    mtlr r0
1224; CHECK-P8-NEXT:    blr
1225entry:
1226  %0 = load fp128, ptr %a, align 16
1227  %1 = tail call i32 @llvm.lrint.f128(fp128 %0)
1228  store i32 %1, ptr %res, align 16
1229  ret void
1230}
1231declare i32 @llvm.lrint.f128(fp128 %Val)
1232
; qpLLRint: load an fp128 from %a, convert it with llvm.llrint (i64 result),
; and store the i64 to %res.
; Both runs expect a call to llrintf128 followed by a 64-bit
; `std r3, 0(r30)`; only the operand load differs (lxv on pwr9,
; lxvd2x + xxswapd on pwr8).
; NOTE(review): the intrinsic is spelled without a result-type suffix; the
; LangRef form is llvm.llrint.i64.f128 - presumably accepted through intrinsic
; name remangling, confirm.
1233define dso_local void @qpLLRint(ptr nocapture readonly %a, ptr nocapture %res) {
1234; CHECK-LABEL: qpLLRint:
1235; CHECK:       # %bb.0: # %entry
1236; CHECK-NEXT:    mflr r0
1237; CHECK-NEXT:    .cfi_def_cfa_offset 48
1238; CHECK-NEXT:    .cfi_offset lr, 16
1239; CHECK-NEXT:    .cfi_offset r30, -16
1240; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1241; CHECK-NEXT:    stdu r1, -48(r1)
1242; CHECK-NEXT:    std r0, 64(r1)
1243; CHECK-NEXT:    mr r30, r4
1244; CHECK-NEXT:    lxv v2, 0(r3)
1245; CHECK-NEXT:    bl llrintf128
1246; CHECK-NEXT:    nop
1247; CHECK-NEXT:    std r3, 0(r30)
1248; CHECK-NEXT:    addi r1, r1, 48
1249; CHECK-NEXT:    ld r0, 16(r1)
1250; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1251; CHECK-NEXT:    mtlr r0
1252; CHECK-NEXT:    blr
1253;
1254; CHECK-P8-LABEL: qpLLRint:
1255; CHECK-P8:       # %bb.0: # %entry
1256; CHECK-P8-NEXT:    mflr r0
1257; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1258; CHECK-P8-NEXT:    .cfi_offset lr, 16
1259; CHECK-P8-NEXT:    .cfi_offset r30, -16
1260; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1261; CHECK-P8-NEXT:    stdu r1, -48(r1)
1262; CHECK-P8-NEXT:    std r0, 64(r1)
1263; CHECK-P8-NEXT:    mr r30, r4
1264; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1265; CHECK-P8-NEXT:    xxswapd v2, vs0
1266; CHECK-P8-NEXT:    bl llrintf128
1267; CHECK-P8-NEXT:    nop
1268; CHECK-P8-NEXT:    std r3, 0(r30)
1269; CHECK-P8-NEXT:    addi r1, r1, 48
1270; CHECK-P8-NEXT:    ld r0, 16(r1)
1271; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1272; CHECK-P8-NEXT:    mtlr r0
1273; CHECK-P8-NEXT:    blr
1274entry:
1275  %0 = load fp128, ptr %a, align 16
1276  %1 = tail call i64 @llvm.llrint.f128(fp128 %0)
1277  store i64 %1, ptr %res, align 16
1278  ret void
1279}
1280declare i64 @llvm.llrint.f128(fp128 %Val)
1281
; qpNearByInt: load an fp128 from %a, apply llvm.nearbyint.f128, store the
; result to %res.
;   * pwr9 run: expected inline as `xsrqpi 0, v2, v2, 3`.
;   * pwr8 run: expected to call nearbyintf128, with %res kept in r30 across
;     the call and the returned v2 swapped and stored through it.
1282define dso_local void @qpNearByInt(ptr nocapture readonly %a, ptr nocapture %res) {
1283; CHECK-LABEL: qpNearByInt:
1284; CHECK:       # %bb.0: # %entry
1285; CHECK-NEXT:    lxv v2, 0(r3)
1286; CHECK-NEXT:    xsrqpi 0, v2, v2, 3
1287; CHECK-NEXT:    stxv v2, 0(r4)
1288; CHECK-NEXT:    blr
1289;
1290; CHECK-P8-LABEL: qpNearByInt:
1291; CHECK-P8:       # %bb.0: # %entry
1292; CHECK-P8-NEXT:    mflr r0
1293; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1294; CHECK-P8-NEXT:    .cfi_offset lr, 16
1295; CHECK-P8-NEXT:    .cfi_offset r30, -16
1296; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1297; CHECK-P8-NEXT:    stdu r1, -48(r1)
1298; CHECK-P8-NEXT:    std r0, 64(r1)
1299; CHECK-P8-NEXT:    mr r30, r4
1300; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1301; CHECK-P8-NEXT:    xxswapd v2, vs0
1302; CHECK-P8-NEXT:    bl nearbyintf128
1303; CHECK-P8-NEXT:    nop
1304; CHECK-P8-NEXT:    xxswapd vs0, v2
1305; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
1306; CHECK-P8-NEXT:    addi r1, r1, 48
1307; CHECK-P8-NEXT:    ld r0, 16(r1)
1308; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1309; CHECK-P8-NEXT:    mtlr r0
1310; CHECK-P8-NEXT:    blr
1311entry:
1312  %0 = load fp128, ptr %a, align 16
1313  %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
1314  store fp128 %1, ptr %res, align 16
1315  ret void
1316}
1317declare fp128 @llvm.nearbyint.f128(fp128 %Val)
1318
; qpFMA: load three fp128 values from %a, %b and %c, combine them with
; llvm.fma.f128, and store the result to %res.
;   * pwr9 run: expected inline as `xsmaddqp v4, v2, v3`, where v4 holds the
;     value loaded from %c (r5) and is also the register stored to %res (r6).
;   * pwr8 run: expected to call fmaf128 with the three operands in v2, v3
;     and v4; %res (r6) is kept in r30 across the call and the returned v2 is
;     swapped and stored through it.
1319define dso_local void @qpFMA(ptr %a, ptr %b, ptr %c, ptr %res) {
1320; CHECK-LABEL: qpFMA:
1321; CHECK:       # %bb.0: # %entry
1322; CHECK-NEXT:    lxv v2, 0(r3)
1323; CHECK-NEXT:    lxv v3, 0(r4)
1324; CHECK-NEXT:    lxv v4, 0(r5)
1325; CHECK-NEXT:    xsmaddqp v4, v2, v3
1326; CHECK-NEXT:    stxv v4, 0(r6)
1327; CHECK-NEXT:    blr
1328;
1329; CHECK-P8-LABEL: qpFMA:
1330; CHECK-P8:       # %bb.0: # %entry
1331; CHECK-P8-NEXT:    mflr r0
1332; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1333; CHECK-P8-NEXT:    .cfi_offset lr, 16
1334; CHECK-P8-NEXT:    .cfi_offset r30, -16
1335; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1336; CHECK-P8-NEXT:    stdu r1, -48(r1)
1337; CHECK-P8-NEXT:    std r0, 64(r1)
1338; CHECK-P8-NEXT:    mr r30, r6
1339; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1340; CHECK-P8-NEXT:    xxswapd v2, vs0
1341; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
1342; CHECK-P8-NEXT:    xxswapd v3, vs0
1343; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
1344; CHECK-P8-NEXT:    xxswapd v4, vs0
1345; CHECK-P8-NEXT:    bl fmaf128
1346; CHECK-P8-NEXT:    nop
1347; CHECK-P8-NEXT:    xxswapd vs0, v2
1348; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
1349; CHECK-P8-NEXT:    addi r1, r1, 48
1350; CHECK-P8-NEXT:    ld r0, 16(r1)
1351; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1352; CHECK-P8-NEXT:    mtlr r0
1353; CHECK-P8-NEXT:    blr
1354entry:
1355  %0 = load fp128, ptr %a, align 16
1356  %1 = load fp128, ptr %b, align 16
1357  %2 = load fp128, ptr %c, align 16
1358  %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2)
1359  store fp128 %3, ptr %res, align 16
1360  ret void
1361}
1362declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
1363
; qpFREXP: load an fp128 from %a and split it with llvm.frexp.f128.i32, which
; yields a { fp128, i32 } pair. The i32 element is stored to %b and the fp128
; element is returned.
; Both runs expect a call to frexpf128 with r4 (%b) copied into r5 beforehand
; and no separate store after the call - presumably the library routine writes
; the i32 through that pointer (verify against the frexpf128 prototype). The
; fp128 result is returned in place, so no callee-saved register is needed and
; the frame is 32 bytes rather than the 48 used by the store-to-%res tests.
1364define dso_local fp128 @qpFREXP(ptr %a, ptr %b) {
1365; CHECK-LABEL: qpFREXP:
1366; CHECK:       # %bb.0: # %entry
1367; CHECK-NEXT:    mflr r0
1368; CHECK-NEXT:    stdu r1, -32(r1)
1369; CHECK-NEXT:    std r0, 48(r1)
1370; CHECK-NEXT:    .cfi_def_cfa_offset 32
1371; CHECK-NEXT:    .cfi_offset lr, 16
1372; CHECK-NEXT:    lxv v2, 0(r3)
1373; CHECK-NEXT:    mr r5, r4
1374; CHECK-NEXT:    bl frexpf128
1375; CHECK-NEXT:    nop
1376; CHECK-NEXT:    addi r1, r1, 32
1377; CHECK-NEXT:    ld r0, 16(r1)
1378; CHECK-NEXT:    mtlr r0
1379; CHECK-NEXT:    blr
1380;
1381; CHECK-P8-LABEL: qpFREXP:
1382; CHECK-P8:       # %bb.0: # %entry
1383; CHECK-P8-NEXT:    mflr r0
1384; CHECK-P8-NEXT:    stdu r1, -32(r1)
1385; CHECK-P8-NEXT:    std r0, 48(r1)
1386; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
1387; CHECK-P8-NEXT:    .cfi_offset lr, 16
1388; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
1389; CHECK-P8-NEXT:    mr r5, r4
1390; CHECK-P8-NEXT:    xxswapd v2, vs0
1391; CHECK-P8-NEXT:    bl frexpf128
1392; CHECK-P8-NEXT:    nop
1393; CHECK-P8-NEXT:    addi r1, r1, 32
1394; CHECK-P8-NEXT:    ld r0, 16(r1)
1395; CHECK-P8-NEXT:    mtlr r0
1396; CHECK-P8-NEXT:    blr
1397entry:
1398  %0 = load fp128, ptr %a, align 16
1399  %1 = tail call { fp128, i32 } @llvm.frexp.f128.i32(fp128 %0)
1400  %2 = extractvalue { fp128, i32 } %1, 1
1401  store i32 %2, ptr %b, align 4
1402  %3 = extractvalue { fp128, i32 } %1, 0
1403  ret fp128 %3
1404}
1405declare { fp128, i32 } @llvm.frexp.f128.i32(fp128)
1406