; xref: /llvm-project/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll (revision aaa37d6755e635bbd62ba58896acd54ceef64610)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR9
; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR8

; Test data shared by the functions below: one fp128 scalar and two
; four-element fp128 arrays, all internal, zero-initialized and 16-byte
; aligned. The expected output reaches all three through a single
; .L_MergedGlobals symbol (offset 0 for @a, 16 for @x and 80 for @y in the
; pwr9 checks of fmul_ctrloop_fp128).
@a = internal global fp128 0xL00000000000000000000000000000000, align 16
@x = internal global [4 x fp128] zeroinitializer, align 16
@y = internal global [4 x fp128] zeroinitializer, align 16

; fmul_ctrloop_fp128: y[i] = a * x[i] for i = 0..3, all operands fp128.
;
; @a is loaded once in the entry block; the loop body loads x[i], multiplies
; and stores to y[i], exiting when the incremented index reaches 4.
;
; Expected code, per the assertions below:
;  - pwr9 keeps the trip count in the count register (mtctr 4 ... bdnz) and
;    multiplies inline with xsmulqp.
;  - pwr8 calls __mulkf3 inside the loop and counts down in r29 with
;    addi/cmpldi/bc instead of bdnz (presumably because the call prevents
;    using the count register; confirm against the backend).
define void @fmul_ctrloop_fp128() nounwind {
; PWR9-LABEL: fmul_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
; PWR9-NEXT:    lxv 34, 0(3)
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB0_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lxv 35, 16(3)
; PWR9-NEXT:    xsmulqp 3, 2, 3
; PWR9-NEXT:    stxv 35, 80(3)
; PWR9-NEXT:    addi 3, 3, 16
; PWR9-NEXT:    bdnz .LBB0_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fmul_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    stdu 1, -112(1)
; PWR8-NEXT:    li 3, 48
; PWR8-NEXT:    std 0, 128(1)
; PWR8-NEXT:    std 30, 96(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 27, 72(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 28, 80(1) # 8-byte Folded Spill
; PWR8-NEXT:    li 28, 16
; PWR8-NEXT:    li 27, 80
; PWR8-NEXT:    std 29, 88(1) # 8-byte Folded Spill
; PWR8-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PWR8-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
; PWR8-NEXT:    li 29, 4
; PWR8-NEXT:    addi 30, 3, .L_MergedGlobals@toc@l
; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
; PWR8-NEXT:    lxvd2x 0, 0, 30
; PWR8-NEXT:    xxswapd 63, 0
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB0_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    lxvd2x 0, 30, 28
; PWR8-NEXT:    vmr 2, 31
; PWR8-NEXT:    addi 26, 30, 16
; PWR8-NEXT:    xxswapd 35, 0
; PWR8-NEXT:    bl __mulkf3
; PWR8-NEXT:    nop
; PWR8-NEXT:    addi 29, 29, -1
; PWR8-NEXT:    xxswapd 0, 34
; PWR8-NEXT:    cmpldi 29, 0
; PWR8-NEXT:    stxvd2x 0, 30, 27
; PWR8-NEXT:    mr 30, 26
; PWR8-NEXT:    bc 12, 1, .LBB0_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    li 3, 48
; PWR8-NEXT:    ld 30, 96(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, 88(1) # 8-byte Folded Reload
; PWR8-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PWR8-NEXT:    ld 28, 80(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 27, 72(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 26, 64(1) # 8-byte Folded Reload
; PWR8-NEXT:    addi 1, 1, 112
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  ; Loop-invariant multiplicand, loaded once before the loop.
  %0 = load fp128, ptr @a, align 16
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [4 x fp128], ptr @x, i64 0, i64 %i.06
  %1 = load fp128, ptr %arrayidx, align 16
  %mul = fmul fp128 %0, %1
  %arrayidx1 = getelementptr inbounds [4 x fp128], ptr @y, i64 0, i64 %i.06
  store fp128 %mul, ptr %arrayidx1, align 16
  %inc = add nuw nsw i64 %i.06, 1
  ; Constant trip count of 4.
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; fpext_ctrloop_fp128: y[i] = (fp128) a[i] for i = 0..3.
;
; %a points at the double elements that are read; each one is extended to
; fp128 and stored into the global array @y.
;
; Expected code, per the assertions below:
;  - pwr9 uses a count-register loop (mtctr 4 ... bdnz) and converts inline
;    with xscvdpqp.
;  - pwr8 calls __extenddfkf2 inside the loop and counts down in r29 with
;    addi/cmpldi/bc instead of bdnz.
define void @fpext_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fpext_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, -8
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    addis 4, 2, .L_MergedGlobals@toc@ha
; PWR9-NEXT:    addi 4, 4, .L_MergedGlobals@toc@l
; PWR9-NEXT:    addi 4, 4, 64
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB1_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lfdu 0, 8(3)
; PWR9-NEXT:    xscpsgndp 34, 0, 0
; PWR9-NEXT:    xscvdpqp 2, 2
; PWR9-NEXT:    stxv 34, 16(4)
; PWR9-NEXT:    addi 4, 4, 16
; PWR9-NEXT:    bdnz .LBB1_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fpext_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; PWR8-NEXT:    stdu 1, -64(1)
; PWR8-NEXT:    addi 30, 3, -8
; PWR8-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
; PWR8-NEXT:    li 29, 4
; PWR8-NEXT:    std 0, 80(1)
; PWR8-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
; PWR8-NEXT:    addi 28, 3, 64
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB1_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    lfdu 1, 8(30)
; PWR8-NEXT:    addi 28, 28, 16
; PWR8-NEXT:    bl __extenddfkf2
; PWR8-NEXT:    nop
; PWR8-NEXT:    addi 29, 29, -1
; PWR8-NEXT:    xxswapd 0, 34
; PWR8-NEXT:    cmpldi 29, 0
; PWR8-NEXT:    stxvd2x 0, 0, 28
; PWR8-NEXT:    bc 12, 1, .LBB1_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    addi 1, 1, 64
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  br label %for.body

for.body:
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %i.06
  %0 = load double, ptr %arrayidx, align 8
  ; double -> fp128 extension: the operation under test.
  %ext = fpext double %0 to fp128
  %arrayidx1 = getelementptr inbounds [4 x fp128], ptr @y, i64 0, i64 %i.06
  store fp128 %ext, ptr %arrayidx1, align 16
  %inc = add nuw nsw i64 %i.06, 1
  ; Constant trip count of 4.
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; fptrunc_ctrloop_fp128: a[i] = (double) x[i] for i = 0..3.
;
; Each fp128 element of the global array @x is truncated to double and stored
; through %a.
;
; Expected code, per the assertions below:
;  - pwr9 uses a count-register loop (mtctr 4 ... bdnz) and converts inline
;    with xscvqpdp.
;  - pwr8 calls __trunckfdf2 inside the loop and counts down in r29 with
;    addi/cmpldi/bc instead of bdnz.
define void @fptrunc_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fptrunc_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, -8
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    addis 4, 2, .L_MergedGlobals@toc@ha
; PWR9-NEXT:    addi 4, 4, .L_MergedGlobals@toc@l
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB2_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lxv 34, 16(4)
; PWR9-NEXT:    addi 4, 4, 16
; PWR9-NEXT:    xscvqpdp 2, 2
; PWR9-NEXT:    xscpsgndp 0, 34, 34
; PWR9-NEXT:    stfdu 0, 8(3)
; PWR9-NEXT:    bdnz .LBB2_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fptrunc_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; PWR8-NEXT:    stdu 1, -64(1)
; PWR8-NEXT:    addi 30, 3, -8
; PWR8-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
; PWR8-NEXT:    li 29, 4
; PWR8-NEXT:    std 0, 80(1)
; PWR8-NEXT:    addi 28, 3, .L_MergedGlobals@toc@l
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB2_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    addi 28, 28, 16
; PWR8-NEXT:    lxvd2x 0, 0, 28
; PWR8-NEXT:    xxswapd 34, 0
; PWR8-NEXT:    bl __trunckfdf2
; PWR8-NEXT:    nop
; PWR8-NEXT:    addi 29, 29, -1
; PWR8-NEXT:    stfdu 1, 8(30)
; PWR8-NEXT:    cmpldi 29, 0
; PWR8-NEXT:    bc 12, 1, .LBB2_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    addi 1, 1, 64
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  br label %for.body

for.body:
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [4 x fp128], ptr @x, i64 0, i64 %i.06
  %0 = load fp128, ptr %arrayidx, align 16
  ; fp128 -> double truncation: the operation under test.
  %trunc = fptrunc fp128 %0 to double
  %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.06
  ; NOTE(review): this store claims align 16, but consecutive double elements
  ; are 8 bytes apart, so the claim cannot hold on every iteration. Left
  ; unchanged to keep the autogenerated checks valid; confirm whether align 8
  ; was intended.
  store double %trunc, ptr %arrayidx1, align 16
  %inc = add nuw nsw i64 %i.06, 1
  ; Constant trip count of 4.
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; External variadic function taking a pointer as its first argument.
; NOTE(review): nothing in the visible part of this file calls @obfuscate, and
; attribute group #2 is not defined in the visible part either; this looks
; like a leftover from the code the test was reduced from. Confirm before
; removing.
declare void @obfuscate(ptr, ...) local_unnamed_addr #2
