xref: /llvm-project/llvm/test/CodeGen/PowerPC/ppc-float-spill.ll (revision 2e47aafb02f3e46fc3e01799053e01835239151d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
4; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix LE-LINUX-P7
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix LE-LINUX-P8
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:   -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix BE-LINUX-P7
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
12; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
13; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix BE-LINUX-P8
14; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
15; RUN:   -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
16; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix AIX64-P7
17; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
18; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
19; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix AIX64-P8
20; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix \
21; RUN:   -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
22; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix AIX32-P7
23; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix \
24; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
25; RUN:   -O0 --data-sections < %s | FileCheck %s --check-prefix AIX32-P8
26
27;; Note that the above run lines all use -O0 because this causes the spill that
28;; exposed the issue in the first place. Higher opt levels will remove the spills
29;; and as a result will not expose the issue.
30
;; @caller calls @callee twice with the constant 1.0. The first result is stored
;; unchanged into field 0 of a local { float, float }; the second result is
;; multiplied by 2.0 and stored into field 1. The first result is therefore
;; still needed after the second call, and every check block below shows it
;; being saved to and restored from a 4-byte stack slot around that call
;; (the "4-byte Folded Spill" / "4-byte Folded Reload" lines).
;; The check lines are autogenerated (see the NOTE at the top of the file);
;; regenerate them with utils/update_llc_test_checks.py rather than editing
;; them by hand.
31define dso_local void @caller() #0 {
32; LE-LINUX-P7-LABEL: caller:
33; LE-LINUX-P7:       # %bb.0: # %entry
34; LE-LINUX-P7-NEXT:    mflr r0
35; LE-LINUX-P7-NEXT:    stdu r1, -112(r1)
36; LE-LINUX-P7-NEXT:    std r0, 128(r1)
37; LE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
38; LE-LINUX-P7-NEXT:    lfs f1, .LCPI0_1@toc@l(r3)
39; LE-LINUX-P7-NEXT:    bl callee
40; LE-LINUX-P7-NEXT:    nop
41; LE-LINUX-P7-NEXT:    stfs f1, 100(r1) # 4-byte Folded Spill
42; LE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
43; LE-LINUX-P7-NEXT:    lfs f1, .LCPI0_1@toc@l(r3)
44; LE-LINUX-P7-NEXT:    bl callee
45; LE-LINUX-P7-NEXT:    nop
46; LE-LINUX-P7-NEXT:    fmr f2, f1
47; LE-LINUX-P7-NEXT:    lfs f1, 100(r1) # 4-byte Folded Reload
48; LE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
49; LE-LINUX-P7-NEXT:    lfs f0, .LCPI0_0@toc@l(r3)
50; LE-LINUX-P7-NEXT:    fmuls f0, f0, f2
51; LE-LINUX-P7-NEXT:    stfs f1, 104(r1)
52; LE-LINUX-P7-NEXT:    stfs f0, 108(r1)
53; LE-LINUX-P7-NEXT:    addi r1, r1, 112
54; LE-LINUX-P7-NEXT:    ld r0, 16(r1)
55; LE-LINUX-P7-NEXT:    mtlr r0
56; LE-LINUX-P7-NEXT:    blr
57;
58; LE-LINUX-P8-LABEL: caller:
59; LE-LINUX-P8:       # %bb.0: # %entry
60; LE-LINUX-P8-NEXT:    mflr r0
61; LE-LINUX-P8-NEXT:    stdu r1, -64(r1)
62; LE-LINUX-P8-NEXT:    std r0, 80(r1)
63; LE-LINUX-P8-NEXT:    vspltisw v2, 1
64; LE-LINUX-P8-NEXT:    xxlor vs0, v2, v2
65; LE-LINUX-P8-NEXT:    xvcvsxwdp vs0, vs0
66; LE-LINUX-P8-NEXT:    fmr f1, f0
67; LE-LINUX-P8-NEXT:    li r3, 48
68; LE-LINUX-P8-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
69; LE-LINUX-P8-NEXT:    bl callee
70; LE-LINUX-P8-NEXT:    nop
71; LE-LINUX-P8-NEXT:    fmr f0, f1
72; LE-LINUX-P8-NEXT:    li r3, 48
73; LE-LINUX-P8-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
74; LE-LINUX-P8-NEXT:    stfs f0, 52(r1) # 4-byte Folded Spill
75; LE-LINUX-P8-NEXT:    bl callee
76; LE-LINUX-P8-NEXT:    nop
77; LE-LINUX-P8-NEXT:    fmr f0, f1
78; LE-LINUX-P8-NEXT:    lfs f1, 52(r1) # 4-byte Folded Reload
79; LE-LINUX-P8-NEXT:    xsaddsp f0, f0, f0
80; LE-LINUX-P8-NEXT:    stfs f1, 56(r1)
81; LE-LINUX-P8-NEXT:    stfs f0, 60(r1)
82; LE-LINUX-P8-NEXT:    addi r1, r1, 64
83; LE-LINUX-P8-NEXT:    ld r0, 16(r1)
84; LE-LINUX-P8-NEXT:    mtlr r0
85; LE-LINUX-P8-NEXT:    blr
86;
87; BE-LINUX-P7-LABEL: caller:
88; BE-LINUX-P7:       # %bb.0: # %entry
89; BE-LINUX-P7-NEXT:    mflr r0
90; BE-LINUX-P7-NEXT:    stdu r1, -128(r1)
91; BE-LINUX-P7-NEXT:    std r0, 144(r1)
92; BE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
93; BE-LINUX-P7-NEXT:    lfs f1, .LCPI0_1@toc@l(r3)
94; BE-LINUX-P7-NEXT:    bl callee
95; BE-LINUX-P7-NEXT:    nop
96; BE-LINUX-P7-NEXT:    stfs f1, 116(r1) # 4-byte Folded Spill
97; BE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
98; BE-LINUX-P7-NEXT:    lfs f1, .LCPI0_1@toc@l(r3)
99; BE-LINUX-P7-NEXT:    bl callee
100; BE-LINUX-P7-NEXT:    nop
101; BE-LINUX-P7-NEXT:    fmr f2, f1
102; BE-LINUX-P7-NEXT:    lfs f1, 116(r1) # 4-byte Folded Reload
103; BE-LINUX-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
104; BE-LINUX-P7-NEXT:    lfs f0, .LCPI0_0@toc@l(r3)
105; BE-LINUX-P7-NEXT:    fmuls f0, f0, f2
106; BE-LINUX-P7-NEXT:    stfs f1, 120(r1)
107; BE-LINUX-P7-NEXT:    stfs f0, 124(r1)
108; BE-LINUX-P7-NEXT:    addi r1, r1, 128
109; BE-LINUX-P7-NEXT:    ld r0, 16(r1)
110; BE-LINUX-P7-NEXT:    mtlr r0
111; BE-LINUX-P7-NEXT:    blr
112;
113; BE-LINUX-P8-LABEL: caller:
114; BE-LINUX-P8:       # %bb.0: # %entry
115; BE-LINUX-P8-NEXT:    mflr r0
116; BE-LINUX-P8-NEXT:    stdu r1, -144(r1)
117; BE-LINUX-P8-NEXT:    std r0, 160(r1)
118; BE-LINUX-P8-NEXT:    vspltisw v2, 1
119; BE-LINUX-P8-NEXT:    xxlor vs0, v2, v2
120; BE-LINUX-P8-NEXT:    xvcvsxwdp vs0, vs0
121; BE-LINUX-P8-NEXT:    fmr f1, f0
122; BE-LINUX-P8-NEXT:    li r3, 128
123; BE-LINUX-P8-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
124; BE-LINUX-P8-NEXT:    bl callee
125; BE-LINUX-P8-NEXT:    nop
126; BE-LINUX-P8-NEXT:    fmr f0, f1
127; BE-LINUX-P8-NEXT:    li r3, 128
128; BE-LINUX-P8-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
129; BE-LINUX-P8-NEXT:    stfs f0, 132(r1) # 4-byte Folded Spill
130; BE-LINUX-P8-NEXT:    bl callee
131; BE-LINUX-P8-NEXT:    nop
132; BE-LINUX-P8-NEXT:    fmr f0, f1
133; BE-LINUX-P8-NEXT:    lfs f1, 132(r1) # 4-byte Folded Reload
134; BE-LINUX-P8-NEXT:    xsaddsp f0, f0, f0
135; BE-LINUX-P8-NEXT:    stfs f1, 136(r1)
136; BE-LINUX-P8-NEXT:    stfs f0, 140(r1)
137; BE-LINUX-P8-NEXT:    addi r1, r1, 144
138; BE-LINUX-P8-NEXT:    ld r0, 16(r1)
139; BE-LINUX-P8-NEXT:    mtlr r0
140; BE-LINUX-P8-NEXT:    blr
141;
142; AIX64-P7-LABEL: caller:
143; AIX64-P7:       # %bb.0: # %entry
144; AIX64-P7-NEXT:    mflr r0
145; AIX64-P7-NEXT:    stdu r1, -128(r1)
146; AIX64-P7-NEXT:    std r0, 144(r1)
147; AIX64-P7-NEXT:    vspltisw v2, 1
148; AIX64-P7-NEXT:    xxlor vs0, v2, v2
149; AIX64-P7-NEXT:    xvcvsxwdp vs0, vs0
150; AIX64-P7-NEXT:    fmr f1, f0
151; AIX64-P7-NEXT:    bl .callee[PR]
152; AIX64-P7-NEXT:    nop
153; AIX64-P7-NEXT:    stfs f1, 116(r1) # 4-byte Folded Spill
154; AIX64-P7-NEXT:    vspltisw v2, 1
155; AIX64-P7-NEXT:    xxlor vs0, v2, v2
156; AIX64-P7-NEXT:    xvcvsxwdp vs0, vs0
157; AIX64-P7-NEXT:    fmr f1, f0
158; AIX64-P7-NEXT:    bl .callee[PR]
159; AIX64-P7-NEXT:    nop
160; AIX64-P7-NEXT:    fmr f2, f1
161; AIX64-P7-NEXT:    lfs f1, 116(r1) # 4-byte Folded Reload
162; AIX64-P7-NEXT:    ld r3, L..C0(r2) # %const.0
163; AIX64-P7-NEXT:    lfs f0, 0(r3)
164; AIX64-P7-NEXT:    fmuls f0, f0, f2
165; AIX64-P7-NEXT:    stfs f1, 120(r1)
166; AIX64-P7-NEXT:    stfs f0, 124(r1)
167; AIX64-P7-NEXT:    addi r1, r1, 128
168; AIX64-P7-NEXT:    ld r0, 16(r1)
169; AIX64-P7-NEXT:    mtlr r0
170; AIX64-P7-NEXT:    blr
171;
172; AIX64-P8-LABEL: caller:
173; AIX64-P8:       # %bb.0: # %entry
174; AIX64-P8-NEXT:    mflr r0
175; AIX64-P8-NEXT:    stdu r1, -144(r1)
176; AIX64-P8-NEXT:    std r0, 160(r1)
177; AIX64-P8-NEXT:    vspltisw v2, 1
178; AIX64-P8-NEXT:    xxlor vs0, v2, v2
179; AIX64-P8-NEXT:    xvcvsxwdp vs0, vs0
180; AIX64-P8-NEXT:    fmr f1, f0
181; AIX64-P8-NEXT:    li r3, 128
182; AIX64-P8-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
183; AIX64-P8-NEXT:    bl .callee[PR]
184; AIX64-P8-NEXT:    nop
185; AIX64-P8-NEXT:    fmr f0, f1
186; AIX64-P8-NEXT:    li r3, 128
187; AIX64-P8-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
188; AIX64-P8-NEXT:    stfs f0, 132(r1) # 4-byte Folded Spill
189; AIX64-P8-NEXT:    bl .callee[PR]
190; AIX64-P8-NEXT:    nop
191; AIX64-P8-NEXT:    fmr f0, f1
192; AIX64-P8-NEXT:    lfs f1, 132(r1) # 4-byte Folded Reload
193; AIX64-P8-NEXT:    xsaddsp f0, f0, f0
194; AIX64-P8-NEXT:    stfs f1, 136(r1)
195; AIX64-P8-NEXT:    stfs f0, 140(r1)
196; AIX64-P8-NEXT:    addi r1, r1, 144
197; AIX64-P8-NEXT:    ld r0, 16(r1)
198; AIX64-P8-NEXT:    mtlr r0
199; AIX64-P8-NEXT:    blr
200;
201; AIX32-P7-LABEL: caller:
202; AIX32-P7:       # %bb.0: # %entry
203; AIX32-P7-NEXT:    mflr r0
204; AIX32-P7-NEXT:    stwu r1, -80(r1)
205; AIX32-P7-NEXT:    stw r0, 88(r1)
206; AIX32-P7-NEXT:    vspltisw v2, 1
207; AIX32-P7-NEXT:    xxlor vs0, v2, v2
208; AIX32-P7-NEXT:    xvcvsxwdp vs0, vs0
209; AIX32-P7-NEXT:    fmr f1, f0
210; AIX32-P7-NEXT:    stfs f1, 64(r1) # 4-byte Folded Spill
211; AIX32-P7-NEXT:    bl .callee[PR]
212; AIX32-P7-NEXT:    nop
213; AIX32-P7-NEXT:    fmr f0, f1
214; AIX32-P7-NEXT:    lfs f1, 64(r1) # 4-byte Folded Reload
215; AIX32-P7-NEXT:    stfs f0, 68(r1) # 4-byte Folded Spill
216; AIX32-P7-NEXT:    bl .callee[PR]
217; AIX32-P7-NEXT:    nop
218; AIX32-P7-NEXT:    fmr f0, f1
219; AIX32-P7-NEXT:    lfs f1, 68(r1) # 4-byte Folded Reload
220; AIX32-P7-NEXT:    fadds f0, f0, f0
221; AIX32-P7-NEXT:    stfs f1, 72(r1)
222; AIX32-P7-NEXT:    stfs f0, 76(r1)
223; AIX32-P7-NEXT:    addi r1, r1, 80
224; AIX32-P7-NEXT:    lwz r0, 8(r1)
225; AIX32-P7-NEXT:    mtlr r0
226; AIX32-P7-NEXT:    blr
227;
228; AIX32-P8-LABEL: caller:
229; AIX32-P8:       # %bb.0: # %entry
230; AIX32-P8-NEXT:    mflr r0
231; AIX32-P8-NEXT:    stwu r1, -80(r1)
232; AIX32-P8-NEXT:    stw r0, 88(r1)
233; AIX32-P8-NEXT:    vspltisw v2, 1
234; AIX32-P8-NEXT:    xxlor vs0, v2, v2
235; AIX32-P8-NEXT:    xvcvsxwdp vs0, vs0
236; AIX32-P8-NEXT:    fmr f1, f0
237; AIX32-P8-NEXT:    li r3, 64
238; AIX32-P8-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
239; AIX32-P8-NEXT:    bl .callee[PR]
240; AIX32-P8-NEXT:    nop
241; AIX32-P8-NEXT:    fmr f0, f1
242; AIX32-P8-NEXT:    li r3, 64
243; AIX32-P8-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
244; AIX32-P8-NEXT:    stfs f0, 68(r1) # 4-byte Folded Spill
245; AIX32-P8-NEXT:    bl .callee[PR]
246; AIX32-P8-NEXT:    nop
247; AIX32-P8-NEXT:    fmr f0, f1
248; AIX32-P8-NEXT:    lfs f1, 68(r1) # 4-byte Folded Reload
249; AIX32-P8-NEXT:    xsaddsp f0, f0, f0
250; AIX32-P8-NEXT:    stfs f1, 72(r1)
251; AIX32-P8-NEXT:    stfs f0, 76(r1)
252; AIX32-P8-NEXT:    addi r1, r1, 80
253; AIX32-P8-NEXT:    lwz r0, 8(r1)
254; AIX32-P8-NEXT:    mtlr r0
255; AIX32-P8-NEXT:    blr
256entry:
  ;; Local two-float aggregate that receives both results.
257  %com1 = alloca { float, float }, align 8
  ;; Two separate calls with the same constant argument. %0 is not used until
  ;; after the second call, so it has to be kept alive across that call.
258  %0 = call contract float @callee(float 1.000000e+00)
259  %1 = call contract float @callee(float 1.000000e+00)
  ;; Only the second result is scaled. The pwr8 check blocks and the 32-bit AIX
  ;; pwr7 block show this multiply-by-2.0 emitted as an add of the value to
  ;; itself (xsaddsp / fadds); the remaining pwr7 blocks show an fmuls against
  ;; a loaded constant.
260  %mult = fmul contract float 2.000000e+00, %1
  ;; Field 0 <- first call result, unchanged.
261  %addr0 = getelementptr inbounds { float, float }, ptr %com1, i32 0, i32 0
262  store float %0, ptr %addr0, align 4
  ;; Field 1 <- doubled second call result.
263  %addr1 = getelementptr inbounds { float, float }, ptr %com1, i32 0, i32 1
264  store float %mult, ptr %addr1, align 4
265  ret void
266}
267
;; External function called twice by @caller; it is only declared here, with no
;; body in this file. It takes one float and returns a float.
268declare float @callee(float) #0
269
;; Attribute group #0 is referenced by both @caller and @callee above.
270attributes #0 = { nounwind }
271