xref: /llvm-project/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll (revision 0c1000cbd6d25d749c78f9c27fa985a2608ff217)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
3; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
4; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
7; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
9; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
12; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
13; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
14; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
15; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \
16; RUN: FileCheck %s --check-prefix=AIX-64
17; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
18; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \
19; RUN: FileCheck %s --check-prefix=AIX-32
20
21; Function Attrs: norecurse nounwind readonly
; test1: load a <2 x float> through %Ptr and widen it to <2 x double>.
; The check lines for the 64-bit runs expect a single 8-byte load (lfd/lfdx)
; followed by xxmrghw + xvcvspdp; the 32-bit AIX run expects two lfs loads
; joined by xxmrghd.
22define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
23; CHECK-LABEL: test1:
24; CHECK:       # %bb.0: # %entry
25; CHECK-NEXT:    lfd f0, 0(r3)
26; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
27; CHECK-NEXT:    xvcvspdp v2, vs0
28; CHECK-NEXT:    blr
29;
30; AIX-64-LABEL: test1:
31; AIX-64:       # %bb.0: # %entry
32; AIX-64-NEXT:    lfdx f0, 0, r3
33; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
34; AIX-64-NEXT:    xvcvspdp v2, vs0
35; AIX-64-NEXT:    blr
36;
37; AIX-32-LABEL: test1:
38; AIX-32:       # %bb.0: # %entry
39; AIX-32-NEXT:    lfs f0, 4(r3)
40; AIX-32-NEXT:    lfs f1, 0(r3)
41; AIX-32-NEXT:    xxmrghd v2, vs1, vs0
42; AIX-32-NEXT:    blr
43entry:
  ; Both float lanes are read with one 8-byte-aligned vector load.
44  %0 = load <2 x float>, <2 x float>* %Ptr, align 8
  ; Widen each lane from float to double; this is the value returned.
45  %1 = fpext <2 x float> %0 to <2 x double>
46  ret <2 x double> %1
47}
48
49; Function Attrs: norecurse nounwind readonly
; test2: subtract two loaded <2 x float> vectors (%a - %b) and widen the
; difference to <2 x double>.
; The check lines for the 64-bit runs expect two 8-byte loads, one xvsubsp,
; then xxmrghw + xvcvspdp; the 32-bit AIX run expects per-element lfs loads,
; vperm-based packing, xvsubsp, and a scalar convert of each lane before
; xxmrghd.
50define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
51; CHECK-LABEL: test2:
52; CHECK:       # %bb.0: # %entry
53; CHECK-NEXT:    lfd f0, 0(r4)
54; CHECK-NEXT:    lfd f1, 0(r3)
55; CHECK-NEXT:    xvsubsp vs0, vs1, vs0
56; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
57; CHECK-NEXT:    xvcvspdp v2, vs0
58; CHECK-NEXT:    blr
59;
60; AIX-64-LABEL: test2:
61; AIX-64:       # %bb.0: # %entry
62; AIX-64-NEXT:    lfdx f0, 0, r3
63; AIX-64-NEXT:    lfdx f1, 0, r4
64; AIX-64-NEXT:    xvsubsp vs0, vs0, vs1
65; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
66; AIX-64-NEXT:    xvcvspdp v2, vs0
67; AIX-64-NEXT:    blr
68;
69; AIX-32-LABEL: test2:
70; AIX-32:       # %bb.0: # %entry
71; AIX-32-NEXT:    lfs f0, 4(r3)
72; AIX-32-NEXT:    lfs f1, 0(r3)
73; AIX-32-NEXT:    lwz r5, L..C0(r2) # %const.0
74; AIX-32-NEXT:    lfs f2, 4(r4)
75; AIX-32-NEXT:    xscvdpspn v2, f0
76; AIX-32-NEXT:    lfs f0, 0(r4)
77; AIX-32-NEXT:    lxvw4x v0, 0, r5
78; AIX-32-NEXT:    xscvdpspn v3, f1
79; AIX-32-NEXT:    xscvdpspn v4, f2
80; AIX-32-NEXT:    xscvdpspn v5, f0
81; AIX-32-NEXT:    vperm v2, v3, v2, v0
82; AIX-32-NEXT:    vperm v3, v5, v4, v0
83; AIX-32-NEXT:    xvsubsp vs0, v2, v3
84; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
85; AIX-32-NEXT:    xscvspdpn f0, vs0
86; AIX-32-NEXT:    xscvspdpn f1, vs1
87; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
88; AIX-32-NEXT:    blr
89entry:
  ; Each operand is read with one 8-byte-aligned <2 x float> load.
90  %0 = load <2 x float>, <2 x float>* %a, align 8
91  %1 = load <2 x float>, <2 x float>* %b, align 8
  ; The arithmetic is done at float width; only the result is widened.
92  %sub = fsub <2 x float> %0, %1
93  %2 = fpext <2 x float> %sub to <2 x double>
94  ret <2 x double> %2
95}
96
97; Function Attrs: norecurse nounwind readonly
; test3: add two loaded <2 x float> vectors (%a + %b) and widen the sum to
; <2 x double>.
; The check lines for the 64-bit runs expect two 8-byte loads, one xvaddsp,
; then xxmrghw + xvcvspdp; the 32-bit AIX run expects per-element lfs loads,
; vperm-based packing, xvaddsp, and a scalar convert of each lane before
; xxmrghd.
99define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
100; CHECK-LABEL: test3:
101; CHECK:       # %bb.0: # %entry
102; CHECK-NEXT:    lfd f0, 0(r4)
103; CHECK-NEXT:    lfd f1, 0(r3)
104; CHECK-NEXT:    xvaddsp vs0, vs1, vs0
105; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
106; CHECK-NEXT:    xvcvspdp v2, vs0
107; CHECK-NEXT:    blr
108;
109; AIX-64-LABEL: test3:
110; AIX-64:       # %bb.0: # %entry
111; AIX-64-NEXT:    lfdx f0, 0, r3
112; AIX-64-NEXT:    lfdx f1, 0, r4
113; AIX-64-NEXT:    xvaddsp vs0, vs0, vs1
114; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
115; AIX-64-NEXT:    xvcvspdp v2, vs0
116; AIX-64-NEXT:    blr
117;
118; AIX-32-LABEL: test3:
119; AIX-32:       # %bb.0: # %entry
120; AIX-32-NEXT:    lfs f0, 4(r3)
121; AIX-32-NEXT:    lfs f1, 0(r3)
122; AIX-32-NEXT:    lwz r5, L..C1(r2) # %const.0
123; AIX-32-NEXT:    lfs f2, 4(r4)
124; AIX-32-NEXT:    xscvdpspn v2, f0
125; AIX-32-NEXT:    lfs f0, 0(r4)
126; AIX-32-NEXT:    lxvw4x v0, 0, r5
127; AIX-32-NEXT:    xscvdpspn v3, f1
128; AIX-32-NEXT:    xscvdpspn v4, f2
129; AIX-32-NEXT:    xscvdpspn v5, f0
130; AIX-32-NEXT:    vperm v2, v3, v2, v0
131; AIX-32-NEXT:    vperm v3, v5, v4, v0
132; AIX-32-NEXT:    xvaddsp vs0, v2, v3
133; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
134; AIX-32-NEXT:    xscvspdpn f0, vs0
135; AIX-32-NEXT:    xscvspdpn f1, vs1
136; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
137; AIX-32-NEXT:    blr
138entry:
  ; Each operand is read with one 8-byte-aligned <2 x float> load.
139  %0 = load <2 x float>, <2 x float>* %a, align 8
140  %1 = load <2 x float>, <2 x float>* %b, align 8
  ; The sum is computed at float width (named %add to match the fadd it
  ; holds); only the result is widened to double.
141  %add = fadd <2 x float> %0, %1
142  %2 = fpext <2 x float> %add to <2 x double>
143  ret <2 x double> %2
144}
145
146; Function Attrs: norecurse nounwind readonly
; test4: multiply two loaded <2 x float> vectors (%a * %b) and widen the
; product to <2 x double>.
; The check lines for the 64-bit runs expect two 8-byte loads, one xvmulsp,
; then xxmrghw + xvcvspdp; the 32-bit AIX run expects per-element lfs loads,
; vperm-based packing, xvmulsp, and a scalar convert of each lane before
; xxmrghd.
148define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
149; CHECK-LABEL: test4:
150; CHECK:       # %bb.0: # %entry
151; CHECK-NEXT:    lfd f0, 0(r4)
152; CHECK-NEXT:    lfd f1, 0(r3)
153; CHECK-NEXT:    xvmulsp vs0, vs1, vs0
154; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
155; CHECK-NEXT:    xvcvspdp v2, vs0
156; CHECK-NEXT:    blr
157;
158; AIX-64-LABEL: test4:
159; AIX-64:       # %bb.0: # %entry
160; AIX-64-NEXT:    lfdx f0, 0, r3
161; AIX-64-NEXT:    lfdx f1, 0, r4
162; AIX-64-NEXT:    xvmulsp vs0, vs0, vs1
163; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
164; AIX-64-NEXT:    xvcvspdp v2, vs0
165; AIX-64-NEXT:    blr
166;
167; AIX-32-LABEL: test4:
168; AIX-32:       # %bb.0: # %entry
169; AIX-32-NEXT:    lfs f0, 4(r3)
170; AIX-32-NEXT:    lfs f1, 0(r3)
171; AIX-32-NEXT:    lwz r5, L..C2(r2) # %const.0
172; AIX-32-NEXT:    lfs f2, 4(r4)
173; AIX-32-NEXT:    xscvdpspn v2, f0
174; AIX-32-NEXT:    lfs f0, 0(r4)
175; AIX-32-NEXT:    lxvw4x v0, 0, r5
176; AIX-32-NEXT:    xscvdpspn v3, f1
177; AIX-32-NEXT:    xscvdpspn v4, f2
178; AIX-32-NEXT:    xscvdpspn v5, f0
179; AIX-32-NEXT:    vperm v2, v3, v2, v0
180; AIX-32-NEXT:    vperm v3, v5, v4, v0
181; AIX-32-NEXT:    xvmulsp vs0, v2, v3
182; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
183; AIX-32-NEXT:    xscvspdpn f0, vs0
184; AIX-32-NEXT:    xscvspdpn f1, vs1
185; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
186; AIX-32-NEXT:    blr
187entry:
  ; Each operand is read with one 8-byte-aligned <2 x float> load.
188  %0 = load <2 x float>, <2 x float>* %a, align 8
189  %1 = load <2 x float>, <2 x float>* %b, align 8
  ; The product is computed at float width (named %mul to match the fmul it
  ; holds); only the result is widened to double.
190  %mul = fmul <2 x float> %0, %1
191  %2 = fpext <2 x float> %mul to <2 x double>
192  ret <2 x double> %2
193}
194
; @G: 8-byte-aligned <2 x float> global that test5 loads from. The second
; element is spelled in hexadecimal floating-point form (approximately 1.2).
195@G = dso_local local_unnamed_addr global <2 x float> <float 3.000000e+00, float 0x3FF3333340000000>, align 8
196
197; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
; test5: load the <2 x float> global @G, widen it to <2 x double>, and add the
; argument %a.
; Only the pwr10 and AIX prefixes carry check lines for this function; none
; are present under the plain default prefix. The pwr10 little-endian run
; expects a PC-relative plfd, the pwr10 big-endian and AIX runs a TOC-based
; address, and all 64-bit runs then expect xxmrghw + xvcvspdp + xvadddp. The
; 32-bit AIX run expects two lfs loads joined by xxmrghd before xvadddp.
198define dso_local <2 x double> @test5(<2 x double> %a) {
199; CHECK-P10-LABEL: test5:
200; CHECK-P10:       # %bb.0: # %entry
201; CHECK-P10-NEXT:    plfd f0, G@PCREL(0), 1
202; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
203; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
204; CHECK-P10-NEXT:    xvadddp v2, vs0, v2
205; CHECK-P10-NEXT:    blr
206;
207; CHECK-P10-BE-LABEL: test5:
208; CHECK-P10-BE:       # %bb.0: # %entry
209; CHECK-P10-BE-NEXT:    addis r3, r2, G@toc@ha
210; CHECK-P10-BE-NEXT:    lfd f0, G@toc@l(r3)
211; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
212; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
213; CHECK-P10-BE-NEXT:    xvadddp v2, vs0, v2
214; CHECK-P10-BE-NEXT:    blr
215;
216; AIX-64-LABEL: test5:
217; AIX-64:       # %bb.0: # %entry
218; AIX-64-NEXT:    ld r3, L..C0(r2) # @G
219; AIX-64-NEXT:    lfdx f0, 0, r3
220; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
221; AIX-64-NEXT:    xvcvspdp vs0, vs0
222; AIX-64-NEXT:    xvadddp v2, vs0, v2
223; AIX-64-NEXT:    blr
224;
225; AIX-32-LABEL: test5:
226; AIX-32:       # %bb.0: # %entry
227; AIX-32-NEXT:    lwz r3, L..C3(r2) # @G
228; AIX-32-NEXT:    lfs f0, 4(r3)
229; AIX-32-NEXT:    lfs f1, 0(r3)
230; AIX-32-NEXT:    xxmrghd vs0, vs1, vs0
231; AIX-32-NEXT:    xvadddp v2, vs0, v2
232; AIX-32-NEXT:    blr
233entry:
  ; The source operand is a global rather than a pointer argument.
234  %0 = load <2 x float>, <2 x float>* @G, align 8
235  %1 = fpext <2 x float> %0 to <2 x double>
  ; The addition is done at double width, after the widening.
236  %add = fadd <2 x double> %1, %a
237  ret <2 x double> %add
238}
239
; %0: packed struct type (<{ ... }>) with 21 fields; its last field (index 20)
; is a [89856 x i8] byte array. @Glob1 is an external, 16-byte-aligned array
; of 25 such structs; test6 loads from a constant offset inside it.
240%0 = type <{ i32, i8, [1 x i8], i16, i32, i32, i8, [1 x i8], i16, i32, float, float, double, double, ppc_fp128, { float, float }, { float, float }, { double, double }, { double, double }, { ppc_fp128, ppc_fp128 }, [89856 x i8] }>
241@Glob1 = external dso_local unnamed_addr global [25 x %0], align 16
242
; test6: load a <2 x float> from a constant address inside @Glob1, widen it to
; <2 x double>, compare both lanes against zero, and branch on whether both
; lanes compared equal. Both branch targets are unreachable, so the check
; lines for every run end at the two block labels with no code after them.
; The check lines expect the constant address to fold to byte offset 562536
; from @Glob1 (materialised as lis 8 / ori 38248 on AIX).
; NOTE(review): attribute group #0 is referenced on the define line but no
; `attributes #0` definition is visible in this file - confirm that is
; intentional.
243define dso_local i32 @test6() #0 {
244; CHECK-P10-LABEL: test6:
245; CHECK-P10:       # %bb.0: # %bb
246; CHECK-P10-NEXT:    plfd f0, Glob1@PCREL+562536(0), 1
247; CHECK-P10-NEXT:    xxlxor vs1, vs1, vs1
248; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
249; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
250; CHECK-P10-NEXT:    xvcmpeqdp v2, vs1, vs0
251; CHECK-P10-NEXT:    xxswapd v3, v2
252; CHECK-P10-NEXT:    xxland vs0, v2, v3
253; CHECK-P10-NEXT:    mfvsrld r3, vs0
254; CHECK-P10-NEXT:    andi. r3, r3, 1
255; CHECK-P10-NEXT:    bc 4, gt, .LBB5_2
256; CHECK-P10-NEXT:  # %bb.1: # %bb8
257; CHECK-P10-NEXT:  .LBB5_2: # %bb7
258;
259; CHECK-P10-BE-LABEL: test6:
260; CHECK-P10-BE:       # %bb.0: # %bb
261; CHECK-P10-BE-NEXT:    addis r3, r2, Glob1@toc@ha
262; CHECK-P10-BE-NEXT:    xxlxor vs1, vs1, vs1
263; CHECK-P10-BE-NEXT:    addi r3, r3, Glob1@toc@l
264; CHECK-P10-BE-NEXT:    plfd f0, 562536(r3), 0
265; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
266; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
267; CHECK-P10-BE-NEXT:    xvcmpeqdp v2, vs1, vs0
268; CHECK-P10-BE-NEXT:    xxswapd v3, v2
269; CHECK-P10-BE-NEXT:    xxland vs0, v2, v3
270; CHECK-P10-BE-NEXT:    mffprd r3, f0
271; CHECK-P10-BE-NEXT:    andi. r3, r3, 1
272; CHECK-P10-BE-NEXT:    bc 4, gt, .LBB5_2
273; CHECK-P10-BE-NEXT:  # %bb.1: # %bb8
274; CHECK-P10-BE-NEXT:  .LBB5_2: # %bb7
275;
276; AIX-64-LABEL: test6:
277; AIX-64:       # %bb.0: # %bb
278; AIX-64-NEXT:    ld r3, L..C1(r2) # @Glob1
279; AIX-64-NEXT:    lis r4, 8
280; AIX-64-NEXT:    xxlxor vs1, vs1, vs1
281; AIX-64-NEXT:    ori r4, r4, 38248
282; AIX-64-NEXT:    lfdx f0, r3, r4
283; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
284; AIX-64-NEXT:    xvcvspdp vs0, vs0
285; AIX-64-NEXT:    xvcmpeqdp v2, vs1, vs0
286; AIX-64-NEXT:    xxswapd v3, v2
287; AIX-64-NEXT:    xxland vs0, v2, v3
288; AIX-64-NEXT:    mffprd r3, f0
289; AIX-64-NEXT:    andi. r3, r3, 1
290; AIX-64-NEXT:    bc 4, gt, L..BB5_2
291; AIX-64-NEXT:  # %bb.1: # %bb8
292; AIX-64-NEXT:  L..BB5_2: # %bb7
293;
294; AIX-32-LABEL: test6:
295; AIX-32:       # %bb.0: # %bb
296; AIX-32-NEXT:    lwz r3, L..C4(r2) # @Glob1
297; AIX-32-NEXT:    lis r4, 8
298; AIX-32-NEXT:    ori r4, r4, 38248
299; AIX-32-NEXT:    lfsux f0, r3, r4
300; AIX-32-NEXT:    lfs f1, 4(r3)
301; AIX-32-NEXT:    addi r3, r1, -16
302; AIX-32-NEXT:    xxmrghd vs0, vs0, vs1
303; AIX-32-NEXT:    xxlxor vs1, vs1, vs1
304; AIX-32-NEXT:    xvcmpeqdp v2, vs1, vs0
305; AIX-32-NEXT:    xxswapd v3, v2
306; AIX-32-NEXT:    xxland vs0, v2, v3
307; AIX-32-NEXT:    stxvw4x vs0, 0, r3
308; AIX-32-NEXT:    lwz r3, -12(r1)
309; AIX-32-NEXT:    andi. r3, r3, 1
310; AIX-32-NEXT:    bc 4, gt, L..BB5_2
311; AIX-32-NEXT:  # %bb.1: # %bb8
312; AIX-32-NEXT:  L..BB5_2: # %bb7
313bb:
314  br label %bb1
315
316bb1:                                              ; preds = %bb
  ; Constant address: array element 6 of @Glob1, struct field 20 (the byte
  ; array), byte 22392, reinterpreted as a <2 x float> pointer.
317  %i = load <2 x float>, <2 x float>* bitcast (i8* getelementptr inbounds ([25 x %0], [25 x %0]* @Glob1, i64 0, i64 6, i32 20, i64 22392) to <2 x float>*), align 8
318  %i2 = fpext <2 x float> %i to <2 x double>
  ; Lane-wise ordered-equal compare of zero against the widened value.
319  %i3 = fcmp contract oeq <2 x double> zeroinitializer, %i2
  ; Move lane 1 of the compare result into lane 0 (lane 1 of the shuffle is
  ; undef), so that the following `and` combines both lanes in lane 0.
320  %i4 = shufflevector <2 x i1> %i3, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
321  %i5 = and <2 x i1> %i3, %i4
  ; Lane 0 is true only when both lanes compared equal to zero.
322  %i6 = extractelement <2 x i1> %i5, i32 0
323  br i1 %i6, label %bb8, label %bb7
324
325bb7:                                              ; preds = %bb1
326  unreachable
327
328bb8:                                              ; preds = %bb1
329  unreachable
330}
331