xref: /llvm-project/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll (revision b922a3621116b404d868af8b74cab25ab78555be)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
3; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
4; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
7; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
9; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
12; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
13; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
14; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
15; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \
16; RUN: FileCheck %s --check-prefix=AIX-64
17; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
18; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \
19; RUN: FileCheck %s --check-prefix=AIX-32
20
21; Function Attrs: norecurse nounwind readonly
; test1: loads a <2 x float> from %Ptr (align 8) and extends it to
; <2 x double> with a single vector fpext.
; The 64-bit expectations (default prefix and AIX-64) are one lfd/lfdx load,
; an xxmrghw, and one vector convert (xvcvspdp). The AIX-32 expectations are
; two scalar lfs loads merged with xxmrghd.
22define dso_local <2 x double> @test1(ptr nocapture readonly %Ptr) {
23; CHECK-LABEL: test1:
24; CHECK:       # %bb.0: # %entry
25; CHECK-NEXT:    lfd f0, 0(r3)
26; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
27; CHECK-NEXT:    xvcvspdp v2, vs0
28; CHECK-NEXT:    blr
29;
30; AIX-64-LABEL: test1:
31; AIX-64:       # %bb.0: # %entry
32; AIX-64-NEXT:    lfdx f0, 0, r3
33; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
34; AIX-64-NEXT:    xvcvspdp v2, vs0
35; AIX-64-NEXT:    blr
36;
37; AIX-32-LABEL: test1:
38; AIX-32:       # %bb.0: # %entry
39; AIX-32-NEXT:    lfs f0, 4(r3)
40; AIX-32-NEXT:    lfs f1, 0(r3)
41; AIX-32-NEXT:    xxmrghd v2, vs1, vs0
42; AIX-32-NEXT:    blr
43entry:
44  %0 = load <2 x float>, ptr %Ptr, align 8
45  %1 = fpext <2 x float> %0 to <2 x double>
46  ret <2 x double> %1
47}
48
49; Function Attrs: norecurse nounwind readonly
; test2: subtracts two <2 x float> values loaded from %a and %b, then extends
; the difference to <2 x double>.
; The 64-bit expectations keep the subtract as one xvsubsp followed by
; xxmrghw and xvcvspdp. The AIX-32 expectations build each operand from two
; lxsiwzx loads merged with vmrgow, perform one xvsubsp, and then convert the
; two result lanes separately with xscvspdpn before merging with xxmrghd.
50define dso_local <2 x double> @test2(ptr nocapture readonly %a, ptr nocapture readonly %b) {
51; CHECK-LABEL: test2:
52; CHECK:       # %bb.0: # %entry
53; CHECK-NEXT:    lfd f0, 0(r4)
54; CHECK-NEXT:    lfd f1, 0(r3)
55; CHECK-NEXT:    xvsubsp vs0, vs1, vs0
56; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
57; CHECK-NEXT:    xvcvspdp v2, vs0
58; CHECK-NEXT:    blr
59;
60; AIX-64-LABEL: test2:
61; AIX-64:       # %bb.0: # %entry
62; AIX-64-NEXT:    lfdx f0, 0, r3
63; AIX-64-NEXT:    lfdx f1, 0, r4
64; AIX-64-NEXT:    xvsubsp vs0, vs0, vs1
65; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
66; AIX-64-NEXT:    xvcvspdp v2, vs0
67; AIX-64-NEXT:    blr
68;
69; AIX-32-LABEL: test2:
70; AIX-32:       # %bb.0: # %entry
71; AIX-32-NEXT:    li r5, 4
72; AIX-32-NEXT:    lxsiwzx v3, 0, r3
73; AIX-32-NEXT:    lxsiwzx v4, 0, r4
74; AIX-32-NEXT:    lxsiwzx v2, r3, r5
75; AIX-32-NEXT:    vmrgow v2, v3, v2
76; AIX-32-NEXT:    lxsiwzx v3, r4, r5
77; AIX-32-NEXT:    vmrgow v3, v4, v3
78; AIX-32-NEXT:    xvsubsp vs0, v2, v3
79; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
80; AIX-32-NEXT:    xscvspdpn f0, vs0
81; AIX-32-NEXT:    xscvspdpn f1, vs1
82; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
83; AIX-32-NEXT:    blr
84entry:
85  %0 = load <2 x float>, ptr %a, align 8
86  %1 = load <2 x float>, ptr %b, align 8
87  %sub = fsub <2 x float> %0, %1
88  %2 = fpext <2 x float> %sub to <2 x double>
89  ret <2 x double> %2
90}
91
92; Function Attrs: norecurse nounwind readonly
; test3: adds two <2 x float> values loaded from %a and %b, then extends the
; sum to <2 x double>. Same shape as test2 with fadd in place of fsub.
; The 64-bit expectations keep the add as one xvaddsp followed by xxmrghw and
; xvcvspdp. The AIX-32 expectations build each operand from two lxsiwzx loads
; merged with vmrgow, perform one xvaddsp, and convert the two result lanes
; separately with xscvspdpn before merging with xxmrghd.
94define dso_local <2 x double> @test3(ptr nocapture readonly %a, ptr nocapture readonly %b) {
95; CHECK-LABEL: test3:
96; CHECK:       # %bb.0: # %entry
97; CHECK-NEXT:    lfd f0, 0(r4)
98; CHECK-NEXT:    lfd f1, 0(r3)
99; CHECK-NEXT:    xvaddsp vs0, vs1, vs0
100; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
101; CHECK-NEXT:    xvcvspdp v2, vs0
102; CHECK-NEXT:    blr
103;
104; AIX-64-LABEL: test3:
105; AIX-64:       # %bb.0: # %entry
106; AIX-64-NEXT:    lfdx f0, 0, r3
107; AIX-64-NEXT:    lfdx f1, 0, r4
108; AIX-64-NEXT:    xvaddsp vs0, vs0, vs1
109; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
110; AIX-64-NEXT:    xvcvspdp v2, vs0
111; AIX-64-NEXT:    blr
112;
113; AIX-32-LABEL: test3:
114; AIX-32:       # %bb.0: # %entry
115; AIX-32-NEXT:    li r5, 4
116; AIX-32-NEXT:    lxsiwzx v3, 0, r3
117; AIX-32-NEXT:    lxsiwzx v4, 0, r4
118; AIX-32-NEXT:    lxsiwzx v2, r3, r5
119; AIX-32-NEXT:    vmrgow v2, v3, v2
120; AIX-32-NEXT:    lxsiwzx v3, r4, r5
121; AIX-32-NEXT:    vmrgow v3, v4, v3
122; AIX-32-NEXT:    xvaddsp vs0, v2, v3
123; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
124; AIX-32-NEXT:    xscvspdpn f0, vs0
125; AIX-32-NEXT:    xscvspdpn f1, vs1
126; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
127; AIX-32-NEXT:    blr
128entry:
129  %0 = load <2 x float>, ptr %a, align 8
130  %1 = load <2 x float>, ptr %b, align 8
131  %add = fadd <2 x float> %0, %1
132  %2 = fpext <2 x float> %add to <2 x double>
133  ret <2 x double> %2
134}
135
136; Function Attrs: norecurse nounwind readonly
; test4: multiplies two <2 x float> values loaded from %a and %b, then extends
; the product to <2 x double>. Same shape as test2 with fmul in place of fsub.
; The 64-bit expectations keep the multiply as one xvmulsp followed by xxmrghw
; and xvcvspdp. The AIX-32 expectations build each operand from two lxsiwzx
; loads merged with vmrgow, perform one xvmulsp, and convert the two result
; lanes separately with xscvspdpn before merging with xxmrghd.
138define dso_local <2 x double> @test4(ptr nocapture readonly %a, ptr nocapture readonly %b) {
139; CHECK-LABEL: test4:
140; CHECK:       # %bb.0: # %entry
141; CHECK-NEXT:    lfd f0, 0(r4)
142; CHECK-NEXT:    lfd f1, 0(r3)
143; CHECK-NEXT:    xvmulsp vs0, vs1, vs0
144; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
145; CHECK-NEXT:    xvcvspdp v2, vs0
146; CHECK-NEXT:    blr
147;
148; AIX-64-LABEL: test4:
149; AIX-64:       # %bb.0: # %entry
150; AIX-64-NEXT:    lfdx f0, 0, r3
151; AIX-64-NEXT:    lfdx f1, 0, r4
152; AIX-64-NEXT:    xvmulsp vs0, vs0, vs1
153; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
154; AIX-64-NEXT:    xvcvspdp v2, vs0
155; AIX-64-NEXT:    blr
156;
157; AIX-32-LABEL: test4:
158; AIX-32:       # %bb.0: # %entry
159; AIX-32-NEXT:    li r5, 4
160; AIX-32-NEXT:    lxsiwzx v3, 0, r3
161; AIX-32-NEXT:    lxsiwzx v4, 0, r4
162; AIX-32-NEXT:    lxsiwzx v2, r3, r5
163; AIX-32-NEXT:    vmrgow v2, v3, v2
164; AIX-32-NEXT:    lxsiwzx v3, r4, r5
165; AIX-32-NEXT:    vmrgow v3, v4, v3
166; AIX-32-NEXT:    xvmulsp vs0, v2, v3
167; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
168; AIX-32-NEXT:    xscvspdpn f0, vs0
169; AIX-32-NEXT:    xscvspdpn f1, vs1
170; AIX-32-NEXT:    xxmrghd v2, vs0, vs1
171; AIX-32-NEXT:    blr
172entry:
173  %0 = load <2 x float>, ptr %a, align 8
174  %1 = load <2 x float>, ptr %b, align 8
175  %mul = fmul <2 x float> %0, %1
176  %2 = fpext <2 x float> %mul to <2 x double>
177  ret <2 x double> %2
178}
179
; Two-element float global read by test5. It is declared with align 8, the
; same alignment that test5 states on its <2 x float> load.
180@G = dso_local local_unnamed_addr global <2 x float> <float 3.000000e+00, float 0x3FF3333340000000>, align 8
181
182; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
; test5: loads the <2 x float> global @G, extends it to <2 x double>, and adds
; the result to the vector argument %a.
; This function has no expectations under the default prefix; only the two
; pwr10 prefixes and the two AIX prefixes cover it. The pwr10 little-endian
; lines address @G PC-relatively (plfd with G@PCREL), the pwr10 big-endian
; lines go through the TOC (addis + lfd), and the AIX lines first load the
; address of @G from the TOC entry L..C0.
183define dso_local <2 x double> @test5(<2 x double> %a) {
184; CHECK-P10-LABEL: test5:
185; CHECK-P10:       # %bb.0: # %entry
186; CHECK-P10-NEXT:    plfd f0, G@PCREL(0), 1
187; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
188; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
189; CHECK-P10-NEXT:    xvadddp v2, vs0, v2
190; CHECK-P10-NEXT:    blr
191;
192; CHECK-P10-BE-LABEL: test5:
193; CHECK-P10-BE:       # %bb.0: # %entry
194; CHECK-P10-BE-NEXT:    addis r3, r2, G@toc@ha
195; CHECK-P10-BE-NEXT:    lfd f0, G@toc@l(r3)
196; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
197; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
198; CHECK-P10-BE-NEXT:    xvadddp v2, vs0, v2
199; CHECK-P10-BE-NEXT:    blr
200;
201; AIX-64-LABEL: test5:
202; AIX-64:       # %bb.0: # %entry
203; AIX-64-NEXT:    ld r3, L..C0(r2) # @G
204; AIX-64-NEXT:    lfdx f0, 0, r3
205; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
206; AIX-64-NEXT:    xvcvspdp vs0, vs0
207; AIX-64-NEXT:    xvadddp v2, vs0, v2
208; AIX-64-NEXT:    blr
209;
210; AIX-32-LABEL: test5:
211; AIX-32:       # %bb.0: # %entry
212; AIX-32-NEXT:    lwz r3, L..C0(r2) # @G
213; AIX-32-NEXT:    lfs f0, 4(r3)
214; AIX-32-NEXT:    lfs f1, 0(r3)
215; AIX-32-NEXT:    xxmrghd vs0, vs1, vs0
216; AIX-32-NEXT:    xvadddp v2, vs0, v2
217; AIX-32-NEXT:    blr
218entry:
219  %0 = load <2 x float>, ptr @G, align 8
220  %1 = fpext <2 x float> %0 to <2 x double>
221  %add = fadd <2 x double> %1, %a
222  ret <2 x double> %add
223}
224
; Packed struct type (the <{ ... }> form) whose last member, field index 20,
; is a [89856 x i8] byte array. @Glob1 is an external array of 25 of these
; structs; test6 reads from it through a constant getelementptr.
225%0 = type <{ i32, i8, [1 x i8], i16, i32, i32, i8, [1 x i8], i16, i32, float, float, double, double, ppc_fp128, { float, float }, { float, float }, { double, double }, { double, double }, { ppc_fp128, ppc_fp128 }, [89856 x i8] }>
226@Glob1 = external dso_local unnamed_addr global [25 x %0], align 16
227
; test6: loads a <2 x float> from a constant address inside @Glob1 (array
; element 6, struct field 20, byte 22392; the expectations below show this as
; byte offset 562536 from @Glob1), extends it to <2 x double>, and compares
; both lanes for ordered equality against zero. Lane 1 of the compare result
; is shuffled into lane 0 and ANDed with the original, so the extracted lane 0
; is true only when both lanes compared equal; that bit drives the branch.
; Both successors are unreachable, and the expected output ends at the two
; block labels with no further instructions.
; The shuffle mask's second element is a don't-care lane; it is written as
; poison to match the poison second operand of the same shufflevector.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm whether the reference is intentional.
228define dso_local i32 @test6() #0 {
229; CHECK-P10-LABEL: test6:
230; CHECK-P10:       # %bb.0: # %bb
231; CHECK-P10-NEXT:    plfd f0, Glob1@PCREL+562536(0), 1
232; CHECK-P10-NEXT:    xxlxor vs1, vs1, vs1
233; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
234; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
235; CHECK-P10-NEXT:    xvcmpeqdp v2, vs1, vs0
236; CHECK-P10-NEXT:    xxswapd v3, v2
237; CHECK-P10-NEXT:    xxland vs0, v2, v3
238; CHECK-P10-NEXT:    mfvsrld r3, vs0
239; CHECK-P10-NEXT:    andi. r3, r3, 1
240; CHECK-P10-NEXT:    bc 4, gt, .LBB5_2
241; CHECK-P10-NEXT:  # %bb.1: # %bb8
242; CHECK-P10-NEXT:  .LBB5_2: # %bb7
243;
244; CHECK-P10-BE-LABEL: test6:
245; CHECK-P10-BE:       # %bb.0: # %bb
246; CHECK-P10-BE-NEXT:    addis r3, r2, Glob1@toc@ha
247; CHECK-P10-BE-NEXT:    xxlxor vs1, vs1, vs1
248; CHECK-P10-BE-NEXT:    addi r3, r3, Glob1@toc@l
249; CHECK-P10-BE-NEXT:    plfd f0, 562536(r3), 0
250; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
251; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
252; CHECK-P10-BE-NEXT:    xvcmpeqdp v2, vs1, vs0
253; CHECK-P10-BE-NEXT:    xxswapd v3, v2
254; CHECK-P10-BE-NEXT:    xxland vs0, v2, v3
255; CHECK-P10-BE-NEXT:    mffprd r3, f0
256; CHECK-P10-BE-NEXT:    andi. r3, r3, 1
257; CHECK-P10-BE-NEXT:    bc 4, gt, .LBB5_2
258; CHECK-P10-BE-NEXT:  # %bb.1: # %bb8
259; CHECK-P10-BE-NEXT:  .LBB5_2: # %bb7
260;
261; AIX-64-LABEL: test6:
262; AIX-64:       # %bb.0: # %bb
263; AIX-64-NEXT:    lis r4, 8
264; AIX-64-NEXT:    ld r3, L..C1(r2) # @Glob1
265; AIX-64-NEXT:    xxlxor vs1, vs1, vs1
266; AIX-64-NEXT:    ori r4, r4, 38248
267; AIX-64-NEXT:    lfdx f0, r3, r4
268; AIX-64-NEXT:    xxmrghw vs0, vs0, vs0
269; AIX-64-NEXT:    xvcvspdp vs0, vs0
270; AIX-64-NEXT:    xvcmpeqdp v2, vs1, vs0
271; AIX-64-NEXT:    xxswapd v3, v2
272; AIX-64-NEXT:    xxland vs0, v2, v3
273; AIX-64-NEXT:    mffprd r3, f0
274; AIX-64-NEXT:    andi. r3, r3, 1
275; AIX-64-NEXT:    bc 4, gt, L..BB5_2
276; AIX-64-NEXT:  # %bb.1: # %bb8
277; AIX-64-NEXT:  L..BB5_2: # %bb7
278;
279; AIX-32-LABEL: test6:
280; AIX-32:       # %bb.0: # %bb
281; AIX-32-NEXT:    lis r4, 8
282; AIX-32-NEXT:    lwz r3, L..C1(r2) # @Glob1
283; AIX-32-NEXT:    ori r4, r4, 38248
284; AIX-32-NEXT:    lfsux f0, r3, r4
285; AIX-32-NEXT:    lfs f1, 4(r3)
286; AIX-32-NEXT:    addi r3, r1, -16
287; AIX-32-NEXT:    xxmrghd vs0, vs0, vs1
288; AIX-32-NEXT:    xxlxor vs1, vs1, vs1
289; AIX-32-NEXT:    xvcmpeqdp v2, vs1, vs0
290; AIX-32-NEXT:    xxswapd v3, v2
291; AIX-32-NEXT:    xxland vs0, v2, v3
292; AIX-32-NEXT:    stxvw4x vs0, 0, r3
293; AIX-32-NEXT:    lwz r3, -12(r1)
294; AIX-32-NEXT:    andi. r3, r3, 1
295; AIX-32-NEXT:    bc 4, gt, L..BB5_2
296; AIX-32-NEXT:  # %bb.1: # %bb8
297; AIX-32-NEXT:  L..BB5_2: # %bb7
298bb:
299  br label %bb1
300
301bb1:                                              ; preds = %bb
302  %i = load <2 x float>, ptr getelementptr inbounds ([25 x %0], ptr @Glob1, i64 0, i64 6, i32 20, i64 22392), align 8
303  %i2 = fpext <2 x float> %i to <2 x double>
304  %i3 = fcmp contract oeq <2 x double> zeroinitializer, %i2
305  %i4 = shufflevector <2 x i1> %i3, <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
306  %i5 = and <2 x i1> %i3, %i4
307  %i6 = extractelement <2 x i1> %i5, i32 0
308  br i1 %i6, label %bb8, label %bb7
309
310bb7:                                              ; preds = %bb1
311  unreachable
312
313bb8:                                              ; preds = %bb1
314  unreachable
315}
316