xref: /llvm-project/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll (revision 2af57b6099e1750c375cfc1321d1a5ef0a65c085)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
3; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
4; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
7; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
9; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
12; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
13; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
14
15; Function Attrs: norecurse nounwind readonly
; test1: load a <2 x float> from %Ptr and extend it to <2 x double>.
; The expected assembly loads the whole 8-byte vector with one lfd, then uses
; xxmrghw and a single vector xvcvspdp; no per-element conversion appears.
16define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
17; CHECK-LABEL: test1:
18; CHECK:       # %bb.0: # %entry
19; CHECK-NEXT:    lfd f0, 0(r3)
20; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
21; CHECK-NEXT:    xvcvspdp v2, vs0
22; CHECK-NEXT:    blr
23entry:
24  %0 = load <2 x float>, <2 x float>* %Ptr, align 8
; Widen both float lanes to double; this is the operation under test.
25  %1 = fpext <2 x float> %0 to <2 x double>
26  ret <2 x double> %1
27}
28
29; Function Attrs: norecurse nounwind readonly
; test2: subtract two loaded <2 x float> vectors (%a - %b) and extend the
; difference to <2 x double>.
; The expected assembly uses one lfd per operand, a vector xvsubsp, and the
; same xxmrghw + xvcvspdp widening sequence as the plain-load case.
30define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
31; CHECK-LABEL: test2:
32; CHECK:       # %bb.0: # %entry
33; CHECK-NEXT:    lfd f0, 0(r4)
34; CHECK-NEXT:    lfd f1, 0(r3)
35; CHECK-NEXT:    xvsubsp vs0, vs1, vs0
36; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
37; CHECK-NEXT:    xvcvspdp v2, vs0
38; CHECK-NEXT:    blr
39entry:
40  %0 = load <2 x float>, <2 x float>* %a, align 8
41  %1 = load <2 x float>, <2 x float>* %b, align 8
; The arithmetic is done in single precision before the extension.
42  %sub = fsub <2 x float> %0, %1
43  %2 = fpext <2 x float> %sub to <2 x double>
44  ret <2 x double> %2
45}
46
47; Function Attrs: norecurse nounwind readonly
; test3: add two loaded <2 x float> vectors (%a + %b) and extend the sum to
; <2 x double>.
; The expected assembly uses one lfd per operand, a vector xvaddsp, then
; xxmrghw and a single vector xvcvspdp for the widening.
49define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
50; CHECK-LABEL: test3:
51; CHECK:       # %bb.0: # %entry
52; CHECK-NEXT:    lfd f0, 0(r4)
53; CHECK-NEXT:    lfd f1, 0(r3)
54; CHECK-NEXT:    xvaddsp vs0, vs1, vs0
55; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
56; CHECK-NEXT:    xvcvspdp v2, vs0
57; CHECK-NEXT:    blr
58entry:
59  %0 = load <2 x float>, <2 x float>* %a, align 8
60  %1 = load <2 x float>, <2 x float>* %b, align 8
; The arithmetic is done in single precision before the extension.
61  %add = fadd <2 x float> %0, %1
62  %2 = fpext <2 x float> %add to <2 x double>
63  ret <2 x double> %2
64}
65
66; Function Attrs: norecurse nounwind readonly
; test4: multiply two loaded <2 x float> vectors (%a * %b) and extend the
; product to <2 x double>.
; The expected assembly uses one lfd per operand, a vector xvmulsp, then
; xxmrghw and a single vector xvcvspdp for the widening.
68define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) {
69; CHECK-LABEL: test4:
70; CHECK:       # %bb.0: # %entry
71; CHECK-NEXT:    lfd f0, 0(r4)
72; CHECK-NEXT:    lfd f1, 0(r3)
73; CHECK-NEXT:    xvmulsp vs0, vs1, vs0
74; CHECK-NEXT:    xxmrghw vs0, vs0, vs0
75; CHECK-NEXT:    xvcvspdp v2, vs0
76; CHECK-NEXT:    blr
77entry:
78  %0 = load <2 x float>, <2 x float>* %a, align 8
79  %1 = load <2 x float>, <2 x float>* %b, align 8
; The arithmetic is done in single precision before the extension.
80  %mul = fmul <2 x float> %0, %1
81  %2 = fpext <2 x float> %mul to <2 x double>
82  ret <2 x double> %2
83}
84
; 8-byte-aligned <2 x float> global with a constant initializer; @test5 loads
; it and extends it to <2 x double>.
85@G = dso_local local_unnamed_addr global <2 x float> <float 3.000000e+00, float 0x3FF3333340000000>, align 8
86
87; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
; test5: load the <2 x float> global @G, extend it to <2 x double>, and add
; the argument %a.
; Only the P10 prefixes have assertions for this function. The two expected
; sequences differ in how @G is addressed: a PC-relative plfd under the
; CHECK-P10 prefix, and addis + lfd through the TOC under CHECK-P10-BE. The
; xxmrghw + xvcvspdp widening and the xvadddp are the same in both.
88define dso_local <2 x double> @test5(<2 x double> %a) {
89; CHECK-P10-LABEL: test5:
90; CHECK-P10:       # %bb.0: # %entry
91; CHECK-P10-NEXT:    plfd f0, G@PCREL(0), 1
92; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
93; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
94; CHECK-P10-NEXT:    xvadddp v2, vs0, v2
95; CHECK-P10-NEXT:    blr
96;
97; CHECK-P10-BE-LABEL: test5:
98; CHECK-P10-BE:       # %bb.0: # %entry
99; CHECK-P10-BE-NEXT:    addis r3, r2, G@toc@ha
100; CHECK-P10-BE-NEXT:    lfd f0, G@toc@l(r3)
101; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
102; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
103; CHECK-P10-BE-NEXT:    xvadddp v2, vs0, v2
104; CHECK-P10-BE-NEXT:    blr
105entry:
106  %0 = load <2 x float>, <2 x float>* @G, align 8
107  %1 = fpext <2 x float> %0 to <2 x double>
; The addition is done in double precision, after the extension.
108  %add = fadd <2 x double> %1, %a
109  ret <2 x double> %add
110}
111
; Packed struct (no padding between fields). The fields ahead of the trailing
; byte array add up to 144 bytes, so one %0 occupies 144 + 89856 = 90000 bytes.
112%0 = type <{ i32, i8, [1 x i8], i16, i32, i32, i8, [1 x i8], i16, i32, float, float, double, double, ppc_fp128, { float, float }, { float, float }, { double, double }, { double, double }, { ppc_fp128, ppc_fp128 }, [89856 x i8] }>
; External array of 25 such structs; @test6 loads from a constant offset in it.
113@Glob1 = external dso_local unnamed_addr global [25 x %0], align 16
114
; test6: load a <2 x float> from a large constant offset inside @Glob1, extend
; it to <2 x double>, compare against zero, and branch on the AND of the two
; lane results.
; Only the P10 prefixes have assertions for this function. Both successors of
; the branch are unreachable, so the expected assembly ends with the bc and two
; empty block labels rather than a blr.
; NOTE(review): attribute group #0 is referenced here but is not defined in
; the visible part of this file.
115define dso_local i32 @test6() #0 {
116; CHECK-P10-LABEL: test6:
117; CHECK-P10:       # %bb.0: # %bb
118; CHECK-P10-NEXT:    plfd f0, Glob1@PCREL+562536(0), 1
119; CHECK-P10-NEXT:    xxlxor vs1, vs1, vs1
120; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
121; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
122; CHECK-P10-NEXT:    xvcmpeqdp v2, vs1, vs0
123; CHECK-P10-NEXT:    xxswapd v3, v2
124; CHECK-P10-NEXT:    xxland vs0, v2, v3
125; CHECK-P10-NEXT:    mfvsrld r3, vs0
126; CHECK-P10-NEXT:    andi. r3, r3, 1
127; CHECK-P10-NEXT:    bc 4, gt, .LBB5_2
128; CHECK-P10-NEXT:  # %bb.1: # %bb8
129; CHECK-P10-NEXT:  .LBB5_2: # %bb7
130;
131; CHECK-P10-BE-LABEL: test6:
132; CHECK-P10-BE:       # %bb.0: # %bb
133; CHECK-P10-BE-NEXT:    addis r3, r2, Glob1@toc@ha
134; CHECK-P10-BE-NEXT:    xxlxor vs1, vs1, vs1
135; CHECK-P10-BE-NEXT:    addi r3, r3, Glob1@toc@l
136; CHECK-P10-BE-NEXT:    plfd f0, 562536(r3), 0
137; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
138; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
139; CHECK-P10-BE-NEXT:    xvcmpeqdp v2, vs1, vs0
140; CHECK-P10-BE-NEXT:    xxswapd v3, v2
141; CHECK-P10-BE-NEXT:    xxland vs0, v2, v3
142; CHECK-P10-BE-NEXT:    mffprd r3, f0
143; CHECK-P10-BE-NEXT:    andi. r3, r3, 1
144; CHECK-P10-BE-NEXT:    bc 4, gt, .LBB5_2
145; CHECK-P10-BE-NEXT:  # %bb.1: # %bb8
146; CHECK-P10-BE-NEXT:  .LBB5_2: # %bb7
147bb:
148  br label %bb1
149
150bb1:                                              ; preds = %bb
; Address = element 6 of @Glob1, field 20 (the byte array), byte 22392:
; 6 * 90000 + 144 + 22392 = 562536, the displacement in the assertions above.
151  %i = load <2 x float>, <2 x float>* bitcast (i8* getelementptr inbounds ([25 x %0], [25 x %0]* @Glob1, i64 0, i64 6, i32 20, i64 22392) to <2 x float>*), align 8
152  %i2 = fpext <2 x float> %i to <2 x double>
153  %i3 = fcmp contract oeq <2 x double> zeroinitializer, %i2
; Move lane 1 of the compare result into lane 0 so the AND below combines both
; lanes in lane 0; the other shuffle lane is undef.
154  %i4 = shufflevector <2 x i1> %i3, <2 x i1> poison, <2 x i32> <i32 1, i32 undef>
155  %i5 = and <2 x i1> %i3, %i4
156  %i6 = extractelement <2 x i1> %i5, i32 0
157  br i1 %i6, label %bb8, label %bb7
158
159bb7:                                              ; preds = %bb1
160  unreachable
161
162bb8:                                              ; preds = %bb1
163  unreachable
164}
165