; xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-mul.ll (revision e9d12c248013b2d2b9880436727857e0ec8a7085)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE

; Multiply-reduction of a <2 x i32> argument to a scalar i32 through
; llvm.vector.reduce.mul.v2i32.
; The assertions below expect a single splat-and-multiply step (xxspltw, then
; vmuluwm) followed by a word extract: vextuwrx for the powerpc64le triples and
; vextuwlx for the powerpc64 triples. The xxspltw word index also differs
; between the two (2 versus 1).
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a)
  ret i32 %0
}

; Multiply-reduction of a <4 x i32> argument to a scalar i32 through
; llvm.vector.reduce.mul.v4i32.
; The assertions below expect two reduction steps: a doubleword swap plus
; multiply (xxswapd, vmuluwm), then a word splat plus multiply (xxspltw,
; vmuluwm). The result is extracted with vextuwrx for the powerpc64le triples
; and vextuwlx for the powerpc64 triples.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
  ret i32 %0
}

; Multiply-reduction of an <8 x i32> argument to a scalar i32 through
; llvm.vector.reduce.mul.v8i32.
; The assertions below expect an initial vmuluwm of v2 and v3 (presumably the
; two register-sized halves of the argument), followed by the swap/multiply
; and splat/multiply steps and a final word extract (vextuwrx for the
; powerpc64le triples, vextuwlx for the powerpc64 triples).
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
  ret i32 %0
}

; Multiply-reduction of a <16 x i32> argument to a scalar i32 through
; llvm.vector.reduce.mul.v16i32.
; The assertions below expect three initial vmuluwm instructions that combine
; v2..v5 pairwise (v3*v5, v2*v4, then v2*v3; presumably the four register-sized
; quarters of the argument), followed by the swap/multiply and splat/multiply
; steps and a final word extract (vextuwrx for the powerpc64le triples,
; vextuwlx for the powerpc64 triples).
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vmuluwm v3, v3, v5
; PWR9LE-NEXT:    vmuluwm v2, v2, v4
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vmuluwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vmuluwm v3, v3, v5
; PWR9BE-NEXT:    vmuluwm v2, v2, v4
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vmuluwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vmuluwm v3, v3, v5
; PWR10LE-NEXT:    vmuluwm v2, v2, v4
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vmuluwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vmuluwm v3, v3, v5
; PWR10BE-NEXT:    vmuluwm v2, v2, v4
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vmuluwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a)
  ret i32 %0
}

; Declarations of the multiply-reduction intrinsics called by the test
; functions in this file, one per vector width (2, 4, 8 and 16 x i32).
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) #0

; Attribute group #0 is referenced by the function definitions and the
; intrinsic declarations in this test.
attributes #0 = { nounwind }