; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s

; This test verifies the vector multiply instructions on Power10.
; It covers both the low-order and high-order forms of vector multiply:
; the low-order form operates on doublewords, while the high-order forms
; operate on signed and unsigned words and doublewords.
; This file also covers the 128-bit vector multiply instructions.

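; The high-order tests below use the canonical widening idiom: extend the
; operands to twice the element width, multiply, shift the product right by
; the element width, and truncate; ISel should match this to the matching
; vmulh* instruction. A plain <2 x i64> multiply, by contrast, should select
; the low-order vmulld directly.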
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulld v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %mul = mul <2 x i64> %b, %a
  ret <2 x i64> %mul
}

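; Sign-extended high-part multiply of doublewords: the sext/mul/lshr/trunc
; idiom should select vmulhsd.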
define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <2 x i64> %a to <2 x i128>
  %1 = sext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

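; Zero-extended high-part multiply of doublewords should select vmulhud.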
define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <2 x i64> %a to <2 x i128>
  %1 = zext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

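; Sign-extended high-part multiply of words should select vmulhsw.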
define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <4 x i32> %a to <4 x i64>
  %1 = sext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

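; Zero-extended high-part multiply of words should select vmulhuw.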
define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <4 x i32> %a to <4 x i64>
  %1 = zext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; Test the vector multiply high intrinsics.
declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)

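; Unlike the pattern tests above, where the IR multiplies %b by %a and the
; source registers appear commuted, the intrinsic calls keep the operand order.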
define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

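; Test the 128-bit vector multiply (even/odd doubleword) and multiply-sum
; intrinsics.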
declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone

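; vmuleud: Vector Multiply Even Unsigned Doubleword, producing a <1 x i128>
; product.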
define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuleud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuleud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

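; vmuloud: Vector Multiply Odd Unsigned Doubleword.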
define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuloud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuloud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

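; vmulesd: Vector Multiply Even Signed Doubleword.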
define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulesd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulesd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

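; vmulosd: Vector Multiply Odd Signed Doubleword.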
define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulosd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulosd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

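; vmsumcud: Vector Multiply-Sum & write Carry-out Unsigned Doubleword; the
; result is the carry out of the sum of the two doubleword products and %z.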
define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
; CHECK-LABEL: test_vmsumcud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmsumcud v2, v2, v3, v4
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
  ret <1 x i128> %tmp
}
