; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
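; These tests check that the widen-multiply-then-take-the-high-half idiom
; (sext/zext, mul in the doubled element type, ashr/lshr by the element
; width, trunc) is recognized and lowered to a single LASX multiply-high
; instruction: xvmuh.{b,h,w,d} for the signed cases and xvmuh.{bu,hu,wu,du}
; for the unsigned ones.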

define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.b $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <32 x i8>, ptr %a0
  %v1 = load <32 x i8>, ptr %a1
  %v0s = sext <32 x i8> %v0 to <32 x i16>
  %v1s = sext <32 x i8> %v1 to <32 x i16>
  %m = mul <32 x i16> %v0s, %v1s
  %s = ashr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %v2 = trunc <32 x i16> %s to <32 x i8>
  store <32 x i8> %v2, ptr %res
  ret void
}

define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.bu $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <32 x i8>, ptr %a0
  %v1 = load <32 x i8>, ptr %a1
  %v0z = zext <32 x i8> %v0 to <32 x i16>
  %v1z = zext <32 x i8> %v1 to <32 x i16>
  %m = mul <32 x i16> %v0z, %v1z
  %s = lshr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %v2 = trunc <32 x i16> %s to <32 x i8>
  store <32 x i8> %v2, ptr %res
  ret void
}

define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.h $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <16 x i16>, ptr %a0
  %v1 = load <16 x i16>, ptr %a1
  %v0s = sext <16 x i16> %v0 to <16 x i32>
  %v1s = sext <16 x i16> %v1 to <16 x i32>
  %m = mul <16 x i32> %v0s, %v1s
  %s = ashr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %v2 = trunc <16 x i32> %s to <16 x i16>
  store <16 x i16> %v2, ptr %res
  ret void
}

define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.hu $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <16 x i16>, ptr %a0
  %v1 = load <16 x i16>, ptr %a1
  %v0z = zext <16 x i16> %v0 to <16 x i32>
  %v1z = zext <16 x i16> %v1 to <16 x i32>
  %m = mul <16 x i32> %v0z, %v1z
  %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %v2 = trunc <16 x i32> %s to <16 x i16>
  store <16 x i16> %v2, ptr %res
  ret void
}

define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.w $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <8 x i32>, ptr %a0
  %v1 = load <8 x i32>, ptr %a1
  %v0s = sext <8 x i32> %v0 to <8 x i64>
  %v1s = sext <8 x i32> %v1 to <8 x i64>
  %m = mul <8 x i64> %v0s, %v1s
  %s = ashr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %v2 = trunc <8 x i64> %s to <8 x i32>
  store <8 x i32> %v2, ptr %res
  ret void
}

define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.wu $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <8 x i32>, ptr %a0
  %v1 = load <8 x i32>, ptr %a1
  %v0z = zext <8 x i32> %v0 to <8 x i64>
  %v1z = zext <8 x i32> %v1 to <8 x i64>
  %m = mul <8 x i64> %v0z, %v1z
  %s = lshr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %v2 = trunc <8 x i64> %s to <8 x i32>
  store <8 x i32> %v2, ptr %res
  ret void
}

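; For the v4i64 cases there is no wider legal element type, so the high half
; is written with <4 x i128> arithmetic; it should still be matched and
; lowered to a single xvmuh.d / xvmuh.du.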
define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhs_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.d $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <4 x i64>, ptr %a0
  %v1 = load <4 x i64>, ptr %a1
  %v0s = sext <4 x i64> %v0 to <4 x i128>
  %v1s = sext <4 x i64> %v1 to <4 x i128>
  %m = mul <4 x i128> %v0s, %v1s
  %s = ashr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64>
  %v2 = trunc <4 x i128> %s to <4 x i64>
  store <4 x i64> %v2, ptr %res
  ret void
}

define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: mulhu_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvld $xr1, $a2, 0
; CHECK-NEXT:    xvmuh.du $xr0, $xr0, $xr1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %v0 = load <4 x i64>, ptr %a0
  %v1 = load <4 x i64>, ptr %a1
  %v0z = zext <4 x i64> %v0 to <4 x i128>
  %v1z = zext <4 x i64> %v1 to <4 x i128>
  %m = mul <4 x i128> %v0z, %v1z
  %s = lshr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64>
  %v2 = trunc <4 x i128> %s to <4 x i64>
  store <4 x i64> %v2, ptr %res
  ret void
}
