xref: /llvm-project/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll (revision 0a76f7d9d8c1fc693568ed26420c47d92a6ba0e7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; Test that ComputeNumSignBitsForTargetNode() helps DAGCombiner with vector
3; intrinsics: no code is expected for the trunc + sext of each intrinsic result.
4;
5; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s  | FileCheck %s
6
; Pack intrinsics called by f0-f2. Each returns a {vector, i32} pair; the
; tests below only use the vector (struct element 0).
7declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
8declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
9declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
10
11; PACKS_CC: i64 -> i32
; Calls llvm.s390.vpksgs with the same constant <2 x i64> vector for both
; operands, extracts the vector result, then truncates it to i16 elements and
; sign-extends back to i32. The CHECK lines expect only the constant load and
; vpksgs, i.e. no instructions for the trunc/sext pair.
12define <4 x i32> @f0() {
13; CHECK-LABEL: f0:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    larl %r1, .LCPI0_0
16; CHECK-NEXT:    vl %v0, 0(%r1), 3
17; CHECK-NEXT:    vpksgs %v24, %v0, %v0
18; CHECK-NEXT:    br %r14
19  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>)
20  %extr = extractvalue {<4 x i32>, i32} %call, 0
21  %trunc = trunc <4 x i32> %extr to <4 x i16>
22  %ret = sext <4 x i16> %trunc to <4 x i32>
23  ret <4 x i32> %ret
24}
25
26; PACKS_CC: i32 -> i16
; Calls llvm.s390.vpksfs with the same constant <4 x i32> vector for both
; operands, extracts the vector result, then truncates it to i8 elements and
; sign-extends back to i16. The CHECK lines expect only the constant load and
; vpksfs, i.e. no instructions for the trunc/sext pair.
27define <8 x i16> @f1() {
28; CHECK-LABEL: f1:
29; CHECK:       # %bb.0:
30; CHECK-NEXT:    larl %r1, .LCPI1_0
31; CHECK-NEXT:    vl %v0, 0(%r1), 3
32; CHECK-NEXT:    vpksfs %v24, %v0, %v0
33; CHECK-NEXT:    br %r14
34  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
35                                                  <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
36  %extr = extractvalue {<8 x i16>, i32} %call, 0
37  %trunc = trunc <8 x i16> %extr to <8 x i8>
38  %ret = sext <8 x i8> %trunc to <8 x i16>
39  ret <8 x i16> %ret
40}
41
42; PACKS_CC: i16 -> i8
; Calls llvm.s390.vpkshs with the same constant <8 x i16> vector for both
; operands, extracts the vector result, then truncates it to i4 elements and
; sign-extends back to i8. The CHECK lines expect only the constant load and
; vpkshs, i.e. no instructions for the trunc/sext pair.
43define <16 x i8> @f2() {
44; CHECK-LABEL: f2:
45; CHECK:       # %bb.0:
46; CHECK-NEXT:    larl %r1, .LCPI2_0
47; CHECK-NEXT:    vl %v0, 0(%r1), 3
48; CHECK-NEXT:    vpkshs %v24, %v0, %v0
49; CHECK-NEXT:    br %r14
50  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
51                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
52                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
53  %extr = extractvalue {<16 x i8>, i32} %call, 0
54  %trunc = trunc <16 x i8> %extr to <16 x i4>
55  %ret = sext <16 x i4> %trunc to <16 x i8>
56  ret <16 x i8> %ret
57}
58
; Pack intrinsics called by f3-f5. Each returns a {vector, i32} pair; the
; tests below only use the vector (struct element 0).
59declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
60declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
61declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
62
63; PACKLS_CC: i64 -> i32
; Calls llvm.s390.vpklsgs with two different constant <2 x i64> operands
; (hence two constant loads in the CHECK lines), extracts the vector result,
; then truncates it to i16 elements and sign-extends back to i32. No
; instructions are expected for the trunc/sext pair.
64define <4 x i32> @f3() {
65; CHECK-LABEL: f3:
66; CHECK:       # %bb.0:
67; CHECK-NEXT:    larl %r1, .LCPI3_0
68; CHECK-NEXT:    vl %v0, 0(%r1), 3
69; CHECK-NEXT:    larl %r1, .LCPI3_1
70; CHECK-NEXT:    vl %v1, 0(%r1), 3
71; CHECK-NEXT:    vpklsgs %v24, %v1, %v0
72; CHECK-NEXT:    br %r14
73  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
74  %extr = extractvalue {<4 x i32>, i32} %call, 0
75  %trunc = trunc <4 x i32> %extr to <4 x i16>
76  %ret = sext <4 x i16> %trunc to <4 x i32>
77  ret <4 x i32> %ret
78}
79
80; PACKLS_CC: i32 -> i16
; Calls llvm.s390.vpklsfs with the same constant <4 x i32> vector for both
; operands, extracts the vector result, then truncates it to i8 elements and
; sign-extends back to i16. The CHECK lines expect only the constant load and
; vpklsfs, i.e. no instructions for the trunc/sext pair.
81define <8 x i16> @f4() {
82; CHECK-LABEL: f4:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    larl %r1, .LCPI4_0
85; CHECK-NEXT:    vl %v0, 0(%r1), 3
86; CHECK-NEXT:    vpklsfs %v24, %v0, %v0
87; CHECK-NEXT:    br %r14
88  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
89                                                   <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
90  %extr = extractvalue {<8 x i16>, i32} %call, 0
91  %trunc = trunc <8 x i16> %extr to <8 x i8>
92  %ret = sext <8 x i8> %trunc to <8 x i16>
93  ret <8 x i16> %ret
94}
95
96; PACKLS_CC: i16 -> i8
; Calls llvm.s390.vpklshs with the same constant <8 x i16> vector for both
; operands, extracts the vector result, then truncates it to i4 elements and
; sign-extends back to i8. The CHECK lines expect only the constant load and
; vpklshs, i.e. no instructions for the trunc/sext pair.
97define <16 x i8> @f5() {
98; CHECK-LABEL: f5:
99; CHECK:       # %bb.0:
100; CHECK-NEXT:    larl %r1, .LCPI5_0
101; CHECK-NEXT:    vl %v0, 0(%r1), 3
102; CHECK-NEXT:    vpklshs %v24, %v0, %v0
103; CHECK-NEXT:    br %r14
104  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
105                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
106                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
107  %extr = extractvalue {<16 x i8>, i32} %call, 0
108  %trunc = trunc <16 x i8> %extr to <16 x i4>
109  %ret = sext <16 x i4> %trunc to <16 x i8>
110  ret <16 x i8> %ret
111}
112
; Pack intrinsics called by f6-f8. Unlike the declarations used by f0-f5,
; these return the packed vector directly rather than a {vector, i32} pair.
113declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
114declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
115declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
116
117; PACKS: i64 -> i32
; Calls llvm.s390.vpksg with two different constant <2 x i64> operands (hence
; two constant loads in the CHECK lines), then truncates the result to i16
; elements and sign-extends back to i32. No instructions are expected for the
; trunc/sext pair.
118define <4 x i32> @f6() {
119; CHECK-LABEL: f6:
120; CHECK:       # %bb.0:
121; CHECK-NEXT:    larl %r1, .LCPI6_0
122; CHECK-NEXT:    vl %v0, 0(%r1), 3
123; CHECK-NEXT:    larl %r1, .LCPI6_1
124; CHECK-NEXT:    vl %v1, 0(%r1), 3
125; CHECK-NEXT:    vpksg %v24, %v1, %v0
126; CHECK-NEXT:    br %r14
127  %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
128  %trunc = trunc <4 x i32> %call to <4 x i16>
129  %ret = sext <4 x i16> %trunc to <4 x i32>
130  ret <4 x i32> %ret
131}
132
133; PACKS: i32 -> i16
; Calls llvm.s390.vpksf with the same constant <4 x i32> vector for both
; operands, then truncates the result to i8 elements and sign-extends back to
; i16. The CHECK lines expect only the constant load and vpksf, i.e. no
; instructions for the trunc/sext pair.
134define <8 x i16> @f7() {
135; CHECK-LABEL: f7:
136; CHECK:       # %bb.0:
137; CHECK-NEXT:    larl %r1, .LCPI7_0
138; CHECK-NEXT:    vl %v0, 0(%r1), 3
139; CHECK-NEXT:    vpksf %v24, %v0, %v0
140; CHECK-NEXT:    br %r14
141  %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
142                                          <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
143  %trunc = trunc <8 x i16> %call to <8 x i8>
144  %ret = sext <8 x i8> %trunc to <8 x i16>
145  ret <8 x i16> %ret
146}
147
148; PACKS: i16 -> i8
; Calls llvm.s390.vpksh with the same constant <8 x i16> vector for both
; operands, then truncates the result to i4 elements and sign-extends back to
; i8. The CHECK lines expect only the constant load and vpksh, i.e. no
; instructions for the trunc/sext pair.
149define <16 x i8> @f8() {
150; CHECK-LABEL: f8:
151; CHECK:       # %bb.0:
152; CHECK-NEXT:    larl %r1, .LCPI8_0
153; CHECK-NEXT:    vl %v0, 0(%r1), 3
154; CHECK-NEXT:    vpksh %v24, %v0, %v0
155; CHECK-NEXT:    br %r14
156  %call = call <16 x i8> @llvm.s390.vpksh(
157                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
158                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
159  %trunc = trunc <16 x i8> %call to <16 x i4>
160  %ret = sext <16 x i4> %trunc to <16 x i8>
161  ret <16 x i8> %ret
162}
163
; Pack intrinsics called by f9-f11. These return the packed vector directly
; rather than a {vector, i32} pair.
164declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
165declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
166declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
167
168; PACKLS: i64 -> i32
; Calls llvm.s390.vpklsg with two different constant <2 x i64> operands (hence
; two constant loads in the CHECK lines), then truncates the result to i16
; elements and sign-extends back to i32. No instructions are expected for the
; trunc/sext pair.
169define <4 x i32> @f9() {
170; CHECK-LABEL: f9:
171; CHECK:       # %bb.0:
172; CHECK-NEXT:    larl %r1, .LCPI9_0
173; CHECK-NEXT:    vl %v0, 0(%r1), 3
174; CHECK-NEXT:    larl %r1, .LCPI9_1
175; CHECK-NEXT:    vl %v1, 0(%r1), 3
176; CHECK-NEXT:    vpklsg %v24, %v1, %v0
177; CHECK-NEXT:    br %r14
178  %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
179  %trunc = trunc <4 x i32> %call to <4 x i16>
180  %ret = sext <4 x i16> %trunc to <4 x i32>
181  ret <4 x i32> %ret
182}
183
184; PACKLS: i32 -> i16
; Calls llvm.s390.vpklsf with the same constant <4 x i32> vector for both
; operands, then truncates the result to i8 elements and sign-extends back to
; i16. The CHECK lines expect only the constant load and vpklsf, i.e. no
; instructions for the trunc/sext pair.
185define <8 x i16> @f10() {
186; CHECK-LABEL: f10:
187; CHECK:       # %bb.0:
188; CHECK-NEXT:    larl %r1, .LCPI10_0
189; CHECK-NEXT:    vl %v0, 0(%r1), 3
190; CHECK-NEXT:    vpklsf %v24, %v0, %v0
191; CHECK-NEXT:    br %r14
192  %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
193                                           <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
194  %trunc = trunc <8 x i16> %call to <8 x i8>
195  %ret = sext <8 x i8> %trunc to <8 x i16>
196  ret <8 x i16> %ret
197}
198
199; PACKLS: i16 -> i8
; Calls llvm.s390.vpklsh with the same constant <8 x i16> vector for both
; operands, then truncates the result to i4 elements and sign-extends back to
; i8. The CHECK lines expect only the constant load and vpklsh, i.e. no
; instructions for the trunc/sext pair.
200define <16 x i8> @f11() {
201; CHECK-LABEL: f11:
202; CHECK:       # %bb.0:
203; CHECK-NEXT:    larl %r1, .LCPI11_0
204; CHECK-NEXT:    vl %v0, 0(%r1), 3
205; CHECK-NEXT:    vpklsh %v24, %v0, %v0
206; CHECK-NEXT:    br %r14
207  %call = call <16 x i8> @llvm.s390.vpklsh(
208                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
209                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
210  %trunc = trunc <16 x i8> %call to <16 x i4>
211  %ret = sext <16 x i4> %trunc to <16 x i8>
212  ret <16 x i8> %ret
213}
214
; Intrinsic called by f12: two <2 x i64> operands plus an i32 operand.
215declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
216
217; VPDI:
; Calls llvm.s390.vpdi with two different constant <2 x i64> operands and an
; i32 operand of 0, then truncates the result to i32 elements and sign-extends
; back to i64. The CHECK lines expect two constant loads and a vpdi with
; immediate 0, i.e. no instructions for the trunc/sext pair.
218define <2 x i64> @f12() {
219; CHECK-LABEL: f12:
220; CHECK:       # %bb.0:
221; CHECK-NEXT:    larl %r1, .LCPI12_0
222; CHECK-NEXT:    vl %v0, 0(%r1), 3
223; CHECK-NEXT:    larl %r1, .LCPI12_1
224; CHECK-NEXT:    vl %v1, 0(%r1), 3
225; CHECK-NEXT:    vpdi %v24, %v1, %v0, 0
226; CHECK-NEXT:    br %r14
227  %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>,
228                                         <2 x i64> <i64 1, i64 0>, i32 0)
229  %trunc = trunc <2 x i64> %perm to <2 x i32>
230  %ret = sext <2 x i32> %trunc to <2 x i64>
231  ret <2 x i64> %ret
232}
233
; Intrinsic called by f13: two <16 x i8> operands plus an i32 operand.
234declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
235
236; VSLDB:
; Calls llvm.s390.vsldb with the same constant <16 x i8> vector for both
; vector operands and an i32 operand of 1, then truncates the result to i4
; elements and sign-extends back to i8. The CHECK lines expect only the
; constant load and a vsldb with immediate 1, i.e. no instructions for the
; trunc/sext pair.
237define <16 x i8> @f13() {
238; CHECK-LABEL: f13:
239; CHECK:       # %bb.0:
240; CHECK-NEXT:    larl %r1, .LCPI13_0
241; CHECK-NEXT:    vl %v0, 0(%r1), 3
242; CHECK-NEXT:    vsldb %v24, %v0, %v0, 1
243; CHECK-NEXT:    br %r14
244  %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
245                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
246                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8>
247                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
248                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
249                  i32 1)
250  %trunc = trunc <16 x i8> %shfd to <16 x i4>
251  %ret = sext <16 x i4> %trunc to <16 x i8>
252  ret <16 x i8> %ret
253}
254
; Intrinsic called by f14: three <16 x i8> operands.
255declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
256
257; Test VPERM:
; Calls llvm.s390.vperm with the same constant <16 x i8> vector for all three
; operands, then truncates the result to i4 elements and sign-extends back to
; i8. The CHECK lines expect only the constant load and vperm, i.e. no
; instructions for the trunc/sext pair.
258define <16 x i8> @f14() {
259; CHECK-LABEL: f14:
260; CHECK:       # %bb.0:
261; CHECK-NEXT:    larl %r1, .LCPI14_0
262; CHECK-NEXT:    vl %v0, 0(%r1), 3
263; CHECK-NEXT:    vperm %v24, %v0, %v0, %v0
264; CHECK-NEXT:    br %r14
265  %perm = call <16 x i8> @llvm.s390.vperm(
266                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
267                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
268                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
269                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
270                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
271                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>)
272  %trunc = trunc <16 x i8> %perm to <16 x i4>
273  %ret = sext <16 x i4> %trunc to <16 x i8>
274  ret <16 x i8> %ret
275}
276