; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

;
; SABD
;

; i8 signed abs-diff: sext to i16, sub, abs, trunc; expected to lower to vmin/vmax/vsub.
define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabd_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmin.vv v12, v8, v10
; CHECK-NEXT:    vmax.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
  %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
  %sub = sub <vscale x 16 x i16> %a.sext, %b.sext
  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %trunc
}

; i1 operands: abs-diff of 0/1 values reduces to xor of the masks, merged back to i8.
define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: sabd_b_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmxor.mm v0, v0, v8
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
  %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
  %sub = sub <vscale x 16 x i8> %a.sext, %b.sext
  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
  ret <vscale x 16 x i8> %abs
}

; i16 signed abs-diff: sext to i32, sub, abs, trunc; expected to lower to vmin/vmax/vsub.
define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabd_h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmin.vv v12, v8, v10
; CHECK-NEXT:    vmax.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
  %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
  %sub = sub <vscale x 8 x i32> %a.sext, %b.sext
  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %trunc
}

; i8 ops with i16 result: abs-diff computed at e8, then zero-extended to e16.
define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: sabd_h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
  %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %sub = sub <vscale x 8 x i16> %a.sext, %b.sext
  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
  ret <vscale x 8 x i16> %abs
}

; i32 signed abs-diff: sext to i64, sub, abs, trunc; expected to lower to vmin/vmax/vsub.
define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabd_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmin.vv v12, v8, v10
; CHECK-NEXT:    vmax.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
  %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %a.sext, %b.sext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %trunc
}

; i16 ops with i32 result: abs-diff computed at e16, then zero-extended to e32.
define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: sabd_s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
  %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %a.sext, %b.sext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  ret <vscale x 4 x i32> %abs
}

; FIXME: Crashes legalization if enabled
;; define <vscale x 2 x i64> @sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
;;   %a.sext = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
;;   %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
;;   %sub = sub <vscale x 2 x i128> %a.sext, %b.sext
;;   %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
;;   %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
;;   ret <vscale x 2 x i64> %trunc
;; }

; i32 ops with i64 result: abs-diff computed at e32, then zero-extended to e64.
define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: sabd_d_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %sub = sub <vscale x 2 x i64> %a.sext, %b.sext
  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
  ret <vscale x 2 x i64> %abs
}

;
; UABD
;

; i8 unsigned abs-diff: zext to i16, sub, abs, trunc; expected to lower to vminu/vmaxu/vsub.
define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uabd_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vminu.vv v12, v8, v10
; CHECK-NEXT:    vmaxu.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
  %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
  %sub = sub <vscale x 16 x i16> %a.zext, %b.zext
  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %trunc
}

; i1 operands: abs-diff of 0/1 values reduces to xor of the masks, merged back to i8.
define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uabd_b_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmxor.mm v0, v0, v8
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
  %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
  %sub = sub <vscale x 16 x i8> %a.zext, %b.zext
  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
  ret <vscale x 16 x i8> %abs
}

; i16 unsigned abs-diff: zext to i32, sub, abs, trunc; expected to lower to vminu/vmaxu/vsub.
define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uabd_h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vminu.vv v12, v8, v10
; CHECK-NEXT:    vmaxu.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
  %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
  %sub = sub <vscale x 8 x i32> %a.zext, %b.zext
  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %trunc
}

; i8 ops with i16 result: unsigned abs-diff computed at e8, then zero-extended to e16.
define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: uabd_h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %sub = sub <vscale x 8 x i16> %a.zext, %b.zext
  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
  ret <vscale x 8 x i16> %abs
}

; i32 unsigned abs-diff: zext to i64, sub, abs, trunc; expected to lower to vminu/vmaxu/vsub.
define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uabd_s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vminu.vv v12, v8, v10
; CHECK-NEXT:    vmaxu.vv v8, v8, v10
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %trunc
}

; i16 ops with i32 result: unsigned abs-diff computed at e16, then zero-extended to e32.
define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: uabd_s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
  %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  ret <vscale x 4 x i32> %abs
}

; FIXME: Crashes legalization if enabled
;; define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
;;   %a.zext = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
;;   %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
;;   %sub = sub <vscale x 2 x i128> %a.zext, %b.zext
;;   %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
;;   %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
;;   ret <vscale x 2 x i64> %trunc
;; }

; i32 ops with i64 result: unsigned abs-diff computed at e32, then zero-extended to e64.
define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: uabd_d_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %sub = sub <vscale x 2 x i64> %a.zext, %b.zext
  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
  ret <vscale x 2 x i64> %abs
}

; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and
; %b have differing types.
define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <vscale x 4 x i8> %b) {
; CHECK-LABEL: uabd_non_matching_extension:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf4 v12, v10
; CHECK-NEXT:    vminu.vv v10, v8, v12
; CHECK-NEXT:    vmaxu.vv v8, v8, v12
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
  %b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>
  %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %trunc
}

; Test the situation where isLegal(ISD::ABD, typeof(%a.zext)) returns true but
; %a and %b have differing types.
define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: uabd_non_matching_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vminu.vv v8, v10, v9
; CHECK-NEXT:    vmaxu.vv v9, v10, v9
; CHECK-NEXT:    vsub.vv v10, v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
  %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  ret <vscale x 4 x i32> %abs
}

; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and
; %b are promoted differently (zext vs sext), so the min/max trick does not apply.
define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) {
; CHECK-LABEL: uabd_non_matching_promotion:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf4 v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsext.vf2 v8, v9
; CHECK-NEXT:    vwsub.wv v10, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrsub.vi v8, v10, 0
; CHECK-NEXT:    vmax.vv v8, v10, v8
; CHECK-NEXT:    ret
  %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
  %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
  %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
  ret <vscale x 4 x i32> %abs
}

declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)

declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)

declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)

declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)

declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}