; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SABD
;
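; Each sext -> sub -> abs (-> trunc) sequence below spells a signed absolute
; difference and should select to a single sabd (or widening sabdl).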

define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: sabd_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: sabd_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a.sext = sext <16 x i8> %a to <16 x i16>
  %b.sext = sext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: sabd_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

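; v4i8 is not a legal type, so the operands are promoted to v4i16 and the
; in-register sign extension is done with shl + sshr pairs before the sabd
; (likewise for v2i16 -> v2i32 in sabd_2s_promoted_ops below).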
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4h, v1.4h, #8
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v1.4h, v1.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i8> %a to <4 x i16>
  %b.sext = sext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.sext, %b.sext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: sabd_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i16> %a to <8 x i32>
  %b.sext = sext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: sabd_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.2s, v1.2s, #16
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v1.2s, v1.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i16> %a to <2 x i32>
  %b.sext = sext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.sext, %b.sext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: sabd_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i32> %a to <4 x i64>
  %b.sext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.sext, %b.sext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

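; There is no 64-bit element form of sabd, so the v2i64 case is expanded:
; cmgt builds an all-ones mask for lanes where a > b, and mask - (sub ^ mask)
; conditionally negates a - b to produce the absolute difference.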
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: sabd_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i64> %a to <2 x i128>
  %b.sext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.sext, %b.sext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

;
; UABD
;
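; The same patterns with zext instead of sext; these should select to uabd
; (or widening uabdl).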

define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: uabd_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: uabd_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a.zext = zext <16 x i8> %a to <16 x i16>
  %b.zext = zext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.zext, %b.zext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: uabd_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

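; For promoted v4i8 operands the zero extension is done by clearing the high
; byte of each lane with bic (and with a movi mask for v2i16 below).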
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v1.4h, #255, lsl #8
; CHECK-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i8> %a to <4 x i16>
  %b.zext = zext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.zext, %b.zext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: uabd_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i16> %a to <8 x i32>
  %b.zext = zext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.zext, %b.zext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: uabd_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i16> %a to <2 x i32>
  %b.zext = zext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.zext, %b.zext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: uabd_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i32> %a to <4 x i64>
  %b.zext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.zext, %b.zext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

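; No 64-bit element uabd either; the expansion relies on the identity
; uabd(a, b) == uqsub(a, b) | uqsub(b, a), where one of the two saturating
; subtractions is always zero.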
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: uabd_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i64> %a to <2 x i128>
  %b.zext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.zext, %b.zext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

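; abs of a sub carrying the nuw flag is not combined to uabd here and lowers
; literally as sub + abs.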
define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    abs v0.16b, v0.16b
; CHECK-NEXT:    ret
  %sub = sub nuw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    abs v0.8h, v0.8h
; CHECK-NEXT:    ret
  %sub = sub nuw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    ret
  %sub = sub nuw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    abs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %sub = sub nuw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

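; With nsw the subtraction cannot overflow, so abs(a - b) is exactly the
; signed absolute difference and folds to sabd; v2i64 keeps sub + abs since
; sabd has no 64-bit element form and NEON has a native abs for .2d.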
define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    abs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

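; smax(a, b) - smin(a, b) is another way to spell the absolute difference and
; should combine to sabd/uabd, with the same v2i64 expansions as above.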
define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

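; As above, but with the umin operands commuted.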
define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

; (abds x, y) upper bits are known zero if x and y have extra sign bits
define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: combine_sabd_4h_zerosign:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %a.ext = ashr <4 x i16> %a, <i16 7, i16 8, i16 9, i16 10>
  %b.ext = ashr <4 x i16> %b, <i16 11, i16 12, i16 13, i16 14>
  %max = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
  %min = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
  %sub = sub <4 x i16> %max, %min
  %mask = and <4 x i16> %sub, <i16 32768, i16 32768, i16 32768, i16 32768>
  ret <4 x i16> %mask
}

; negative test - mask extends beyond known zero bits
define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: combine_sabd_2s_zerosign_negative:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.2s, v0.2s, #3
; CHECK-NEXT:    sshr v1.2s, v1.2s, #15
; CHECK-NEXT:    mvni v2.2s, #7, msl #16
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    ret
  %a.ext = ashr <2 x i32> %a, <i32 3, i32 3>
  %b.ext = ashr <2 x i32> %b, <i32 15, i32 15>
  %max = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
  %min = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
  %sub = sub <2 x i32> %max, %min
  %mask = and <2 x i32> %sub, <i32 -524288, i32 -524288> ; 0xFFF80000
  ret <2 x i32> %mask
}

declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)

declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)

declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)

declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)

declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)

attributes #0 = { "target-features"="+neon" }