; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
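; Combines that form and simplify vector absolute-difference (uabd/sabd)
; operations. The first group builds the unsigned pattern in generic IR:
; zero-extend to i32, subtract, llvm.abs, then truncate back to i16.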
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_const_both() {
; CHECK-LABEL: abdu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_const_bothhigh() {
; CHECK-LABEL: abdu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abdu_undef(<8 x i16> %src1) {
; CHECK-LABEL: abdu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}
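; icmp+select forms of abs-diff: subtract both ways and select the
; non-negative result. The unsigned and signed compares should be
; recognized as uabd and sabd respectively.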
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ugt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp uge <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ult:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ult <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ule:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ule <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sgt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sgt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sge <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_slt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp slt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sle <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}
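; The same simplifications applied to the @llvm.aarch64.neon.uabd intrinsic:
; constant, zero and undef operands should be folded where possible.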
define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_both() {
; CHECK-LABEL: abdu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_bothhigh() {
; CHECK-LABEL: abdu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_onehigh() {
; CHECK-LABEL: abdu_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_const_oneneg() {
; CHECK-LABEL: abdu_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %r1 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}
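; Signed variants of the generic IR pattern: sign-extend to i32, subtract,
; llvm.abs, truncate. The base case should fold to a single sabd.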
define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_const_both() {
; CHECK-LABEL: abds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_const_bothhigh() {
; CHECK-LABEL: abds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @abds_undef(<8 x i16> %src1) {
; CHECK-LABEL: abds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}
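; Constant folding and simplification of the @llvm.aarch64.neon.sabd intrinsic.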
define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v0.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_both() {
; CHECK-LABEL: abds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_bothhigh() {
; CHECK-LABEL: abds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_onehigh() {
; CHECK-LABEL: abds_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_const_oneneg() {
; CHECK-LABEL: abds_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %r1 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}
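; A larger case in which the result of a uabd intrinsic feeds extends,
; shuffles and saddlp.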
define <1 x i64> @recursive() {
; CHECK-LABEL: recursive:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8b, #254
; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    saddlp v1.1d, v1.2s
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  %3 = zext <8 x i8> %2 to <8 x i16>
  %4 = bitcast <8 x i16> %3 to <4 x i32>
  %5 = shufflevector <4 x i32> %4, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %6 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %7 = bitcast <16 x i8> %6 to <2 x i64>
  %8 = shufflevector <2 x i64> %7, <2 x i64> zeroinitializer, <1 x i32> zeroinitializer
  %9 = tail call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %5)
  %10 = or <1 x i64> %8, %9
  ret <1 x i64> %10
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)