xref: /llvm-project/llvm/test/CodeGen/AArch64/usub_sat_vec.ll (revision c537c752787e9da8bd8762dd5298a152f546861b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
7
; Declarations of the llvm.usub.sat (unsigned saturating subtract) vector
; overloads exercised by the test functions in this file, grouped by element
; width: i8, i16, sub-byte (i1/i4), i32/i64, and the odd widths i24/i128.
; Review note: no function visible in this file calls the v4i24 overload;
; confirm whether that declaration is intentionally left without a test.
8declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
9declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
10declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
11declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>)
12declare <12 x i8> @llvm.usub.sat.v12i8(<12 x i8>, <12 x i8>)
13declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
14declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
15declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
16
17declare <1 x i16> @llvm.usub.sat.v1i16(<1 x i16>, <1 x i16>)
18declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>)
19declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>)
20declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
21declare <12 x i16> @llvm.usub.sat.v12i16(<12 x i16>, <12 x i16>)
22declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)
23declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)
24
25declare <16 x i1> @llvm.usub.sat.v16i1(<16 x i1>, <16 x i1>)
26declare <16 x i4> @llvm.usub.sat.v16i4(<16 x i4>, <16 x i4>)
27
28declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
29declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
30declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>)
31declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>)
32declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
33declare <4 x i64> @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
34declare <8 x i64> @llvm.usub.sat.v8i64(<8 x i64>, <8 x i64>)
35
36declare <4 x i24> @llvm.usub.sat.v4i24(<4 x i24>, <4 x i24>)
37declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>)
38
39
; usub.sat on <16 x i8> passed and returned by value.
; Both llc runs share one expectation: a single uqsub over sixteen byte lanes.
40define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
41; CHECK-LABEL: v16i8:
42; CHECK:       // %bb.0:
43; CHECK-NEXT:    uqsub v0.16b, v0.16b, v1.16b
44; CHECK-NEXT:    ret
45  %z = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
46  ret <16 x i8> %z
47}
48
; usub.sat on <32 x i8> by value: expected as two 16-byte uqsub instructions
; (v0 with v2, v1 with v3). The default and GlobalISel runs have separate
; expectations because they emit the two instructions in opposite order.
49define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
50; CHECK-SD-LABEL: v32i8:
51; CHECK-SD:       // %bb.0:
52; CHECK-SD-NEXT:    uqsub v1.16b, v1.16b, v3.16b
53; CHECK-SD-NEXT:    uqsub v0.16b, v0.16b, v2.16b
54; CHECK-SD-NEXT:    ret
55;
56; CHECK-GI-LABEL: v32i8:
57; CHECK-GI:       // %bb.0:
58; CHECK-GI-NEXT:    uqsub v0.16b, v0.16b, v2.16b
59; CHECK-GI-NEXT:    uqsub v1.16b, v1.16b, v3.16b
60; CHECK-GI-NEXT:    ret
61  %z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
62  ret <32 x i8> %z
63}
64
; usub.sat on <64 x i8> by value: expected as four 16-byte uqsub instructions
; pairing v0..v3 with v4..v7. The two runs emit the same four instructions
; and differ only in their order.
65define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
66; CHECK-SD-LABEL: v64i8:
67; CHECK-SD:       // %bb.0:
68; CHECK-SD-NEXT:    uqsub v2.16b, v2.16b, v6.16b
69; CHECK-SD-NEXT:    uqsub v0.16b, v0.16b, v4.16b
70; CHECK-SD-NEXT:    uqsub v1.16b, v1.16b, v5.16b
71; CHECK-SD-NEXT:    uqsub v3.16b, v3.16b, v7.16b
72; CHECK-SD-NEXT:    ret
73;
74; CHECK-GI-LABEL: v64i8:
75; CHECK-GI:       // %bb.0:
76; CHECK-GI-NEXT:    uqsub v0.16b, v0.16b, v4.16b
77; CHECK-GI-NEXT:    uqsub v1.16b, v1.16b, v5.16b
78; CHECK-GI-NEXT:    uqsub v2.16b, v2.16b, v6.16b
79; CHECK-GI-NEXT:    uqsub v3.16b, v3.16b, v7.16b
80; CHECK-GI-NEXT:    ret
81  %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
82  ret <64 x i8> %z
83}
84
; usub.sat on <8 x i16> passed and returned by value.
; Both llc runs share one expectation: a single uqsub over eight halfword lanes.
85define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
86; CHECK-LABEL: v8i16:
87; CHECK:       // %bb.0:
88; CHECK-NEXT:    uqsub v0.8h, v0.8h, v1.8h
89; CHECK-NEXT:    ret
90  %z = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
91  ret <8 x i16> %z
92}
93
; usub.sat on <16 x i16> by value: expected as two uqsub instructions on .8h
; lanes (v0 with v2, v1 with v3). The default and GlobalISel runs emit them
; in opposite order, hence the separate expectations.
94define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
95; CHECK-SD-LABEL: v16i16:
96; CHECK-SD:       // %bb.0:
97; CHECK-SD-NEXT:    uqsub v1.8h, v1.8h, v3.8h
98; CHECK-SD-NEXT:    uqsub v0.8h, v0.8h, v2.8h
99; CHECK-SD-NEXT:    ret
100;
101; CHECK-GI-LABEL: v16i16:
102; CHECK-GI:       // %bb.0:
103; CHECK-GI-NEXT:    uqsub v0.8h, v0.8h, v2.8h
104; CHECK-GI-NEXT:    uqsub v1.8h, v1.8h, v3.8h
105; CHECK-GI-NEXT:    ret
106  %z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
107  ret <16 x i16> %z
108}
109
; usub.sat on <32 x i16> by value: expected as four uqsub instructions on .8h
; lanes pairing v0..v3 with v4..v7. The two runs differ only in the order in
; which the four instructions appear.
110define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
111; CHECK-SD-LABEL: v32i16:
112; CHECK-SD:       // %bb.0:
113; CHECK-SD-NEXT:    uqsub v2.8h, v2.8h, v6.8h
114; CHECK-SD-NEXT:    uqsub v0.8h, v0.8h, v4.8h
115; CHECK-SD-NEXT:    uqsub v1.8h, v1.8h, v5.8h
116; CHECK-SD-NEXT:    uqsub v3.8h, v3.8h, v7.8h
117; CHECK-SD-NEXT:    ret
118;
119; CHECK-GI-LABEL: v32i16:
120; CHECK-GI:       // %bb.0:
121; CHECK-GI-NEXT:    uqsub v0.8h, v0.8h, v4.8h
122; CHECK-GI-NEXT:    uqsub v1.8h, v1.8h, v5.8h
123; CHECK-GI-NEXT:    uqsub v2.8h, v2.8h, v6.8h
124; CHECK-GI-NEXT:    uqsub v3.8h, v3.8h, v7.8h
125; CHECK-GI-NEXT:    ret
126  %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
127  ret <32 x i16> %z
128}
129
; usub.sat on <8 x i8> with both operands loaded from memory (%px, %py) and
; the result stored to %pz. Both runs expect 64-bit d-register loads, one
; uqsub over eight byte lanes, and a 64-bit store.
130define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
131; CHECK-LABEL: v8i8:
132; CHECK:       // %bb.0:
133; CHECK-NEXT:    ldr d0, [x0]
134; CHECK-NEXT:    ldr d1, [x1]
135; CHECK-NEXT:    uqsub v0.8b, v0.8b, v1.8b
136; CHECK-NEXT:    str d0, [x2]
137; CHECK-NEXT:    ret
138  %x = load <8 x i8>, ptr %px
139  %y = load <8 x i8>, ptr %py
140  %z = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
141  store <8 x i8> %z, ptr %pz
142  ret void
143}
144
; usub.sat on <4 x i8> loaded from %px/%py and stored to %pz.
; Default run: loads 32 bits into s registers, zero-extends the bytes to
; halfword lanes with ushll, subtracts with uqsub on .4h, narrows back with
; uzp1 and stores 32 bits. Clamping at zero gives the same result in the wider
; lanes because the inputs were zero-extended.
; GlobalISel run: loads through general-purpose registers, rebuilds each
; operand one byte lane at a time, subtracts with uqsub on .8b, and stores the
; low 32 bits through w8.
145define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
146; CHECK-SD-LABEL: v4i8:
147; CHECK-SD:       // %bb.0:
148; CHECK-SD-NEXT:    ldr s0, [x0]
149; CHECK-SD-NEXT:    ldr s1, [x1]
150; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
151; CHECK-SD-NEXT:    ushll v1.8h, v1.8b, #0
152; CHECK-SD-NEXT:    uqsub v0.4h, v0.4h, v1.4h
153; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
154; CHECK-SD-NEXT:    str s0, [x2]
155; CHECK-SD-NEXT:    ret
156;
157; CHECK-GI-LABEL: v4i8:
158; CHECK-GI:       // %bb.0:
159; CHECK-GI-NEXT:    ldr w8, [x0]
160; CHECK-GI-NEXT:    ldr w9, [x1]
161; CHECK-GI-NEXT:    fmov s0, w8
162; CHECK-GI-NEXT:    fmov s1, w9
163; CHECK-GI-NEXT:    mov b2, v0.b[1]
164; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
165; CHECK-GI-NEXT:    mov b4, v1.b[1]
166; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
167; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
168; CHECK-GI-NEXT:    mov b2, v0.b[2]
169; CHECK-GI-NEXT:    mov b0, v0.b[3]
170; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
171; CHECK-GI-NEXT:    mov b4, v1.b[2]
172; CHECK-GI-NEXT:    mov b1, v1.b[3]
173; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
174; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
175; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
176; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
177; CHECK-GI-NEXT:    uqsub v0.8b, v3.8b, v5.8b
178; CHECK-GI-NEXT:    fmov w8, s0
179; CHECK-GI-NEXT:    str w8, [x2]
180; CHECK-GI-NEXT:    ret
181  %x = load <4 x i8>, ptr %px
182  %y = load <4 x i8>, ptr %py
183  %z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
184  store <4 x i8> %z, ptr %pz
185  ret void
186}
187
; usub.sat on <2 x i8> loaded from %px/%py and stored to %pz.
; Default run: loads each byte with ldrb (zero-extending), places the values
; in two 32-bit lanes, subtracts with uqsub on .2s, and stores the two result
; bytes with strb.
; GlobalISel run: loads into b registers, inserts them into byte lanes,
; subtracts with uqsub on .8b, and writes lanes 0 and 1 with single-lane st1.
188define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
189; CHECK-SD-LABEL: v2i8:
190; CHECK-SD:       // %bb.0:
191; CHECK-SD-NEXT:    ldrb w8, [x0]
192; CHECK-SD-NEXT:    ldrb w9, [x1]
193; CHECK-SD-NEXT:    ldrb w10, [x0, #1]
194; CHECK-SD-NEXT:    ldrb w11, [x1, #1]
195; CHECK-SD-NEXT:    fmov s0, w8
196; CHECK-SD-NEXT:    fmov s1, w9
197; CHECK-SD-NEXT:    mov v0.s[1], w10
198; CHECK-SD-NEXT:    mov v1.s[1], w11
199; CHECK-SD-NEXT:    uqsub v0.2s, v0.2s, v1.2s
200; CHECK-SD-NEXT:    mov w8, v0.s[1]
201; CHECK-SD-NEXT:    fmov w9, s0
202; CHECK-SD-NEXT:    strb w9, [x2]
203; CHECK-SD-NEXT:    strb w8, [x2, #1]
204; CHECK-SD-NEXT:    ret
205;
206; CHECK-GI-LABEL: v2i8:
207; CHECK-GI:       // %bb.0:
208; CHECK-GI-NEXT:    ldr b0, [x0]
209; CHECK-GI-NEXT:    ldr b1, [x1]
210; CHECK-GI-NEXT:    add x8, x2, #1
211; CHECK-GI-NEXT:    ldr b2, [x0, #1]
212; CHECK-GI-NEXT:    ldr b3, [x1, #1]
213; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
214; CHECK-GI-NEXT:    mov v1.b[0], v1.b[0]
215; CHECK-GI-NEXT:    mov v0.b[1], v2.b[0]
216; CHECK-GI-NEXT:    mov v1.b[1], v3.b[0]
217; CHECK-GI-NEXT:    uqsub v0.8b, v0.8b, v1.8b
218; CHECK-GI-NEXT:    st1 { v0.b }[0], [x2]
219; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
220; CHECK-GI-NEXT:    ret
221  %x = load <2 x i8>, ptr %px
222  %y = load <2 x i8>, ptr %py
223  %z = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
224  store <2 x i8> %z, ptr %pz
225  ret void
226}
227
; usub.sat on <4 x i16> with operands loaded from %px/%py and the result
; stored to %pz. Both runs expect 64-bit d-register loads, one uqsub over
; four halfword lanes, and a 64-bit store.
228define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
229; CHECK-LABEL: v4i16:
230; CHECK:       // %bb.0:
231; CHECK-NEXT:    ldr d0, [x0]
232; CHECK-NEXT:    ldr d1, [x1]
233; CHECK-NEXT:    uqsub v0.4h, v0.4h, v1.4h
234; CHECK-NEXT:    str d0, [x2]
235; CHECK-NEXT:    ret
236  %x = load <4 x i16>, ptr %px
237  %y = load <4 x i16>, ptr %py
238  %z = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
239  store <4 x i16> %z, ptr %pz
240  ret void
241}
242
; usub.sat on <2 x i16> loaded from %px/%py and stored to %pz.
; Default run: loads each halfword with ldrh (zero-extending), places the
; values in two 32-bit lanes, subtracts with uqsub on .2s, and stores the two
; result halfwords with strh.
; GlobalISel run: loads element 0 into an h register and element 1 with a
; single-lane ld1, subtracts with uqsub on .4h, then stores element 0 with
; str and element 1 with a single-lane st1.
243define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
244; CHECK-SD-LABEL: v2i16:
245; CHECK-SD:       // %bb.0:
246; CHECK-SD-NEXT:    ldrh w8, [x0]
247; CHECK-SD-NEXT:    ldrh w9, [x1]
248; CHECK-SD-NEXT:    ldrh w10, [x0, #2]
249; CHECK-SD-NEXT:    ldrh w11, [x1, #2]
250; CHECK-SD-NEXT:    fmov s0, w8
251; CHECK-SD-NEXT:    fmov s1, w9
252; CHECK-SD-NEXT:    mov v0.s[1], w10
253; CHECK-SD-NEXT:    mov v1.s[1], w11
254; CHECK-SD-NEXT:    uqsub v0.2s, v0.2s, v1.2s
255; CHECK-SD-NEXT:    mov w8, v0.s[1]
256; CHECK-SD-NEXT:    fmov w9, s0
257; CHECK-SD-NEXT:    strh w9, [x2]
258; CHECK-SD-NEXT:    strh w8, [x2, #2]
259; CHECK-SD-NEXT:    ret
260;
261; CHECK-GI-LABEL: v2i16:
262; CHECK-GI:       // %bb.0:
263; CHECK-GI-NEXT:    ldr h0, [x0]
264; CHECK-GI-NEXT:    ldr h1, [x1]
265; CHECK-GI-NEXT:    add x8, x0, #2
266; CHECK-GI-NEXT:    add x9, x1, #2
267; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
268; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
269; CHECK-GI-NEXT:    add x8, x2, #2
270; CHECK-GI-NEXT:    uqsub v0.4h, v0.4h, v1.4h
271; CHECK-GI-NEXT:    str h0, [x2]
272; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
273; CHECK-GI-NEXT:    ret
274  %x = load <2 x i16>, ptr %px
275  %y = load <2 x i16>, ptr %py
276  %z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
277  store <2 x i16> %z, ptr %pz
278  ret void
279}
280
; usub.sat on <12 x i8> (a non-power-of-two element count) passed by value.
; Both runs expect the same code as the 16-element case: one uqsub over .16b.
281define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
282; CHECK-LABEL: v12i8:
283; CHECK:       // %bb.0:
284; CHECK-NEXT:    uqsub v0.16b, v0.16b, v1.16b
285; CHECK-NEXT:    ret
286  %z = call <12 x i8> @llvm.usub.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
287  ret <12 x i8> %z
288}
289
; usub.sat on <12 x i16> (24 bytes) loaded from %px/%py and stored to %pz.
; Default run: loads two q registers per operand with ldp, performs two uqsub
; on .8h, and stores a q plus a d register (16 + 8 bytes).
; GlobalISel run: loads a q and a d register per operand, performs uqsub on
; .8h for the first part and on .4h for the remainder, and stores q plus d.
290define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
291; CHECK-SD-LABEL: v12i16:
292; CHECK-SD:       // %bb.0:
293; CHECK-SD-NEXT:    ldp q0, q3, [x1]
294; CHECK-SD-NEXT:    ldp q1, q2, [x0]
295; CHECK-SD-NEXT:    uqsub v0.8h, v1.8h, v0.8h
296; CHECK-SD-NEXT:    uqsub v1.8h, v2.8h, v3.8h
297; CHECK-SD-NEXT:    str q0, [x2]
298; CHECK-SD-NEXT:    str d1, [x2, #16]
299; CHECK-SD-NEXT:    ret
300;
301; CHECK-GI-LABEL: v12i16:
302; CHECK-GI:       // %bb.0:
303; CHECK-GI-NEXT:    ldr q0, [x0]
304; CHECK-GI-NEXT:    ldr q1, [x1]
305; CHECK-GI-NEXT:    ldr d2, [x0, #16]
306; CHECK-GI-NEXT:    ldr d3, [x1, #16]
307; CHECK-GI-NEXT:    uqsub v0.8h, v0.8h, v1.8h
308; CHECK-GI-NEXT:    uqsub v1.4h, v2.4h, v3.4h
309; CHECK-GI-NEXT:    str q0, [x2]
310; CHECK-GI-NEXT:    str d1, [x2, #16]
311; CHECK-GI-NEXT:    ret
312  %x = load <12 x i16>, ptr %px
313  %y = load <12 x i16>, ptr %py
314  %z = call <12 x i16> @llvm.usub.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
315  store <12 x i16> %z, ptr %pz
316  ret void
317}
318
; usub.sat on <1 x i8> loaded from %px/%py and stored to %pz.
; Default run: stays in vector registers (b-register loads, uqsub on .8b,
; single-lane st1 of byte 0).
; GlobalISel run: uses scalar code instead: ldrb loads, a plain sub, a compare
; of the result against its own zero-extended low byte, and a csel that
; writes zero when they differ.
319define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
320; CHECK-SD-LABEL: v1i8:
321; CHECK-SD:       // %bb.0:
322; CHECK-SD-NEXT:    ldr b0, [x0]
323; CHECK-SD-NEXT:    ldr b1, [x1]
324; CHECK-SD-NEXT:    uqsub v0.8b, v0.8b, v1.8b
325; CHECK-SD-NEXT:    st1 { v0.b }[0], [x2]
326; CHECK-SD-NEXT:    ret
327;
328; CHECK-GI-LABEL: v1i8:
329; CHECK-GI:       // %bb.0:
330; CHECK-GI-NEXT:    ldrb w8, [x0]
331; CHECK-GI-NEXT:    ldrb w9, [x1]
332; CHECK-GI-NEXT:    sub w8, w8, w9
333; CHECK-GI-NEXT:    cmp w8, w8, uxtb
334; CHECK-GI-NEXT:    csel w8, wzr, w8, ne
335; CHECK-GI-NEXT:    strb w8, [x2]
336; CHECK-GI-NEXT:    ret
337  %x = load <1 x i8>, ptr %px
338  %y = load <1 x i8>, ptr %py
339  %z = call <1 x i8> @llvm.usub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
340  store <1 x i8> %z, ptr %pz
341  ret void
342}
343
; usub.sat on <1 x i16> loaded from %px/%py and stored to %pz.
; Default run: stays in vector registers (h-register loads, uqsub on .4h,
; h-register store).
; GlobalISel run: uses scalar code instead: ldrh loads, a plain sub, a compare
; of the result against its own zero-extended low halfword, and a csel that
; writes zero when they differ.
344define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
345; CHECK-SD-LABEL: v1i16:
346; CHECK-SD:       // %bb.0:
347; CHECK-SD-NEXT:    ldr h0, [x0]
348; CHECK-SD-NEXT:    ldr h1, [x1]
349; CHECK-SD-NEXT:    uqsub v0.4h, v0.4h, v1.4h
350; CHECK-SD-NEXT:    str h0, [x2]
351; CHECK-SD-NEXT:    ret
352;
353; CHECK-GI-LABEL: v1i16:
354; CHECK-GI:       // %bb.0:
355; CHECK-GI-NEXT:    ldrh w8, [x0]
356; CHECK-GI-NEXT:    ldrh w9, [x1]
357; CHECK-GI-NEXT:    sub w8, w8, w9
358; CHECK-GI-NEXT:    cmp w8, w8, uxth
359; CHECK-GI-NEXT:    csel w8, wzr, w8, ne
360; CHECK-GI-NEXT:    strh w8, [x2]
361; CHECK-GI-NEXT:    ret
362  %x = load <1 x i16>, ptr %px
363  %y = load <1 x i16>, ptr %py
364  %z = call <1 x i16> @llvm.usub.sat.v1i16(<1 x i16> %x, <1 x i16> %y)
365  store <1 x i16> %z, ptr %pz
366  ret void
367}
368
; usub.sat on <16 x i4> by value. Expected code masks both operands to their
; low four bits (movi #15 + and) and then uses a byte-lane uqsub.
; Both runs share this expectation; the file header additionally expects the
; GlobalISel run to report a fallback warning for this function.
369define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
370; CHECK-LABEL: v16i4:
371; CHECK:       // %bb.0:
372; CHECK-NEXT:    movi v2.16b, #15
373; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
374; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
375; CHECK-NEXT:    uqsub v0.16b, v0.16b, v1.16b
376; CHECK-NEXT:    ret
377  %z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
378  ret <16 x i4> %z
379}
380
; usub.sat on <16 x i1> by value. Expected code uses no subtract at all: it
; flips the low bit of %y (eor with a splat of 1) and ands the result with %x,
; i.e. x & ~y on the single significant bit.
; Both runs share this expectation; the file header additionally expects the
; GlobalISel run to report a fallback warning for this function.
381define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
382; CHECK-LABEL: v16i1:
383; CHECK:       // %bb.0:
384; CHECK-NEXT:    movi v2.16b, #1
385; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
386; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
387; CHECK-NEXT:    ret
388  %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
389  ret <16 x i1> %z
390}
391
; usub.sat on <2 x i32> passed and returned by value.
; Both llc runs share one expectation: a single uqsub over two word lanes.
392define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
393; CHECK-LABEL: v2i32:
394; CHECK:       // %bb.0:
395; CHECK-NEXT:    uqsub v0.2s, v0.2s, v1.2s
396; CHECK-NEXT:    ret
397  %z = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
398  ret <2 x i32> %z
399}
400
; usub.sat on <4 x i32> passed and returned by value.
; Both llc runs share one expectation: a single uqsub over four word lanes.
401define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
402; CHECK-LABEL: v4i32:
403; CHECK:       // %bb.0:
404; CHECK-NEXT:    uqsub v0.4s, v0.4s, v1.4s
405; CHECK-NEXT:    ret
406  %z = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
407  ret <4 x i32> %z
408}
409
; usub.sat on <8 x i32> by value: expected as two uqsub instructions on .4s
; lanes (v0 with v2, v1 with v3). The default and GlobalISel runs emit them
; in opposite order, hence the separate expectations.
410define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
411; CHECK-SD-LABEL: v8i32:
412; CHECK-SD:       // %bb.0:
413; CHECK-SD-NEXT:    uqsub v1.4s, v1.4s, v3.4s
414; CHECK-SD-NEXT:    uqsub v0.4s, v0.4s, v2.4s
415; CHECK-SD-NEXT:    ret
416;
417; CHECK-GI-LABEL: v8i32:
418; CHECK-GI:       // %bb.0:
419; CHECK-GI-NEXT:    uqsub v0.4s, v0.4s, v2.4s
420; CHECK-GI-NEXT:    uqsub v1.4s, v1.4s, v3.4s
421; CHECK-GI-NEXT:    ret
422  %z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
423  ret <8 x i32> %z
424}
425
; usub.sat on <16 x i32> by value: expected as four uqsub instructions on .4s
; lanes pairing v0..v3 with v4..v7. The two runs differ only in the order in
; which the four instructions appear.
426define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
427; CHECK-SD-LABEL: v16i32:
428; CHECK-SD:       // %bb.0:
429; CHECK-SD-NEXT:    uqsub v2.4s, v2.4s, v6.4s
430; CHECK-SD-NEXT:    uqsub v0.4s, v0.4s, v4.4s
431; CHECK-SD-NEXT:    uqsub v1.4s, v1.4s, v5.4s
432; CHECK-SD-NEXT:    uqsub v3.4s, v3.4s, v7.4s
433; CHECK-SD-NEXT:    ret
434;
435; CHECK-GI-LABEL: v16i32:
436; CHECK-GI:       // %bb.0:
437; CHECK-GI-NEXT:    uqsub v0.4s, v0.4s, v4.4s
438; CHECK-GI-NEXT:    uqsub v1.4s, v1.4s, v5.4s
439; CHECK-GI-NEXT:    uqsub v2.4s, v2.4s, v6.4s
440; CHECK-GI-NEXT:    uqsub v3.4s, v3.4s, v7.4s
441; CHECK-GI-NEXT:    ret
442  %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
443  ret <16 x i32> %z
444}
445
; usub.sat on <2 x i64> passed and returned by value.
; Both llc runs share one expectation: a single uqsub over two doubleword lanes.
446define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
447; CHECK-LABEL: v2i64:
448; CHECK:       // %bb.0:
449; CHECK-NEXT:    uqsub v0.2d, v0.2d, v1.2d
450; CHECK-NEXT:    ret
451  %z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
452  ret <2 x i64> %z
453}
454
; usub.sat on <4 x i64> by value: expected as two uqsub instructions on .2d
; lanes (v0 with v2, v1 with v3). The default and GlobalISel runs emit them
; in opposite order, hence the separate expectations.
455define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
456; CHECK-SD-LABEL: v4i64:
457; CHECK-SD:       // %bb.0:
458; CHECK-SD-NEXT:    uqsub v1.2d, v1.2d, v3.2d
459; CHECK-SD-NEXT:    uqsub v0.2d, v0.2d, v2.2d
460; CHECK-SD-NEXT:    ret
461;
462; CHECK-GI-LABEL: v4i64:
463; CHECK-GI:       // %bb.0:
464; CHECK-GI-NEXT:    uqsub v0.2d, v0.2d, v2.2d
465; CHECK-GI-NEXT:    uqsub v1.2d, v1.2d, v3.2d
466; CHECK-GI-NEXT:    ret
467  %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
468  ret <4 x i64> %z
469}
470
; usub.sat on <8 x i64> by value: expected as four uqsub instructions on .2d
; lanes pairing v0..v3 with v4..v7. The two runs differ only in the order in
; which the four instructions appear.
471define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
472; CHECK-SD-LABEL: v8i64:
473; CHECK-SD:       // %bb.0:
474; CHECK-SD-NEXT:    uqsub v2.2d, v2.2d, v6.2d
475; CHECK-SD-NEXT:    uqsub v0.2d, v0.2d, v4.2d
476; CHECK-SD-NEXT:    uqsub v1.2d, v1.2d, v5.2d
477; CHECK-SD-NEXT:    uqsub v3.2d, v3.2d, v7.2d
478; CHECK-SD-NEXT:    ret
479;
480; CHECK-GI-LABEL: v8i64:
481; CHECK-GI:       // %bb.0:
482; CHECK-GI-NEXT:    uqsub v0.2d, v0.2d, v4.2d
483; CHECK-GI-NEXT:    uqsub v1.2d, v1.2d, v5.2d
484; CHECK-GI-NEXT:    uqsub v2.2d, v2.2d, v6.2d
485; CHECK-GI-NEXT:    uqsub v3.2d, v3.2d, v7.2d
486; CHECK-GI-NEXT:    ret
487  %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
488  ret <8 x i64> %z
489}
490
; usub.sat on <2 x i128> by value. Expected code is scalar: each 128-bit
; element is subtracted as a subs/sbcs pair on general-purpose registers
; (x0:x1 minus x4:x5, x2:x3 minus x6:x7) and both halves are replaced by zero
; when the subtraction borrows.
; Default run: the csel instructions test the "lo" condition directly.
; GlobalISel run: the borrow is first materialised with cset and re-tested
; with tst before the csel instructions.
491define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
492; CHECK-SD-LABEL: v2i128:
493; CHECK-SD:       // %bb.0:
494; CHECK-SD-NEXT:    subs x8, x0, x4
495; CHECK-SD-NEXT:    sbcs x9, x1, x5
496; CHECK-SD-NEXT:    csel x0, xzr, x8, lo
497; CHECK-SD-NEXT:    csel x1, xzr, x9, lo
498; CHECK-SD-NEXT:    subs x8, x2, x6
499; CHECK-SD-NEXT:    sbcs x9, x3, x7
500; CHECK-SD-NEXT:    csel x2, xzr, x8, lo
501; CHECK-SD-NEXT:    csel x3, xzr, x9, lo
502; CHECK-SD-NEXT:    ret
503;
504; CHECK-GI-LABEL: v2i128:
505; CHECK-GI:       // %bb.0:
506; CHECK-GI-NEXT:    subs x8, x0, x4
507; CHECK-GI-NEXT:    sbcs x9, x1, x5
508; CHECK-GI-NEXT:    cset w10, lo
509; CHECK-GI-NEXT:    tst w10, #0x1
510; CHECK-GI-NEXT:    csel x0, xzr, x8, ne
511; CHECK-GI-NEXT:    csel x1, xzr, x9, ne
512; CHECK-GI-NEXT:    subs x8, x2, x6
513; CHECK-GI-NEXT:    sbcs x9, x3, x7
514; CHECK-GI-NEXT:    cset w10, lo
515; CHECK-GI-NEXT:    tst w10, #0x1
516; CHECK-GI-NEXT:    csel x2, xzr, x8, ne
517; CHECK-GI-NEXT:    csel x3, xzr, x9, ne
518; CHECK-GI-NEXT:    ret
519  %z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
520  ret <2 x i128> %z
521}
522