; xref: /llvm-project/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (revision c537c752787e9da8bd8762dd5298a152f546861b)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1

; Declarations of the llvm.ssub.sat overloads exercised below, grouped by
; element width.
; NOTE(review): @llvm.ssub.sat.v4i24 is declared here but no function visible
; in this file calls it - confirm whether a test is missing or the declaration
; is a leftover.
declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
declare <12 x i8> @llvm.ssub.sat.v12i8(<12 x i8>, <12 x i8>)
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)

declare <1 x i16> @llvm.ssub.sat.v1i16(<1 x i16>, <1 x i16>)
declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16>, <12 x i16>)
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)
declare <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16>, <32 x i16>)

declare <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1>, <16 x i1>)
declare <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4>, <16 x i4>)

declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>)
declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>)
declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
declare <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>)

declare <4 x i24> @llvm.ssub.sat.v4i24(<4 x i24>, <4 x i24>)
declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>)


; Signed saturating subtract on <16 x i8>, passed and returned in registers.
; Both llc invocations share one expectation: a single sqsub on the .16b
; arrangement.
define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %z = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %z
}

; Signed saturating subtract on <32 x i8>. The expected output uses two
; sqsub .16b instructions (v0-v2 and v1-v3); the SelectionDAG and GlobalISel
; expectations differ only in the order the two halves are emitted.
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-SD-LABEL: v32i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    sqsub v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v32i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    sqsub v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT:    ret
  %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
  ret <32 x i8> %z
}

; Signed saturating subtract on <64 x i8>. The expected output uses four
; sqsub .16b instructions pairing v0..v3 with v4..v7; only the emission order
; differs between the SelectionDAG and GlobalISel expectations.
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-SD-LABEL: v64i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v2.16b, v2.16b, v6.16b
; CHECK-SD-NEXT:    sqsub v0.16b, v0.16b, v4.16b
; CHECK-SD-NEXT:    sqsub v1.16b, v1.16b, v5.16b
; CHECK-SD-NEXT:    sqsub v3.16b, v3.16b, v7.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v64i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.16b, v0.16b, v4.16b
; CHECK-GI-NEXT:    sqsub v1.16b, v1.16b, v5.16b
; CHECK-GI-NEXT:    sqsub v2.16b, v2.16b, v6.16b
; CHECK-GI-NEXT:    sqsub v3.16b, v3.16b, v7.16b
; CHECK-GI-NEXT:    ret
  %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
  ret <64 x i8> %z
}

; Signed saturating subtract on <8 x i16>, passed and returned in registers.
; Both llc invocations share one expectation: a single sqsub on the .8h
; arrangement.
define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %z = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %z
}

; Signed saturating subtract on <16 x i16>. The expected output uses two
; sqsub .8h instructions (v0-v2 and v1-v3); the SelectionDAG and GlobalISel
; expectations differ only in emission order.
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; CHECK-SD-LABEL: v16i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT:    sqsub v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v16i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT:    sqsub v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT:    ret
  %z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
  ret <16 x i16> %z
}

; Signed saturating subtract on <32 x i16>. The expected output uses four
; sqsub .8h instructions pairing v0..v3 with v4..v7; only the emission order
; differs between the SelectionDAG and GlobalISel expectations.
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; CHECK-SD-LABEL: v32i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v2.8h, v2.8h, v6.8h
; CHECK-SD-NEXT:    sqsub v0.8h, v0.8h, v4.8h
; CHECK-SD-NEXT:    sqsub v1.8h, v1.8h, v5.8h
; CHECK-SD-NEXT:    sqsub v3.8h, v3.8h, v7.8h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v32i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.8h, v0.8h, v4.8h
; CHECK-GI-NEXT:    sqsub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT:    sqsub v2.8h, v2.8h, v6.8h
; CHECK-GI-NEXT:    sqsub v3.8h, v3.8h, v7.8h
; CHECK-GI-NEXT:    ret
  %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
  ret <32 x i16> %z
}

; Loads <8 x i8> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz. Both llc invocations share one
; expectation: 64-bit d-register loads/store around a single sqsub .8b.
define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-LABEL: v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    str d0, [x2]
; CHECK-NEXT:    ret
  %x = load <8 x i8>, ptr %px
  %y = load <8 x i8>, ptr %py
  %z = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
  store <8 x i8> %z, ptr %pz
  ret void
}

; Loads <4 x i8> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: bytes are widened to 16-bit lanes (ushll), shifted
; left by 8 so each i8 sits in the top byte of its lane, subtracted with
; sqsub .4h, then shifted back (ushr #8) and narrowed (uzp1).
; GlobalISel expectation: each operand is rebuilt lane by lane with byte moves
; and the subtraction is done directly with sqsub .8b.
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v4i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-SD-NEXT:    shl v1.4h, v1.4h, #8
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sqsub v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    ushr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    str s0, [x2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0]
; CHECK-GI-NEXT:    ldr w9, [x1]
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    mov b2, v0.b[1]
; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
; CHECK-GI-NEXT:    mov b4, v1.b[1]
; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
; CHECK-GI-NEXT:    mov b2, v0.b[2]
; CHECK-GI-NEXT:    mov b0, v0.b[3]
; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
; CHECK-GI-NEXT:    mov b4, v1.b[2]
; CHECK-GI-NEXT:    mov b1, v1.b[3]
; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
; CHECK-GI-NEXT:    sqsub v0.8b, v3.8b, v5.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    str w8, [x2]
; CHECK-GI-NEXT:    ret
  %x = load <4 x i8>, ptr %px
  %y = load <4 x i8>, ptr %py
  %z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
  store <4 x i8> %z, ptr %pz
  ret void
}

; Loads <2 x i8> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: the two bytes are loaded into byte lanes 0 and 4
; (one per 32-bit lane), shifted left by 24, subtracted with sqsub .2s,
; shifted back (ushr #24) and stored with two strb.
; GlobalISel expectation: the bytes are inserted into byte lanes 0 and 1 and
; subtracted directly with sqsub .8b, then stored lane by lane with st1.
define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v2i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.b }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #1
; CHECK-SD-NEXT:    add x9, x1, #1
; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-SD-NEXT:    ld1 { v1.b }[4], [x9]
; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #24
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    sqsub v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strb w9, [x2]
; CHECK-SD-NEXT:    strb w8, [x2, #1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr b0, [x0]
; CHECK-GI-NEXT:    ldr b1, [x1]
; CHECK-GI-NEXT:    add x8, x2, #1
; CHECK-GI-NEXT:    ldr b2, [x0, #1]
; CHECK-GI-NEXT:    ldr b3, [x1, #1]
; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
; CHECK-GI-NEXT:    mov v1.b[0], v1.b[0]
; CHECK-GI-NEXT:    mov v0.b[1], v2.b[0]
; CHECK-GI-NEXT:    mov v1.b[1], v3.b[0]
; CHECK-GI-NEXT:    sqsub v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT:    st1 { v0.b }[0], [x2]
; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
; CHECK-GI-NEXT:    ret
  %x = load <2 x i8>, ptr %px
  %y = load <2 x i8>, ptr %py
  %z = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
  store <2 x i8> %z, ptr %pz
  ret void
}

; Loads <4 x i16> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz. Both llc invocations share one
; expectation: 64-bit d-register loads/store around a single sqsub .4h.
define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-LABEL: v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    str d0, [x2]
; CHECK-NEXT:    ret
  %x = load <4 x i16>, ptr %px
  %y = load <4 x i16>, ptr %py
  %z = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
  store <4 x i16> %z, ptr %pz
  ret void
}

; Loads <2 x i16> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: the halfwords are loaded into lanes 0 and 2 (one
; per 32-bit lane), shifted left by 16, subtracted with sqsub .2s, shifted
; back (ushr #16) and stored with two strh.
; GlobalISel expectation: the halfwords go into lanes 0 and 1 and are
; subtracted directly with sqsub .4h.
define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v2i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.h }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #2
; CHECK-SD-NEXT:    add x9, x1, #2
; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-SD-NEXT:    ld1 { v1.h }[2], [x9]
; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #16
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    sqsub v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strh w9, [x2]
; CHECK-SD-NEXT:    strh w8, [x2, #2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr h0, [x0]
; CHECK-GI-NEXT:    ldr h1, [x1]
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    add x9, x1, #2
; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT:    add x8, x2, #2
; CHECK-GI-NEXT:    sqsub v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    str h0, [x2]
; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    ret
  %x = load <2 x i16>, ptr %px
  %y = load <2 x i16>, ptr %py
  %z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
  store <2 x i16> %z, ptr %pz
  ret void
}

; Signed saturating subtract on the non-power-of-two type <12 x i8>, passed
; and returned in registers. Both llc invocations share one expectation: the
; operation is performed as a single full-width sqsub .16b.
define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
; CHECK-LABEL: v12i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %z = call <12 x i8> @llvm.ssub.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
  ret <12 x i8> %z
}

; Loads <12 x i16> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: each operand is loaded as a q-register pair (ldp),
; two sqsub .8h are issued, and the result is stored as one q plus one d
; register.
; GlobalISel expectation: each operand is loaded as one q plus one d register,
; and the tail is subtracted with sqsub .4h instead of .8h.
define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v12i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q0, q3, [x1]
; CHECK-SD-NEXT:    ldp q1, q2, [x0]
; CHECK-SD-NEXT:    sqsub v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    sqsub v1.8h, v2.8h, v3.8h
; CHECK-SD-NEXT:    str q0, [x2]
; CHECK-SD-NEXT:    str d1, [x2, #16]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v12i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x0]
; CHECK-GI-NEXT:    ldr q1, [x1]
; CHECK-GI-NEXT:    ldr d2, [x0, #16]
; CHECK-GI-NEXT:    ldr d3, [x1, #16]
; CHECK-GI-NEXT:    sqsub v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    sqsub v1.4h, v2.4h, v3.4h
; CHECK-GI-NEXT:    str q0, [x2]
; CHECK-GI-NEXT:    str d1, [x2, #16]
; CHECK-GI-NEXT:    ret
  %x = load <12 x i16>, ptr %px
  %y = load <12 x i16>, ptr %py
  %z = call <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
  store <12 x i16> %z, ptr %pz
  ret void
}

; Loads <1 x i8> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: the single byte is handled in a vector register
; with sqsub .8b and stored from lane 0.
; GlobalISel expectation: a scalar sequence - sign-extending loads, a plain
; sub, a comparison of the result against its own sign-extension from 8 bits,
; and a csel that picks a value derived from the result's bit 7 (sbfx, then
; sub #128) when the two differ.
define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr b0, [x0]
; CHECK-SD-NEXT:    ldr b1, [x1]
; CHECK-SD-NEXT:    sqsub v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    st1 { v0.b }[0], [x2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v1i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrsb w8, [x0]
; CHECK-GI-NEXT:    ldrsb w9, [x1]
; CHECK-GI-NEXT:    sub w8, w8, w9
; CHECK-GI-NEXT:    sxtb w9, w8
; CHECK-GI-NEXT:    sbfx w10, w8, #7, #1
; CHECK-GI-NEXT:    sub w10, w10, #128
; CHECK-GI-NEXT:    cmp w8, w9
; CHECK-GI-NEXT:    csel w8, w10, w8, ne
; CHECK-GI-NEXT:    strb w8, [x2]
; CHECK-GI-NEXT:    ret
  %x = load <1 x i8>, ptr %px
  %y = load <1 x i8>, ptr %py
  %z = call <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
  store <1 x i8> %z, ptr %pz
  ret void
}

; Loads <1 x i16> operands from %px and %py, applies the signed saturating
; subtract and stores the result to %pz.
; SelectionDAG expectation: the single halfword is handled in a vector
; register with sqsub .4h and stored with str h0.
; GlobalISel expectation: a scalar sequence - sign-extending loads, a plain
; sub, a comparison of the result against its own sign-extension from 16 bits,
; and a csel that picks a value derived from the result's bit 15 (sbfx, then
; sub 32768) when the two differ.
define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr h0, [x0]
; CHECK-SD-NEXT:    ldr h1, [x1]
; CHECK-SD-NEXT:    sqsub v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    str h0, [x2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v1i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrsh w8, [x0]
; CHECK-GI-NEXT:    ldrsh w9, [x1]
; CHECK-GI-NEXT:    sub w8, w8, w9
; CHECK-GI-NEXT:    sxth w9, w8
; CHECK-GI-NEXT:    sbfx w10, w8, #15, #1
; CHECK-GI-NEXT:    sub w10, w10, #8, lsl #12 // =32768
; CHECK-GI-NEXT:    cmp w8, w9
; CHECK-GI-NEXT:    csel w8, w10, w8, ne
; CHECK-GI-NEXT:    strh w8, [x2]
; CHECK-GI-NEXT:    ret
  %x = load <1 x i16>, ptr %px
  %y = load <1 x i16>, ptr %py
  %z = call <1 x i16> @llvm.ssub.sat.v1i16(<1 x i16> %x, <1 x i16> %y)
  store <1 x i16> %z, ptr %pz
  ret void
}

; Signed saturating subtract on <16 x i4>. The expected code shifts both
; operands left by 4 so each i4 occupies the top nibble of a byte lane, uses
; sqsub .16b, then arithmetic-shifts the result back by 4.
; The expectation is shared by both llc invocations; the header of this file
; expects the GlobalISel run to report a fallback warning for this function.
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-LABEL: v16i4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.16b, v1.16b, #4
; CHECK-NEXT:    shl v0.16b, v0.16b, #4
; CHECK-NEXT:    sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sshr v0.16b, v0.16b, #4
; CHECK-NEXT:    ret
  %z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
  ret <16 x i4> %z
}

; Signed saturating subtract on <16 x i1>. No sqsub is expected: the operation
; is emitted as bitwise logic, x AND (y XOR 1), on byte lanes.
; The expectation is shared by both llc invocations; the header of this file
; expects the GlobalISel run to report a fallback warning for this function.
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #1
; CHECK-NEXT:    eor v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
  ret <16 x i1> %z
}

; Signed saturating subtract on <2 x i32>, passed and returned in registers.
; Both llc invocations share one expectation: a single sqsub on the .2s
; arrangement.
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %z = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %z
}

; Signed saturating subtract on <4 x i32>, passed and returned in registers.
; Both llc invocations share one expectation: a single sqsub on the .4s
; arrangement.
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %z = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %z
}

; Signed saturating subtract on <8 x i32>. The expected output uses two
; sqsub .4s instructions (v0-v2 and v1-v3); the SelectionDAG and GlobalISel
; expectations differ only in emission order.
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-SD-LABEL: v8i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT:    sqsub v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v8i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    sqsub v1.4s, v1.4s, v3.4s
; CHECK-GI-NEXT:    ret
  %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
  ret <8 x i32> %z
}

; Signed saturating subtract on <16 x i32>. The expected output uses four
; sqsub .4s instructions pairing v0..v3 with v4..v7; only the emission order
; differs between the SelectionDAG and GlobalISel expectations.
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; CHECK-SD-LABEL: v16i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v2.4s, v2.4s, v6.4s
; CHECK-SD-NEXT:    sqsub v0.4s, v0.4s, v4.4s
; CHECK-SD-NEXT:    sqsub v1.4s, v1.4s, v5.4s
; CHECK-SD-NEXT:    sqsub v3.4s, v3.4s, v7.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v16i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT:    sqsub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT:    sqsub v2.4s, v2.4s, v6.4s
; CHECK-GI-NEXT:    sqsub v3.4s, v3.4s, v7.4s
; CHECK-GI-NEXT:    ret
  %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
  ret <16 x i32> %z
}

; Signed saturating subtract on <2 x i64>, passed and returned in registers.
; Both llc invocations share one expectation: a single sqsub on the .2d
; arrangement.
define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %z = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %z
}

; Signed saturating subtract on <4 x i64>. The expected output uses two
; sqsub .2d instructions (v0-v2 and v1-v3); the SelectionDAG and GlobalISel
; expectations differ only in emission order.
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-SD-LABEL: v4i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT:    sqsub v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    sqsub v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT:    ret
  %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
  ret <4 x i64> %z
}

; Signed saturating subtract on <8 x i64>. The expected output uses four
; sqsub .2d instructions pairing v0..v3 with v4..v7; only the emission order
; differs between the SelectionDAG and GlobalISel expectations.
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-SD-LABEL: v8i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sqsub v2.2d, v2.2d, v6.2d
; CHECK-SD-NEXT:    sqsub v0.2d, v0.2d, v4.2d
; CHECK-SD-NEXT:    sqsub v1.2d, v1.2d, v5.2d
; CHECK-SD-NEXT:    sqsub v3.2d, v3.2d, v7.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v8i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sqsub v0.2d, v0.2d, v4.2d
; CHECK-GI-NEXT:    sqsub v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT:    sqsub v2.2d, v2.2d, v6.2d
; CHECK-GI-NEXT:    sqsub v3.2d, v3.2d, v7.2d
; CHECK-GI-NEXT:    ret
  %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
  ret <8 x i64> %z
}

; Signed saturating subtract on <2 x i128>. The expected code is entirely
; scalar: each 128-bit element lives in a pair of x-registers and is
; subtracted with subs/sbcs.
; SelectionDAG expectation: on signed overflow (vs) the low half is replaced
; by the sign of the high result (asr #63) and the high half by that value
; with bit 63 flipped (eor 0x8000000000000000).
; GlobalISel expectation: the overflow flag is materialised with cset and
; re-tested with tst, and the high-half replacement is formed with an adc of
; the sign value and the constant 0x8000000000000000.
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-SD-LABEL: v2i128:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    subs x8, x0, x4
; CHECK-SD-NEXT:    sbcs x9, x1, x5
; CHECK-SD-NEXT:    asr x10, x9, #63
; CHECK-SD-NEXT:    eor x11, x10, #0x8000000000000000
; CHECK-SD-NEXT:    csel x0, x10, x8, vs
; CHECK-SD-NEXT:    csel x1, x11, x9, vs
; CHECK-SD-NEXT:    subs x8, x2, x6
; CHECK-SD-NEXT:    sbcs x9, x3, x7
; CHECK-SD-NEXT:    asr x10, x9, #63
; CHECK-SD-NEXT:    eor x11, x10, #0x8000000000000000
; CHECK-SD-NEXT:    csel x2, x10, x8, vs
; CHECK-SD-NEXT:    csel x3, x11, x9, vs
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i128:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    subs x9, x0, x4
; CHECK-GI-NEXT:    mov w8, wzr
; CHECK-GI-NEXT:    mov x13, #-9223372036854775808 // =0x8000000000000000
; CHECK-GI-NEXT:    sbcs x10, x1, x5
; CHECK-GI-NEXT:    asr x11, x10, #63
; CHECK-GI-NEXT:    cset w12, vs
; CHECK-GI-NEXT:    cmp w8, #1
; CHECK-GI-NEXT:    adc x14, x11, x13
; CHECK-GI-NEXT:    tst w12, #0x1
; CHECK-GI-NEXT:    csel x0, x11, x9, ne
; CHECK-GI-NEXT:    csel x1, x14, x10, ne
; CHECK-GI-NEXT:    subs x9, x2, x6
; CHECK-GI-NEXT:    sbcs x10, x3, x7
; CHECK-GI-NEXT:    asr x11, x10, #63
; CHECK-GI-NEXT:    cset w12, vs
; CHECK-GI-NEXT:    cmp w8, #1
; CHECK-GI-NEXT:    adc x8, x11, x13
; CHECK-GI-NEXT:    tst w12, #0x1
; CHECK-GI-NEXT:    csel x2, x11, x9, ne
; CHECK-GI-NEXT:    csel x3, x8, x10, ne
; CHECK-GI-NEXT:    ret
  %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
  ret <2 x i128> %z
}
