xref: /llvm-project/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (revision c537c752787e9da8bd8762dd5298a152f546861b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
7
; Declarations of the llvm.sadd.sat vector overloads called by the test
; functions in this file.
; NOTE(review) no function visible in this file calls the v4i24 overload
; declared below - confirm whether a test was dropped or the declaration is stale.
8declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
9declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
10declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
11declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
12declare <12 x i8> @llvm.sadd.sat.v12i8(<12 x i8>, <12 x i8>)
13declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
14declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
15declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
16
17declare <1 x i16> @llvm.sadd.sat.v1i16(<1 x i16>, <1 x i16>)
18declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
19declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
20declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
21declare <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16>, <12 x i16>)
22declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)
23declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)
24
25declare <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1>, <16 x i1>)
26declare <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4>, <16 x i4>)
27
28declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
29declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
30declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>)
31declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>)
32declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)
33declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>)
34declare <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>)
35
36declare <4 x i24> @llvm.sadd.sat.v4i24(<4 x i24>, <4 x i24>)
37declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>)
38
; Signed saturating add of two <16 x i8> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on .16b registers.
39define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
40; CHECK-LABEL: v16i8:
41; CHECK:       // %bb.0:
42; CHECK-NEXT:    sqadd v0.16b, v0.16b, v1.16b
43; CHECK-NEXT:    ret
44  %z = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
45  ret <16 x i8> %z
46}
47
; Signed saturating add of two <32 x i8> values passed and returned by value.
; Each run expects two sqadd instructions on .16b registers (v0 with v2 and
; v1 with v3). The default llc run and the -global-isel run differ only in the
; order in which the two instructions are emitted.
48define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
49; CHECK-SD-LABEL: v32i8:
50; CHECK-SD:       // %bb.0:
51; CHECK-SD-NEXT:    sqadd v1.16b, v1.16b, v3.16b
52; CHECK-SD-NEXT:    sqadd v0.16b, v0.16b, v2.16b
53; CHECK-SD-NEXT:    ret
54;
55; CHECK-GI-LABEL: v32i8:
56; CHECK-GI:       // %bb.0:
57; CHECK-GI-NEXT:    sqadd v0.16b, v0.16b, v2.16b
58; CHECK-GI-NEXT:    sqadd v1.16b, v1.16b, v3.16b
59; CHECK-GI-NEXT:    ret
60  %z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
61  ret <32 x i8> %z
62}
63
; Signed saturating add of two <64 x i8> values passed and returned by value.
; Each run expects four sqadd instructions on .16b registers (v0-v3 paired
; with v4-v7). The default llc run and the -global-isel run differ only in the
; order of those four instructions.
64define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
65; CHECK-SD-LABEL: v64i8:
66; CHECK-SD:       // %bb.0:
67; CHECK-SD-NEXT:    sqadd v2.16b, v2.16b, v6.16b
68; CHECK-SD-NEXT:    sqadd v0.16b, v0.16b, v4.16b
69; CHECK-SD-NEXT:    sqadd v1.16b, v1.16b, v5.16b
70; CHECK-SD-NEXT:    sqadd v3.16b, v3.16b, v7.16b
71; CHECK-SD-NEXT:    ret
72;
73; CHECK-GI-LABEL: v64i8:
74; CHECK-GI:       // %bb.0:
75; CHECK-GI-NEXT:    sqadd v0.16b, v0.16b, v4.16b
76; CHECK-GI-NEXT:    sqadd v1.16b, v1.16b, v5.16b
77; CHECK-GI-NEXT:    sqadd v2.16b, v2.16b, v6.16b
78; CHECK-GI-NEXT:    sqadd v3.16b, v3.16b, v7.16b
79; CHECK-GI-NEXT:    ret
80  %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
81  ret <64 x i8> %z
82}
83
; Signed saturating add of two <8 x i16> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on .8h registers.
84define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
85; CHECK-LABEL: v8i16:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    sqadd v0.8h, v0.8h, v1.8h
88; CHECK-NEXT:    ret
89  %z = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
90  ret <8 x i16> %z
91}
92
; Signed saturating add of two <16 x i16> values passed and returned by value.
; Each run expects two sqadd instructions on .8h registers (v0 with v2 and
; v1 with v3). The default llc run and the -global-isel run differ only in the
; order in which the two instructions are emitted.
93define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
94; CHECK-SD-LABEL: v16i16:
95; CHECK-SD:       // %bb.0:
96; CHECK-SD-NEXT:    sqadd v1.8h, v1.8h, v3.8h
97; CHECK-SD-NEXT:    sqadd v0.8h, v0.8h, v2.8h
98; CHECK-SD-NEXT:    ret
99;
100; CHECK-GI-LABEL: v16i16:
101; CHECK-GI:       // %bb.0:
102; CHECK-GI-NEXT:    sqadd v0.8h, v0.8h, v2.8h
103; CHECK-GI-NEXT:    sqadd v1.8h, v1.8h, v3.8h
104; CHECK-GI-NEXT:    ret
105  %z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
106  ret <16 x i16> %z
107}
108
; Signed saturating add of two <32 x i16> values passed and returned by value.
; Each run expects four sqadd instructions on .8h registers (v0-v3 paired
; with v4-v7). The default llc run and the -global-isel run differ only in the
; order of those four instructions.
109define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
110; CHECK-SD-LABEL: v32i16:
111; CHECK-SD:       // %bb.0:
112; CHECK-SD-NEXT:    sqadd v2.8h, v2.8h, v6.8h
113; CHECK-SD-NEXT:    sqadd v0.8h, v0.8h, v4.8h
114; CHECK-SD-NEXT:    sqadd v1.8h, v1.8h, v5.8h
115; CHECK-SD-NEXT:    sqadd v3.8h, v3.8h, v7.8h
116; CHECK-SD-NEXT:    ret
117;
118; CHECK-GI-LABEL: v32i16:
119; CHECK-GI:       // %bb.0:
120; CHECK-GI-NEXT:    sqadd v0.8h, v0.8h, v4.8h
121; CHECK-GI-NEXT:    sqadd v1.8h, v1.8h, v5.8h
122; CHECK-GI-NEXT:    sqadd v2.8h, v2.8h, v6.8h
123; CHECK-GI-NEXT:    sqadd v3.8h, v3.8h, v7.8h
124; CHECK-GI-NEXT:    ret
125  %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
126  ret <32 x i16> %z
127}
128
; Loads two <8 x i8> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz. Both llc runs share one expectation, two
; 64-bit d-register loads, a single sqadd on .8b registers and a d-register store.
129define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
130; CHECK-LABEL: v8i8:
131; CHECK:       // %bb.0:
132; CHECK-NEXT:    ldr d0, [x0]
133; CHECK-NEXT:    ldr d1, [x1]
134; CHECK-NEXT:    sqadd v0.8b, v0.8b, v1.8b
135; CHECK-NEXT:    str d0, [x2]
136; CHECK-NEXT:    ret
137  %x = load <8 x i8>, ptr %px
138  %y = load <8 x i8>, ptr %py
139  %z = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
140  store <8 x i8> %z, ptr %pz
141  ret void
142}
143
; Loads two <4 x i8> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects the bytes to be widened to halfwords (ushll),
; shifted left by 8, added with sqadd on .4h, shifted back right by 8 (ushr)
; and narrowed again with uzp1 before a 32-bit store.
; The -global-isel run expects the 32-bit loads to be moved into vector
; registers, the four byte lanes to be copied one by one with mov, and the add
; to be done with sqadd on .8b registers.
144define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
145; CHECK-SD-LABEL: v4i8:
146; CHECK-SD:       // %bb.0:
147; CHECK-SD-NEXT:    ldr s0, [x0]
148; CHECK-SD-NEXT:    ldr s1, [x1]
149; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
150; CHECK-SD-NEXT:    ushll v1.8h, v1.8b, #0
151; CHECK-SD-NEXT:    shl v1.4h, v1.4h, #8
152; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
153; CHECK-SD-NEXT:    sqadd v0.4h, v0.4h, v1.4h
154; CHECK-SD-NEXT:    ushr v0.4h, v0.4h, #8
155; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
156; CHECK-SD-NEXT:    str s0, [x2]
157; CHECK-SD-NEXT:    ret
158;
159; CHECK-GI-LABEL: v4i8:
160; CHECK-GI:       // %bb.0:
161; CHECK-GI-NEXT:    ldr w8, [x0]
162; CHECK-GI-NEXT:    ldr w9, [x1]
163; CHECK-GI-NEXT:    fmov s0, w8
164; CHECK-GI-NEXT:    fmov s1, w9
165; CHECK-GI-NEXT:    mov b2, v0.b[1]
166; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
167; CHECK-GI-NEXT:    mov b4, v1.b[1]
168; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
169; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
170; CHECK-GI-NEXT:    mov b2, v0.b[2]
171; CHECK-GI-NEXT:    mov b0, v0.b[3]
172; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
173; CHECK-GI-NEXT:    mov b4, v1.b[2]
174; CHECK-GI-NEXT:    mov b1, v1.b[3]
175; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
176; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
177; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
178; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
179; CHECK-GI-NEXT:    sqadd v0.8b, v3.8b, v5.8b
180; CHECK-GI-NEXT:    fmov w8, s0
181; CHECK-GI-NEXT:    str w8, [x2]
182; CHECK-GI-NEXT:    ret
183  %x = load <4 x i8>, ptr %px
184  %y = load <4 x i8>, ptr %py
185  %z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
186  store <4 x i8> %z, ptr %pz
187  ret void
188}
189
; Loads two <2 x i8> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects each byte to be loaded into byte lanes 0 and 4,
; shifted left by 24 within .2s lanes, added with sqadd on .2s, shifted back
; right by 24 (ushr) and written out with two strb stores.
; The -global-isel run expects per-byte loads combined through lane moves, a
; sqadd on .8b registers and two single-lane st1 stores.
190define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
191; CHECK-SD-LABEL: v2i8:
192; CHECK-SD:       // %bb.0:
193; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
194; CHECK-SD-NEXT:    ld1 { v1.b }[0], [x1]
195; CHECK-SD-NEXT:    add x8, x0, #1
196; CHECK-SD-NEXT:    add x9, x1, #1
197; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
198; CHECK-SD-NEXT:    ld1 { v1.b }[4], [x9]
199; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #24
200; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
201; CHECK-SD-NEXT:    sqadd v0.2s, v0.2s, v1.2s
202; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #24
203; CHECK-SD-NEXT:    mov w8, v0.s[1]
204; CHECK-SD-NEXT:    fmov w9, s0
205; CHECK-SD-NEXT:    strb w9, [x2]
206; CHECK-SD-NEXT:    strb w8, [x2, #1]
207; CHECK-SD-NEXT:    ret
208;
209; CHECK-GI-LABEL: v2i8:
210; CHECK-GI:       // %bb.0:
211; CHECK-GI-NEXT:    ldr b0, [x0]
212; CHECK-GI-NEXT:    ldr b1, [x1]
213; CHECK-GI-NEXT:    add x8, x2, #1
214; CHECK-GI-NEXT:    ldr b2, [x0, #1]
215; CHECK-GI-NEXT:    ldr b3, [x1, #1]
216; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
217; CHECK-GI-NEXT:    mov v1.b[0], v1.b[0]
218; CHECK-GI-NEXT:    mov v0.b[1], v2.b[0]
219; CHECK-GI-NEXT:    mov v1.b[1], v3.b[0]
220; CHECK-GI-NEXT:    sqadd v0.8b, v0.8b, v1.8b
221; CHECK-GI-NEXT:    st1 { v0.b }[0], [x2]
222; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
223; CHECK-GI-NEXT:    ret
224  %x = load <2 x i8>, ptr %px
225  %y = load <2 x i8>, ptr %py
226  %z = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
227  store <2 x i8> %z, ptr %pz
228  ret void
229}
230
; Loads two <4 x i16> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz. Both llc runs share one expectation, two
; d-register loads, a single sqadd on .4h registers and a d-register store.
231define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
232; CHECK-LABEL: v4i16:
233; CHECK:       // %bb.0:
234; CHECK-NEXT:    ldr d0, [x0]
235; CHECK-NEXT:    ldr d1, [x1]
236; CHECK-NEXT:    sqadd v0.4h, v0.4h, v1.4h
237; CHECK-NEXT:    str d0, [x2]
238; CHECK-NEXT:    ret
239  %x = load <4 x i16>, ptr %px
240  %y = load <4 x i16>, ptr %py
241  %z = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
242  store <4 x i16> %z, ptr %pz
243  ret void
244}
245
; Loads two <2 x i16> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects each halfword to be loaded into halfword lanes 0
; and 2, shifted left by 16 within .2s lanes, added with sqadd on .2s, shifted
; back right by 16 (ushr) and written out with two strh stores.
; The -global-isel run expects the halfwords to be loaded into lanes 0 and 1,
; added with sqadd on .4h registers and stored with str h plus a lane st1.
246define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
247; CHECK-SD-LABEL: v2i16:
248; CHECK-SD:       // %bb.0:
249; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
250; CHECK-SD-NEXT:    ld1 { v1.h }[0], [x1]
251; CHECK-SD-NEXT:    add x8, x0, #2
252; CHECK-SD-NEXT:    add x9, x1, #2
253; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
254; CHECK-SD-NEXT:    ld1 { v1.h }[2], [x9]
255; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #16
256; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
257; CHECK-SD-NEXT:    sqadd v0.2s, v0.2s, v1.2s
258; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #16
259; CHECK-SD-NEXT:    mov w8, v0.s[1]
260; CHECK-SD-NEXT:    fmov w9, s0
261; CHECK-SD-NEXT:    strh w9, [x2]
262; CHECK-SD-NEXT:    strh w8, [x2, #2]
263; CHECK-SD-NEXT:    ret
264;
265; CHECK-GI-LABEL: v2i16:
266; CHECK-GI:       // %bb.0:
267; CHECK-GI-NEXT:    ldr h0, [x0]
268; CHECK-GI-NEXT:    ldr h1, [x1]
269; CHECK-GI-NEXT:    add x8, x0, #2
270; CHECK-GI-NEXT:    add x9, x1, #2
271; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
272; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
273; CHECK-GI-NEXT:    add x8, x2, #2
274; CHECK-GI-NEXT:    sqadd v0.4h, v0.4h, v1.4h
275; CHECK-GI-NEXT:    str h0, [x2]
276; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
277; CHECK-GI-NEXT:    ret
278  %x = load <2 x i16>, ptr %px
279  %y = load <2 x i16>, ptr %py
280  %z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
281  store <2 x i16> %z, ptr %pz
282  ret void
283}
284
; Signed saturating add of two <12 x i8> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on full .16b registers.
285define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
286; CHECK-LABEL: v12i8:
287; CHECK:       // %bb.0:
288; CHECK-NEXT:    sqadd v0.16b, v0.16b, v1.16b
289; CHECK-NEXT:    ret
290  %z = call <12 x i8> @llvm.sadd.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
291  ret <12 x i8> %z
292}
293
; Loads two <12 x i16> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects each operand to be loaded as a pair of q
; registers (ldp) and two sqadd instructions on .8h registers.
; The -global-isel run expects a q load plus a d load at offset 16 per
; operand, with one sqadd on .8h and one on .4h registers.
; Both runs expect the result to be stored as a q register at offset 0 and a
; d register at offset 16.
294define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
295; CHECK-SD-LABEL: v12i16:
296; CHECK-SD:       // %bb.0:
297; CHECK-SD-NEXT:    ldp q0, q3, [x1]
298; CHECK-SD-NEXT:    ldp q1, q2, [x0]
299; CHECK-SD-NEXT:    sqadd v0.8h, v1.8h, v0.8h
300; CHECK-SD-NEXT:    sqadd v1.8h, v2.8h, v3.8h
301; CHECK-SD-NEXT:    str q0, [x2]
302; CHECK-SD-NEXT:    str d1, [x2, #16]
303; CHECK-SD-NEXT:    ret
304;
305; CHECK-GI-LABEL: v12i16:
306; CHECK-GI:       // %bb.0:
307; CHECK-GI-NEXT:    ldr q0, [x0]
308; CHECK-GI-NEXT:    ldr q1, [x1]
309; CHECK-GI-NEXT:    ldr d2, [x0, #16]
310; CHECK-GI-NEXT:    ldr d3, [x1, #16]
311; CHECK-GI-NEXT:    sqadd v0.8h, v0.8h, v1.8h
312; CHECK-GI-NEXT:    sqadd v1.4h, v2.4h, v3.4h
313; CHECK-GI-NEXT:    str q0, [x2]
314; CHECK-GI-NEXT:    str d1, [x2, #16]
315; CHECK-GI-NEXT:    ret
316  %x = load <12 x i16>, ptr %px
317  %y = load <12 x i16>, ptr %py
318  %z = call <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
319  store <12 x i16> %z, ptr %pz
320  ret void
321}
322
; Loads two <1 x i8> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects b-register loads, a sqadd on .8b registers and
; a lane-0 st1 store.
; The -global-isel run expects no sqadd. Instead it expects a scalar sequence
; of sign-extending loads, add, a compare against the sign-extended sum, and
; a csel that picks a value derived from the sum's bit 7 (sbfx then sub 128)
; when the two differ.
323define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
324; CHECK-SD-LABEL: v1i8:
325; CHECK-SD:       // %bb.0:
326; CHECK-SD-NEXT:    ldr b0, [x0]
327; CHECK-SD-NEXT:    ldr b1, [x1]
328; CHECK-SD-NEXT:    sqadd v0.8b, v0.8b, v1.8b
329; CHECK-SD-NEXT:    st1 { v0.b }[0], [x2]
330; CHECK-SD-NEXT:    ret
331;
332; CHECK-GI-LABEL: v1i8:
333; CHECK-GI:       // %bb.0:
334; CHECK-GI-NEXT:    ldrsb w8, [x0]
335; CHECK-GI-NEXT:    ldrsb w9, [x1]
336; CHECK-GI-NEXT:    add w8, w8, w9
337; CHECK-GI-NEXT:    sxtb w9, w8
338; CHECK-GI-NEXT:    sbfx w10, w8, #7, #1
339; CHECK-GI-NEXT:    sub w10, w10, #128
340; CHECK-GI-NEXT:    cmp w8, w9
341; CHECK-GI-NEXT:    csel w8, w10, w8, ne
342; CHECK-GI-NEXT:    strb w8, [x2]
343; CHECK-GI-NEXT:    ret
344  %x = load <1 x i8>, ptr %px
345  %y = load <1 x i8>, ptr %py
346  %z = call <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
347  store <1 x i8> %z, ptr %pz
348  ret void
349}
350
; Loads two <1 x i16> vectors from %px and %py, applies the signed saturating
; add and stores the result to %pz.
; The default llc run expects h-register loads, a sqadd on .4h registers and
; an h-register store.
; The -global-isel run expects no sqadd. Instead it expects a scalar sequence
; of sign-extending loads, add, a compare against the sign-extended sum, and
; a csel that picks a value derived from the sum's bit 15 (sbfx then sub
; 32768) when the two differ.
351define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
352; CHECK-SD-LABEL: v1i16:
353; CHECK-SD:       // %bb.0:
354; CHECK-SD-NEXT:    ldr h0, [x0]
355; CHECK-SD-NEXT:    ldr h1, [x1]
356; CHECK-SD-NEXT:    sqadd v0.4h, v0.4h, v1.4h
357; CHECK-SD-NEXT:    str h0, [x2]
358; CHECK-SD-NEXT:    ret
359;
360; CHECK-GI-LABEL: v1i16:
361; CHECK-GI:       // %bb.0:
362; CHECK-GI-NEXT:    ldrsh w8, [x0]
363; CHECK-GI-NEXT:    ldrsh w9, [x1]
364; CHECK-GI-NEXT:    add w8, w8, w9
365; CHECK-GI-NEXT:    sxth w9, w8
366; CHECK-GI-NEXT:    sbfx w10, w8, #15, #1
367; CHECK-GI-NEXT:    sub w10, w10, #8, lsl #12 // =32768
368; CHECK-GI-NEXT:    cmp w8, w9
369; CHECK-GI-NEXT:    csel w8, w10, w8, ne
370; CHECK-GI-NEXT:    strh w8, [x2]
371; CHECK-GI-NEXT:    ret
372  %x = load <1 x i16>, ptr %px
373  %y = load <1 x i16>, ptr %py
374  %z = call <1 x i16> @llvm.sadd.sat.v1i16(<1 x i16> %x, <1 x i16> %y)
375  store <1 x i16> %z, ptr %pz
376  ret void
377}
378
; Signed saturating add of two <16 x i4> values passed and returned by value.
; Both llc runs share one expectation, both operands shifted left by 4 within
; .16b lanes, a sqadd on .16b registers, then an arithmetic shift right by 4.
; The file header additionally expects the -global-isel run to print a
; fallback-path warning for this function.
379define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
380; CHECK-LABEL: v16i4:
381; CHECK:       // %bb.0:
382; CHECK-NEXT:    shl v1.16b, v1.16b, #4
383; CHECK-NEXT:    shl v0.16b, v0.16b, #4
384; CHECK-NEXT:    sqadd v0.16b, v0.16b, v1.16b
385; CHECK-NEXT:    sshr v0.16b, v0.16b, #4
386; CHECK-NEXT:    ret
387  %z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
388  ret <16 x i4> %z
389}
390
; Signed saturating add of two <16 x i1> values passed and returned by value.
; Both llc runs share one expectation, a single orr on .16b registers and no
; sqadd. The file header additionally expects the -global-isel run to print a
; fallback-path warning for this function.
391define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
392; CHECK-LABEL: v16i1:
393; CHECK:       // %bb.0:
394; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
395; CHECK-NEXT:    ret
396  %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
397  ret <16 x i1> %z
398}
399
; Signed saturating add of two <2 x i32> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on .2s registers.
400define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
401; CHECK-LABEL: v2i32:
402; CHECK:       // %bb.0:
403; CHECK-NEXT:    sqadd v0.2s, v0.2s, v1.2s
404; CHECK-NEXT:    ret
405  %z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
406  ret <2 x i32> %z
407}
408
; Signed saturating add of two <4 x i32> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on .4s registers.
409define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
410; CHECK-LABEL: v4i32:
411; CHECK:       // %bb.0:
412; CHECK-NEXT:    sqadd v0.4s, v0.4s, v1.4s
413; CHECK-NEXT:    ret
414  %z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
415  ret <4 x i32> %z
416}
417
; Signed saturating add of two <8 x i32> values passed and returned by value.
; Each run expects two sqadd instructions on .4s registers (v0 with v2 and
; v1 with v3). The default llc run and the -global-isel run differ only in the
; order in which the two instructions are emitted.
418define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
419; CHECK-SD-LABEL: v8i32:
420; CHECK-SD:       // %bb.0:
421; CHECK-SD-NEXT:    sqadd v1.4s, v1.4s, v3.4s
422; CHECK-SD-NEXT:    sqadd v0.4s, v0.4s, v2.4s
423; CHECK-SD-NEXT:    ret
424;
425; CHECK-GI-LABEL: v8i32:
426; CHECK-GI:       // %bb.0:
427; CHECK-GI-NEXT:    sqadd v0.4s, v0.4s, v2.4s
428; CHECK-GI-NEXT:    sqadd v1.4s, v1.4s, v3.4s
429; CHECK-GI-NEXT:    ret
430  %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
431  ret <8 x i32> %z
432}
433
; Signed saturating add of two <16 x i32> values passed and returned by value.
; Each run expects four sqadd instructions on .4s registers (v0-v3 paired
; with v4-v7). The default llc run and the -global-isel run differ only in the
; order of those four instructions.
434define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
435; CHECK-SD-LABEL: v16i32:
436; CHECK-SD:       // %bb.0:
437; CHECK-SD-NEXT:    sqadd v2.4s, v2.4s, v6.4s
438; CHECK-SD-NEXT:    sqadd v0.4s, v0.4s, v4.4s
439; CHECK-SD-NEXT:    sqadd v1.4s, v1.4s, v5.4s
440; CHECK-SD-NEXT:    sqadd v3.4s, v3.4s, v7.4s
441; CHECK-SD-NEXT:    ret
442;
443; CHECK-GI-LABEL: v16i32:
444; CHECK-GI:       // %bb.0:
445; CHECK-GI-NEXT:    sqadd v0.4s, v0.4s, v4.4s
446; CHECK-GI-NEXT:    sqadd v1.4s, v1.4s, v5.4s
447; CHECK-GI-NEXT:    sqadd v2.4s, v2.4s, v6.4s
448; CHECK-GI-NEXT:    sqadd v3.4s, v3.4s, v7.4s
449; CHECK-GI-NEXT:    ret
450  %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
451  ret <16 x i32> %z
452}
453
; Signed saturating add of two <2 x i64> values passed and returned by value.
; Both llc runs share one expectation, a single sqadd on .2d registers.
454define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
455; CHECK-LABEL: v2i64:
456; CHECK:       // %bb.0:
457; CHECK-NEXT:    sqadd v0.2d, v0.2d, v1.2d
458; CHECK-NEXT:    ret
459  %z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
460  ret <2 x i64> %z
461}
462
; Signed saturating add of two <4 x i64> values passed and returned by value.
; Each run expects two sqadd instructions on .2d registers (v0 with v2 and
; v1 with v3). The default llc run and the -global-isel run differ only in the
; order in which the two instructions are emitted.
463define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
464; CHECK-SD-LABEL: v4i64:
465; CHECK-SD:       // %bb.0:
466; CHECK-SD-NEXT:    sqadd v1.2d, v1.2d, v3.2d
467; CHECK-SD-NEXT:    sqadd v0.2d, v0.2d, v2.2d
468; CHECK-SD-NEXT:    ret
469;
470; CHECK-GI-LABEL: v4i64:
471; CHECK-GI:       // %bb.0:
472; CHECK-GI-NEXT:    sqadd v0.2d, v0.2d, v2.2d
473; CHECK-GI-NEXT:    sqadd v1.2d, v1.2d, v3.2d
474; CHECK-GI-NEXT:    ret
475  %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
476  ret <4 x i64> %z
477}
478
; Signed saturating add of two <8 x i64> values passed and returned by value.
; Each run expects four sqadd instructions on .2d registers (v0-v3 paired
; with v4-v7). The default llc run and the -global-isel run differ only in the
; order of those four instructions.
479define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
480; CHECK-SD-LABEL: v8i64:
481; CHECK-SD:       // %bb.0:
482; CHECK-SD-NEXT:    sqadd v2.2d, v2.2d, v6.2d
483; CHECK-SD-NEXT:    sqadd v0.2d, v0.2d, v4.2d
484; CHECK-SD-NEXT:    sqadd v1.2d, v1.2d, v5.2d
485; CHECK-SD-NEXT:    sqadd v3.2d, v3.2d, v7.2d
486; CHECK-SD-NEXT:    ret
487;
488; CHECK-GI-LABEL: v8i64:
489; CHECK-GI:       // %bb.0:
490; CHECK-GI-NEXT:    sqadd v0.2d, v0.2d, v4.2d
491; CHECK-GI-NEXT:    sqadd v1.2d, v1.2d, v5.2d
492; CHECK-GI-NEXT:    sqadd v2.2d, v2.2d, v6.2d
493; CHECK-GI-NEXT:    sqadd v3.2d, v3.2d, v7.2d
494; CHECK-GI-NEXT:    ret
495  %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
496  ret <8 x i64> %z
497}
498
; Signed saturating add of two <2 x i128> values passed and returned by value.
; Neither run expects sqadd. Each 128-bit element is expected to be added in
; general-purpose registers with an adds/adcs pair.
; The default llc run expects, per element, asr by 63 of the high half, eor
; with 0x8000000000000000, and two csel instructions keyed on the overflow
; flag (vs).
; The -global-isel run expects the overflow flag to be captured with cset,
; the replacement high half to be formed with cmp and adc against the
; constant 0x8000000000000000, and the csel instructions to be keyed on a tst
; of the captured flag.
499define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
500; CHECK-SD-LABEL: v2i128:
501; CHECK-SD:       // %bb.0:
502; CHECK-SD-NEXT:    adds x8, x0, x4
503; CHECK-SD-NEXT:    adcs x9, x1, x5
504; CHECK-SD-NEXT:    asr x10, x9, #63
505; CHECK-SD-NEXT:    eor x11, x10, #0x8000000000000000
506; CHECK-SD-NEXT:    csel x0, x10, x8, vs
507; CHECK-SD-NEXT:    csel x1, x11, x9, vs
508; CHECK-SD-NEXT:    adds x8, x2, x6
509; CHECK-SD-NEXT:    adcs x9, x3, x7
510; CHECK-SD-NEXT:    asr x10, x9, #63
511; CHECK-SD-NEXT:    eor x11, x10, #0x8000000000000000
512; CHECK-SD-NEXT:    csel x2, x10, x8, vs
513; CHECK-SD-NEXT:    csel x3, x11, x9, vs
514; CHECK-SD-NEXT:    ret
515;
516; CHECK-GI-LABEL: v2i128:
517; CHECK-GI:       // %bb.0:
518; CHECK-GI-NEXT:    adds x9, x0, x4
519; CHECK-GI-NEXT:    mov w8, wzr
520; CHECK-GI-NEXT:    mov x13, #-9223372036854775808 // =0x8000000000000000
521; CHECK-GI-NEXT:    adcs x10, x1, x5
522; CHECK-GI-NEXT:    asr x11, x10, #63
523; CHECK-GI-NEXT:    cset w12, vs
524; CHECK-GI-NEXT:    cmp w8, #1
525; CHECK-GI-NEXT:    adc x14, x11, x13
526; CHECK-GI-NEXT:    tst w12, #0x1
527; CHECK-GI-NEXT:    csel x0, x11, x9, ne
528; CHECK-GI-NEXT:    csel x1, x14, x10, ne
529; CHECK-GI-NEXT:    adds x9, x2, x6
530; CHECK-GI-NEXT:    adcs x10, x3, x7
531; CHECK-GI-NEXT:    asr x11, x10, #63
532; CHECK-GI-NEXT:    cset w12, vs
533; CHECK-GI-NEXT:    cmp w8, #1
534; CHECK-GI-NEXT:    adc x8, x11, x13
535; CHECK-GI-NEXT:    tst w12, #0x1
536; CHECK-GI-NEXT:    csel x2, x11, x9, ne
537; CHECK-GI-NEXT:    csel x3, x8, x10, ne
538; CHECK-GI-NEXT:    ret
539  %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
540  ret <2 x i128> %z
541}
542