xref: /llvm-project/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll (revision c537c752787e9da8bd8762dd5298a152f546861b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
7
; Declarations of the llvm.uadd.sat vector overloads, grouped by element type.
; i8 elements, 1 to 64 lanes.
8declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
9declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
10declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
11declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>)
12declare <12 x i8> @llvm.uadd.sat.v12i8(<12 x i8>, <12 x i8>)
13declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
14declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
15declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
16
; i16 elements, 1 to 32 lanes.
17declare <1 x i16> @llvm.uadd.sat.v1i16(<1 x i16>, <1 x i16>)
18declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
19declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
20declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
21declare <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16>, <12 x i16>)
22declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)
23declare <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16>, <32 x i16>)
24
; Sub-byte element types (i1 and i4), 16 lanes each.
25declare <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1>, <16 x i1>)
26declare <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4>, <16 x i4>)
27
; i32 and i64 elements.
28declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
29declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
30declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>)
31declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>)
32declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>)
33declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>)
34declare <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64>, <8 x i64>)
35
; Unusual element widths (i24 and i128).
; NOTE(review): no function in the visible part of this file calls the v4i24
; overload - confirm whether a test for it is intentionally absent.
36declare <4 x i24> @llvm.uadd.sat.v4i24(<4 x i24>, <4 x i24>)
37declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>)
38
; <16 x i8> passed and returned in registers. Both llc runs share the common
; check prefix and expect a single uqadd on the .16b arrangement.
39define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
40; CHECK-LABEL: v16i8:
41; CHECK:       // %bb.0:
42; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
43; CHECK-NEXT:    ret
44  %z = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
45  ret <16 x i8> %z
46}
47
; <32 x i8>: expected as two .16b uqadds (v0 with v2, v1 with v3).
; The default run (CHECK-SD) and the -global-isel run (CHECK-GI) expect the
; same two instructions, only in a different order, so each has its own block.
48define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
49; CHECK-SD-LABEL: v32i8:
50; CHECK-SD:       // %bb.0:
51; CHECK-SD-NEXT:    uqadd v1.16b, v1.16b, v3.16b
52; CHECK-SD-NEXT:    uqadd v0.16b, v0.16b, v2.16b
53; CHECK-SD-NEXT:    ret
54;
55; CHECK-GI-LABEL: v32i8:
56; CHECK-GI:       // %bb.0:
57; CHECK-GI-NEXT:    uqadd v0.16b, v0.16b, v2.16b
58; CHECK-GI-NEXT:    uqadd v1.16b, v1.16b, v3.16b
59; CHECK-GI-NEXT:    ret
60  %z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
61  ret <32 x i8> %z
62}
63
; <64 x i8>: expected as four .16b uqadds pairing v0-v3 with v4-v7.
; The CHECK-SD and CHECK-GI blocks list the same four instructions in a
; different order.
64define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
65; CHECK-SD-LABEL: v64i8:
66; CHECK-SD:       // %bb.0:
67; CHECK-SD-NEXT:    uqadd v2.16b, v2.16b, v6.16b
68; CHECK-SD-NEXT:    uqadd v0.16b, v0.16b, v4.16b
69; CHECK-SD-NEXT:    uqadd v1.16b, v1.16b, v5.16b
70; CHECK-SD-NEXT:    uqadd v3.16b, v3.16b, v7.16b
71; CHECK-SD-NEXT:    ret
72;
73; CHECK-GI-LABEL: v64i8:
74; CHECK-GI:       // %bb.0:
75; CHECK-GI-NEXT:    uqadd v0.16b, v0.16b, v4.16b
76; CHECK-GI-NEXT:    uqadd v1.16b, v1.16b, v5.16b
77; CHECK-GI-NEXT:    uqadd v2.16b, v2.16b, v6.16b
78; CHECK-GI-NEXT:    uqadd v3.16b, v3.16b, v7.16b
79; CHECK-GI-NEXT:    ret
80  %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
81  ret <64 x i8> %z
82}
83
; <8 x i16> in registers. Both llc runs share the common check prefix and
; expect a single uqadd on the .8h arrangement.
84define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
85; CHECK-LABEL: v8i16:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
88; CHECK-NEXT:    ret
89  %z = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
90  ret <8 x i16> %z
91}
92
; <16 x i16>: expected as two .8h uqadds (v0 with v2, v1 with v3).
; CHECK-SD and CHECK-GI differ only in the order of the two instructions.
93define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
94; CHECK-SD-LABEL: v16i16:
95; CHECK-SD:       // %bb.0:
96; CHECK-SD-NEXT:    uqadd v1.8h, v1.8h, v3.8h
97; CHECK-SD-NEXT:    uqadd v0.8h, v0.8h, v2.8h
98; CHECK-SD-NEXT:    ret
99;
100; CHECK-GI-LABEL: v16i16:
101; CHECK-GI:       // %bb.0:
102; CHECK-GI-NEXT:    uqadd v0.8h, v0.8h, v2.8h
103; CHECK-GI-NEXT:    uqadd v1.8h, v1.8h, v3.8h
104; CHECK-GI-NEXT:    ret
105  %z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
106  ret <16 x i16> %z
107}
108
; <32 x i16>: expected as four .8h uqadds pairing v0-v3 with v4-v7.
; CHECK-SD and CHECK-GI list the same four instructions in a different order.
109define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
110; CHECK-SD-LABEL: v32i16:
111; CHECK-SD:       // %bb.0:
112; CHECK-SD-NEXT:    uqadd v2.8h, v2.8h, v6.8h
113; CHECK-SD-NEXT:    uqadd v0.8h, v0.8h, v4.8h
114; CHECK-SD-NEXT:    uqadd v1.8h, v1.8h, v5.8h
115; CHECK-SD-NEXT:    uqadd v3.8h, v3.8h, v7.8h
116; CHECK-SD-NEXT:    ret
117;
118; CHECK-GI-LABEL: v32i16:
119; CHECK-GI:       // %bb.0:
120; CHECK-GI-NEXT:    uqadd v0.8h, v0.8h, v4.8h
121; CHECK-GI-NEXT:    uqadd v1.8h, v1.8h, v5.8h
122; CHECK-GI-NEXT:    uqadd v2.8h, v2.8h, v6.8h
123; CHECK-GI-NEXT:    uqadd v3.8h, v3.8h, v7.8h
124; CHECK-GI-NEXT:    ret
125  %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
126  ret <32 x i16> %z
127}
128
; <8 x i8> operands loaded from %px and %py, result stored to %pz.
; Both llc runs expect d-register loads, one uqadd on .8b, and a d-register
; store.
129define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
130; CHECK-LABEL: v8i8:
131; CHECK:       // %bb.0:
132; CHECK-NEXT:    ldr d0, [x0]
133; CHECK-NEXT:    ldr d1, [x1]
134; CHECK-NEXT:    uqadd v0.8b, v0.8b, v1.8b
135; CHECK-NEXT:    str d0, [x2]
136; CHECK-NEXT:    ret
137  %x = load <8 x i8>, ptr %px
138  %y = load <8 x i8>, ptr %py
139  %z = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %x, <8 x i8> %y)
140  store <8 x i8> %z, ptr %pz
141  ret void
142}
143
; <4 x i8> operands loaded from %px and %py, result stored to %pz.
; The two llc runs expect different lowerings:
;  - CHECK-SD: s-register loads, a widening add (uaddl), a umin against a
;    0x00ff-per-halfword constant to clamp, uzp1 to narrow, and an s-register
;    store.
;  - CHECK-GI: 32-bit GPR loads moved into vector registers, each operand
;    rebuilt byte by byte with lane moves, one uqadd on .8b, and the result
;    stored through a 32-bit GPR.
144define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
145; CHECK-SD-LABEL: v4i8:
146; CHECK-SD:       // %bb.0:
147; CHECK-SD-NEXT:    ldr s0, [x0]
148; CHECK-SD-NEXT:    ldr s1, [x1]
149; CHECK-SD-NEXT:    movi d2, #0xff00ff00ff00ff
150; CHECK-SD-NEXT:    uaddl v0.8h, v0.8b, v1.8b
151; CHECK-SD-NEXT:    umin v0.4h, v0.4h, v2.4h
152; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
153; CHECK-SD-NEXT:    str s0, [x2]
154; CHECK-SD-NEXT:    ret
155;
156; CHECK-GI-LABEL: v4i8:
157; CHECK-GI:       // %bb.0:
158; CHECK-GI-NEXT:    ldr w8, [x0]
159; CHECK-GI-NEXT:    ldr w9, [x1]
160; CHECK-GI-NEXT:    fmov s0, w8
161; CHECK-GI-NEXT:    fmov s1, w9
162; CHECK-GI-NEXT:    mov b2, v0.b[1]
163; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
164; CHECK-GI-NEXT:    mov b4, v1.b[1]
165; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
166; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
167; CHECK-GI-NEXT:    mov b2, v0.b[2]
168; CHECK-GI-NEXT:    mov b0, v0.b[3]
169; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
170; CHECK-GI-NEXT:    mov b4, v1.b[2]
171; CHECK-GI-NEXT:    mov b1, v1.b[3]
172; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
173; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
174; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
175; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
176; CHECK-GI-NEXT:    uqadd v0.8b, v3.8b, v5.8b
177; CHECK-GI-NEXT:    fmov w8, s0
178; CHECK-GI-NEXT:    str w8, [x2]
179; CHECK-GI-NEXT:    ret
180  %x = load <4 x i8>, ptr %px
181  %y = load <4 x i8>, ptr %py
182  %z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
183  store <4 x i8> %z, ptr %pz
184  ret void
185}
186
; <2 x i8> operands loaded from %px and %py, result stored to %pz.
; The two llc runs expect different lowerings:
;  - CHECK-SD: each byte is loaded with ldrb and placed in a 32-bit lane, the
;    lanes are added with a plain .2s add, clamped with umin against a
;    0xff-per-lane constant, and stored back with two strb instructions.
;  - CHECK-GI: bytes are loaded into b registers and inserted into byte lanes,
;    combined with one uqadd on .8b, and stored with per-lane st1.
187define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
188; CHECK-SD-LABEL: v2i8:
189; CHECK-SD:       // %bb.0:
190; CHECK-SD-NEXT:    ldrb w8, [x0]
191; CHECK-SD-NEXT:    ldrb w9, [x1]
192; CHECK-SD-NEXT:    movi d2, #0x0000ff000000ff
193; CHECK-SD-NEXT:    ldrb w10, [x0, #1]
194; CHECK-SD-NEXT:    ldrb w11, [x1, #1]
195; CHECK-SD-NEXT:    fmov s0, w8
196; CHECK-SD-NEXT:    fmov s1, w9
197; CHECK-SD-NEXT:    mov v0.s[1], w10
198; CHECK-SD-NEXT:    mov v1.s[1], w11
199; CHECK-SD-NEXT:    add v0.2s, v0.2s, v1.2s
200; CHECK-SD-NEXT:    umin v0.2s, v0.2s, v2.2s
201; CHECK-SD-NEXT:    mov w8, v0.s[1]
202; CHECK-SD-NEXT:    fmov w9, s0
203; CHECK-SD-NEXT:    strb w9, [x2]
204; CHECK-SD-NEXT:    strb w8, [x2, #1]
205; CHECK-SD-NEXT:    ret
206;
207; CHECK-GI-LABEL: v2i8:
208; CHECK-GI:       // %bb.0:
209; CHECK-GI-NEXT:    ldr b0, [x0]
210; CHECK-GI-NEXT:    ldr b1, [x1]
211; CHECK-GI-NEXT:    add x8, x2, #1
212; CHECK-GI-NEXT:    ldr b2, [x0, #1]
213; CHECK-GI-NEXT:    ldr b3, [x1, #1]
214; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
215; CHECK-GI-NEXT:    mov v1.b[0], v1.b[0]
216; CHECK-GI-NEXT:    mov v0.b[1], v2.b[0]
217; CHECK-GI-NEXT:    mov v1.b[1], v3.b[0]
218; CHECK-GI-NEXT:    uqadd v0.8b, v0.8b, v1.8b
219; CHECK-GI-NEXT:    st1 { v0.b }[0], [x2]
220; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
221; CHECK-GI-NEXT:    ret
222  %x = load <2 x i8>, ptr %px
223  %y = load <2 x i8>, ptr %py
224  %z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
225  store <2 x i8> %z, ptr %pz
226  ret void
227}
228
; <4 x i16> operands loaded from %px and %py, result stored to %pz.
; Both llc runs expect d-register loads, one uqadd on .4h, and a d-register
; store.
229define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
230; CHECK-LABEL: v4i16:
231; CHECK:       // %bb.0:
232; CHECK-NEXT:    ldr d0, [x0]
233; CHECK-NEXT:    ldr d1, [x1]
234; CHECK-NEXT:    uqadd v0.4h, v0.4h, v1.4h
235; CHECK-NEXT:    str d0, [x2]
236; CHECK-NEXT:    ret
237  %x = load <4 x i16>, ptr %px
238  %y = load <4 x i16>, ptr %py
239  %z = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %x, <4 x i16> %y)
240  store <4 x i16> %z, ptr %pz
241  ret void
242}
243
; <2 x i16> operands loaded from %px and %py, result stored to %pz.
; The two llc runs expect different lowerings:
;  - CHECK-SD: each halfword is loaded with ldrh and placed in a 32-bit lane,
;    the lanes are added with a plain .2s add, clamped with umin against a
;    0xffff-per-lane constant, and stored back with two strh instructions.
;  - CHECK-GI: an h-register load plus a lane-1 ld1 per operand, one uqadd on
;    .4h, then an h-register store plus a lane-1 st1.
244define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
245; CHECK-SD-LABEL: v2i16:
246; CHECK-SD:       // %bb.0:
247; CHECK-SD-NEXT:    ldrh w8, [x0]
248; CHECK-SD-NEXT:    ldrh w9, [x1]
249; CHECK-SD-NEXT:    movi d2, #0x00ffff0000ffff
250; CHECK-SD-NEXT:    ldrh w10, [x0, #2]
251; CHECK-SD-NEXT:    ldrh w11, [x1, #2]
252; CHECK-SD-NEXT:    fmov s0, w8
253; CHECK-SD-NEXT:    fmov s1, w9
254; CHECK-SD-NEXT:    mov v0.s[1], w10
255; CHECK-SD-NEXT:    mov v1.s[1], w11
256; CHECK-SD-NEXT:    add v0.2s, v0.2s, v1.2s
257; CHECK-SD-NEXT:    umin v0.2s, v0.2s, v2.2s
258; CHECK-SD-NEXT:    mov w8, v0.s[1]
259; CHECK-SD-NEXT:    fmov w9, s0
260; CHECK-SD-NEXT:    strh w9, [x2]
261; CHECK-SD-NEXT:    strh w8, [x2, #2]
262; CHECK-SD-NEXT:    ret
263;
264; CHECK-GI-LABEL: v2i16:
265; CHECK-GI:       // %bb.0:
266; CHECK-GI-NEXT:    ldr h0, [x0]
267; CHECK-GI-NEXT:    ldr h1, [x1]
268; CHECK-GI-NEXT:    add x8, x0, #2
269; CHECK-GI-NEXT:    add x9, x1, #2
270; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
271; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
272; CHECK-GI-NEXT:    add x8, x2, #2
273; CHECK-GI-NEXT:    uqadd v0.4h, v0.4h, v1.4h
274; CHECK-GI-NEXT:    str h0, [x2]
275; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
276; CHECK-GI-NEXT:    ret
277  %x = load <2 x i16>, ptr %px
278  %y = load <2 x i16>, ptr %py
279  %z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
280  store <2 x i16> %z, ptr %pz
281  ret void
282}
283
; <12 x i8> (non-power-of-two lane count) passed in registers.
; Both llc runs expect a single uqadd on the full .16b arrangement.
284define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
285; CHECK-LABEL: v12i8:
286; CHECK:       // %bb.0:
287; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
288; CHECK-NEXT:    ret
289  %z = call <12 x i8> @llvm.uadd.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
290  ret <12 x i8> %z
291}
292
; <12 x i16> operands loaded from %px and %py, result stored to %pz.
; Both llc runs store a q register at offset 0 and a d register at offset 16,
; but differ in how the operands are loaded and added:
;  - CHECK-SD: a pair of q registers per operand (ldp) and two .8h uqadds.
;  - CHECK-GI: a q load plus a d load per operand, then one .8h uqadd and one
;    .4h uqadd.
293define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
294; CHECK-SD-LABEL: v12i16:
295; CHECK-SD:       // %bb.0:
296; CHECK-SD-NEXT:    ldp q0, q3, [x1]
297; CHECK-SD-NEXT:    ldp q1, q2, [x0]
298; CHECK-SD-NEXT:    uqadd v0.8h, v1.8h, v0.8h
299; CHECK-SD-NEXT:    uqadd v1.8h, v2.8h, v3.8h
300; CHECK-SD-NEXT:    str q0, [x2]
301; CHECK-SD-NEXT:    str d1, [x2, #16]
302; CHECK-SD-NEXT:    ret
303;
304; CHECK-GI-LABEL: v12i16:
305; CHECK-GI:       // %bb.0:
306; CHECK-GI-NEXT:    ldr q0, [x0]
307; CHECK-GI-NEXT:    ldr q1, [x1]
308; CHECK-GI-NEXT:    ldr d2, [x0, #16]
309; CHECK-GI-NEXT:    ldr d3, [x1, #16]
310; CHECK-GI-NEXT:    uqadd v0.8h, v0.8h, v1.8h
311; CHECK-GI-NEXT:    uqadd v1.4h, v2.4h, v3.4h
312; CHECK-GI-NEXT:    str q0, [x2]
313; CHECK-GI-NEXT:    str d1, [x2, #16]
314; CHECK-GI-NEXT:    ret
315  %x = load <12 x i16>, ptr %px
316  %y = load <12 x i16>, ptr %py
317  %z = call <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
318  store <12 x i16> %z, ptr %pz
319  ret void
320}
321
; <1 x i8> operands loaded from %px and %py, result stored to %pz.
; The two llc runs expect different lowerings:
;  - CHECK-SD: stays in vector registers (b-register loads, uqadd on .8b,
;    lane-0 st1 store).
;  - CHECK-GI: scalar code - ldrb loads, a 32-bit add, a compare of the sum
;    against its own zero-extended low byte, and a csinv against wzr that
;    yields all-ones when they differ, then strb.
322define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
323; CHECK-SD-LABEL: v1i8:
324; CHECK-SD:       // %bb.0:
325; CHECK-SD-NEXT:    ldr b0, [x0]
326; CHECK-SD-NEXT:    ldr b1, [x1]
327; CHECK-SD-NEXT:    uqadd v0.8b, v0.8b, v1.8b
328; CHECK-SD-NEXT:    st1 { v0.b }[0], [x2]
329; CHECK-SD-NEXT:    ret
330;
331; CHECK-GI-LABEL: v1i8:
332; CHECK-GI:       // %bb.0:
333; CHECK-GI-NEXT:    ldrb w8, [x0]
334; CHECK-GI-NEXT:    ldrb w9, [x1]
335; CHECK-GI-NEXT:    add w8, w8, w9
336; CHECK-GI-NEXT:    cmp w8, w8, uxtb
337; CHECK-GI-NEXT:    csinv w8, w8, wzr, eq
338; CHECK-GI-NEXT:    strb w8, [x2]
339; CHECK-GI-NEXT:    ret
340  %x = load <1 x i8>, ptr %px
341  %y = load <1 x i8>, ptr %py
342  %z = call <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
343  store <1 x i8> %z, ptr %pz
344  ret void
345}
346
; <1 x i16> operands loaded from %px and %py, result stored to %pz.
; The two llc runs expect different lowerings:
;  - CHECK-SD: stays in vector registers (h-register loads, uqadd on .4h,
;    h-register store).
;  - CHECK-GI: scalar code - ldrh loads, a 32-bit add, a compare of the sum
;    against its own zero-extended low halfword, and a csinv against wzr that
;    yields all-ones when they differ, then strh.
347define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
348; CHECK-SD-LABEL: v1i16:
349; CHECK-SD:       // %bb.0:
350; CHECK-SD-NEXT:    ldr h0, [x0]
351; CHECK-SD-NEXT:    ldr h1, [x1]
352; CHECK-SD-NEXT:    uqadd v0.4h, v0.4h, v1.4h
353; CHECK-SD-NEXT:    str h0, [x2]
354; CHECK-SD-NEXT:    ret
355;
356; CHECK-GI-LABEL: v1i16:
357; CHECK-GI:       // %bb.0:
358; CHECK-GI-NEXT:    ldrh w8, [x0]
359; CHECK-GI-NEXT:    ldrh w9, [x1]
360; CHECK-GI-NEXT:    add w8, w8, w9
361; CHECK-GI-NEXT:    cmp w8, w8, uxth
362; CHECK-GI-NEXT:    csinv w8, w8, wzr, eq
363; CHECK-GI-NEXT:    strh w8, [x2]
364; CHECK-GI-NEXT:    ret
365  %x = load <1 x i16>, ptr %px
366  %y = load <1 x i16>, ptr %py
367  %z = call <1 x i16> @llvm.uadd.sat.v1i16(<1 x i16> %x, <1 x i16> %y)
368  store <1 x i16> %z, ptr %pz
369  ret void
370}
371
; <16 x i4> (sub-byte elements) in registers. The expected code masks both
; inputs with 15, adds them as bytes, and clamps the sum with umin against 15.
; The warning lines at the top of this file expect the -global-isel run to
; report a fallback for this function; both runs share the common check prefix.
372define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
373; CHECK-LABEL: v16i4:
374; CHECK:       // %bb.0:
375; CHECK-NEXT:    movi v2.16b, #15
376; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
377; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
378; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
379; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
380; CHECK-NEXT:    ret
381  %z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
382  ret <16 x i4> %z
383}
384
; <16 x i1> in registers. The expected code is a single bitwise orr on .16b.
; The warning lines at the top of this file expect the -global-isel run to
; report a fallback for this function; both runs share the common check prefix.
385define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
386; CHECK-LABEL: v16i1:
387; CHECK:       // %bb.0:
388; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
389; CHECK-NEXT:    ret
390  %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
391  ret <16 x i1> %z
392}
393
; <2 x i32> in registers. Both llc runs expect a single uqadd on the .2s
; arrangement.
394define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
395; CHECK-LABEL: v2i32:
396; CHECK:       // %bb.0:
397; CHECK-NEXT:    uqadd v0.2s, v0.2s, v1.2s
398; CHECK-NEXT:    ret
399  %z = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
400  ret <2 x i32> %z
401}
402
; <4 x i32> in registers. Both llc runs expect a single uqadd on the .4s
; arrangement.
403define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
404; CHECK-LABEL: v4i32:
405; CHECK:       // %bb.0:
406; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
407; CHECK-NEXT:    ret
408  %z = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
409  ret <4 x i32> %z
410}
411
; <8 x i32>: expected as two .4s uqadds (v0 with v2, v1 with v3).
; CHECK-SD and CHECK-GI differ only in the order of the two instructions.
412define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
413; CHECK-SD-LABEL: v8i32:
414; CHECK-SD:       // %bb.0:
415; CHECK-SD-NEXT:    uqadd v1.4s, v1.4s, v3.4s
416; CHECK-SD-NEXT:    uqadd v0.4s, v0.4s, v2.4s
417; CHECK-SD-NEXT:    ret
418;
419; CHECK-GI-LABEL: v8i32:
420; CHECK-GI:       // %bb.0:
421; CHECK-GI-NEXT:    uqadd v0.4s, v0.4s, v2.4s
422; CHECK-GI-NEXT:    uqadd v1.4s, v1.4s, v3.4s
423; CHECK-GI-NEXT:    ret
424  %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
425  ret <8 x i32> %z
426}
427
; <16 x i32>: expected as four .4s uqadds pairing v0-v3 with v4-v7.
; CHECK-SD and CHECK-GI list the same four instructions in a different order.
428define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
429; CHECK-SD-LABEL: v16i32:
430; CHECK-SD:       // %bb.0:
431; CHECK-SD-NEXT:    uqadd v2.4s, v2.4s, v6.4s
432; CHECK-SD-NEXT:    uqadd v0.4s, v0.4s, v4.4s
433; CHECK-SD-NEXT:    uqadd v1.4s, v1.4s, v5.4s
434; CHECK-SD-NEXT:    uqadd v3.4s, v3.4s, v7.4s
435; CHECK-SD-NEXT:    ret
436;
437; CHECK-GI-LABEL: v16i32:
438; CHECK-GI:       // %bb.0:
439; CHECK-GI-NEXT:    uqadd v0.4s, v0.4s, v4.4s
440; CHECK-GI-NEXT:    uqadd v1.4s, v1.4s, v5.4s
441; CHECK-GI-NEXT:    uqadd v2.4s, v2.4s, v6.4s
442; CHECK-GI-NEXT:    uqadd v3.4s, v3.4s, v7.4s
443; CHECK-GI-NEXT:    ret
444  %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
445  ret <16 x i32> %z
446}
447
; <2 x i64> in registers. Both llc runs expect a single uqadd on the .2d
; arrangement.
448define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
449; CHECK-LABEL: v2i64:
450; CHECK:       // %bb.0:
451; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
452; CHECK-NEXT:    ret
453  %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
454  ret <2 x i64> %z
455}
456
; <4 x i64>: expected as two .2d uqadds (v0 with v2, v1 with v3).
; CHECK-SD and CHECK-GI differ only in the order of the two instructions.
457define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
458; CHECK-SD-LABEL: v4i64:
459; CHECK-SD:       // %bb.0:
460; CHECK-SD-NEXT:    uqadd v1.2d, v1.2d, v3.2d
461; CHECK-SD-NEXT:    uqadd v0.2d, v0.2d, v2.2d
462; CHECK-SD-NEXT:    ret
463;
464; CHECK-GI-LABEL: v4i64:
465; CHECK-GI:       // %bb.0:
466; CHECK-GI-NEXT:    uqadd v0.2d, v0.2d, v2.2d
467; CHECK-GI-NEXT:    uqadd v1.2d, v1.2d, v3.2d
468; CHECK-GI-NEXT:    ret
469  %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
470  ret <4 x i64> %z
471}
472
; <8 x i64>: expected as four .2d uqadds pairing v0-v3 with v4-v7.
; CHECK-SD and CHECK-GI list the same four instructions in a different order.
473define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
474; CHECK-SD-LABEL: v8i64:
475; CHECK-SD:       // %bb.0:
476; CHECK-SD-NEXT:    uqadd v2.2d, v2.2d, v6.2d
477; CHECK-SD-NEXT:    uqadd v0.2d, v0.2d, v4.2d
478; CHECK-SD-NEXT:    uqadd v1.2d, v1.2d, v5.2d
479; CHECK-SD-NEXT:    uqadd v3.2d, v3.2d, v7.2d
480; CHECK-SD-NEXT:    ret
481;
482; CHECK-GI-LABEL: v8i64:
483; CHECK-GI:       // %bb.0:
484; CHECK-GI-NEXT:    uqadd v0.2d, v0.2d, v4.2d
485; CHECK-GI-NEXT:    uqadd v1.2d, v1.2d, v5.2d
486; CHECK-GI-NEXT:    uqadd v2.2d, v2.2d, v6.2d
487; CHECK-GI-NEXT:    uqadd v3.2d, v3.2d, v7.2d
488; CHECK-GI-NEXT:    ret
489  %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
490  ret <8 x i64> %z
491}
492
; <2 x i128>: no vector instructions are expected; each 128-bit element is
; handled in general-purpose registers with an adds/adcs pair.
;  - CHECK-SD: selects directly on the carry flag, using csinv against xzr
;    with the lo condition to produce all-ones in both halves on overflow.
;  - CHECK-GI: first materializes the carry with cset, tests it with tst, and
;    then uses csinv against xzr with the eq condition.
493define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
494; CHECK-SD-LABEL: v2i128:
495; CHECK-SD:       // %bb.0:
496; CHECK-SD-NEXT:    adds x8, x0, x4
497; CHECK-SD-NEXT:    adcs x9, x1, x5
498; CHECK-SD-NEXT:    csinv x0, x8, xzr, lo
499; CHECK-SD-NEXT:    csinv x1, x9, xzr, lo
500; CHECK-SD-NEXT:    adds x8, x2, x6
501; CHECK-SD-NEXT:    adcs x9, x3, x7
502; CHECK-SD-NEXT:    csinv x2, x8, xzr, lo
503; CHECK-SD-NEXT:    csinv x3, x9, xzr, lo
504; CHECK-SD-NEXT:    ret
505;
506; CHECK-GI-LABEL: v2i128:
507; CHECK-GI:       // %bb.0:
508; CHECK-GI-NEXT:    adds x8, x0, x4
509; CHECK-GI-NEXT:    adcs x9, x1, x5
510; CHECK-GI-NEXT:    cset w10, hs
511; CHECK-GI-NEXT:    tst w10, #0x1
512; CHECK-GI-NEXT:    csinv x0, x8, xzr, eq
513; CHECK-GI-NEXT:    csinv x1, x9, xzr, eq
514; CHECK-GI-NEXT:    adds x8, x2, x6
515; CHECK-GI-NEXT:    adcs x9, x3, x7
516; CHECK-GI-NEXT:    cset w10, hs
517; CHECK-GI-NEXT:    tst w10, #0x1
518; CHECK-GI-NEXT:    csinv x2, x8, xzr, eq
519; CHECK-GI-NEXT:    csinv x3, x9, xzr, eq
520; CHECK-GI-NEXT:    ret
521  %z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
522  ret <2 x i128> %z
523}
524