xref: /llvm-project/llvm/test/CodeGen/AArch64/sat-add.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
3
4; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
5; Test each of those patterns with i8/i16/i32/i64.
6; Test each of those with a constant operand and a variable operand.
7; Test each of those with a 128-bit vector type.
8
9define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; min pattern: clamp x to -43 (unsigned) with a select, then add 42.
10; CHECK-LABEL: unsigned_sat_constant_i8_using_min:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    and w9, w0, #0xff
13; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
14; CHECK-NEXT:    cmp w9, #213
15; CHECK-NEXT:    csel w8, w0, w8, lo
16; CHECK-NEXT:    add w0, w8, #42
17; CHECK-NEXT:    ret
18  %c = icmp ult i8 %x, -43
19  %s = select i1 %c, i8 %x, i8 -43
20  %r = add i8 %s, 42
21  ret i8 %r
22}
23
24define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; cmp-with-sum pattern: x >u (x + 42) detects wrap; select -1 on overflow.
25; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
26; CHECK:       // %bb.0:
27; CHECK-NEXT:    and w8, w0, #0xff
28; CHECK-NEXT:    add w8, w8, #42
29; CHECK-NEXT:    tst w8, #0x100
30; CHECK-NEXT:    csinv w0, w8, wzr, eq
31; CHECK-NEXT:    ret
32  %a = add i8 %x, 42
33  %c = icmp ugt i8 %x, %a
34  %r = select i1 %c, i8 -1, i8 %a
35  ret i8 %r
36}
37
38define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; cmp-with-notval pattern: x >u -43 detects overflow of x + 42; select -1.
39; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
40; CHECK:       // %bb.0:
41; CHECK-NEXT:    and w8, w0, #0xff
42; CHECK-NEXT:    add w9, w0, #42
43; CHECK-NEXT:    cmp w8, #213
44; CHECK-NEXT:    csinv w0, w9, wzr, ls
45; CHECK-NEXT:    ret
46  %a = add i8 %x, 42
47  %c = icmp ugt i8 %x, -43
48  %r = select i1 %c, i8 -1, i8 %a
49  ret i8 %r
50}
51
52define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; min pattern: clamp x to -43 (unsigned) with a select, then add 42.
53; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
54; CHECK:       // %bb.0:
55; CHECK-NEXT:    mov w8, #65493 // =0xffd5
56; CHECK-NEXT:    cmp w8, w0, uxth
57; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
58; CHECK-NEXT:    csel w8, w0, w8, hi
59; CHECK-NEXT:    add w0, w8, #42
60; CHECK-NEXT:    ret
61  %c = icmp ult i16 %x, -43
62  %s = select i1 %c, i16 %x, i16 -43
63  %r = add i16 %s, 42
64  ret i16 %r
65}
66
67define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; cmp-with-sum pattern: x >u (x + 42) detects wrap; select -1 on overflow.
68; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
69; CHECK:       // %bb.0:
70; CHECK-NEXT:    and w8, w0, #0xffff
71; CHECK-NEXT:    add w8, w8, #42
72; CHECK-NEXT:    tst w8, #0x10000
73; CHECK-NEXT:    csinv w0, w8, wzr, eq
74; CHECK-NEXT:    ret
75  %a = add i16 %x, 42
76  %c = icmp ugt i16 %x, %a
77  %r = select i1 %c, i16 -1, i16 %a
78  ret i16 %r
79}
80
81define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; cmp-with-notval pattern: x >u -43 detects overflow of x + 42; select -1.
82; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
83; CHECK:       // %bb.0:
84; CHECK-NEXT:    mov w8, #65493 // =0xffd5
85; CHECK-NEXT:    add w9, w0, #42
86; CHECK-NEXT:    cmp w8, w0, uxth
87; CHECK-NEXT:    csinv w0, w9, wzr, hs
88; CHECK-NEXT:    ret
89  %a = add i16 %x, 42
90  %c = icmp ugt i16 %x, -43
91  %r = select i1 %c, i16 -1, i16 %a
92  ret i16 %r
93}
94
95define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; min pattern: clamp x to -43 (unsigned) with a select, then add 42.
96; CHECK-LABEL: unsigned_sat_constant_i32_using_min:
97; CHECK:       // %bb.0:
98; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
99; CHECK-NEXT:    cmn w0, #43
100; CHECK-NEXT:    csel w8, w0, w8, lo
101; CHECK-NEXT:    add w0, w8, #42
102; CHECK-NEXT:    ret
103  %c = icmp ult i32 %x, -43
104  %s = select i1 %c, i32 %x, i32 -43
105  %r = add i32 %s, 42
106  ret i32 %r
107}
108
109define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; cmp-with-sum pattern: x >u (x + 42) detects wrap; select -1 on overflow.
110; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
111; CHECK:       // %bb.0:
112; CHECK-NEXT:    adds w8, w0, #42
113; CHECK-NEXT:    csinv w0, w8, wzr, lo
114; CHECK-NEXT:    ret
115  %a = add i32 %x, 42
116  %c = icmp ugt i32 %x, %a
117  %r = select i1 %c, i32 -1, i32 %a
118  ret i32 %r
119}
120
121define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; cmp-with-notval pattern: x >u -43 detects overflow of x + 42; select -1.
122; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
123; CHECK:       // %bb.0:
124; CHECK-NEXT:    adds w8, w0, #42
125; CHECK-NEXT:    csinv w0, w8, wzr, lo
126; CHECK-NEXT:    ret
127  %a = add i32 %x, 42
128  %c = icmp ugt i32 %x, -43
129  %r = select i1 %c, i32 -1, i32 %a
130  ret i32 %r
131}
132
133define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; min pattern: clamp x to -43 (unsigned) with a select, then add 42.
134; CHECK-LABEL: unsigned_sat_constant_i64_using_min:
135; CHECK:       // %bb.0:
136; CHECK-NEXT:    mov x8, #-43 // =0xffffffffffffffd5
137; CHECK-NEXT:    cmn x0, #43
138; CHECK-NEXT:    csel x8, x0, x8, lo
139; CHECK-NEXT:    add x0, x8, #42
140; CHECK-NEXT:    ret
141  %c = icmp ult i64 %x, -43
142  %s = select i1 %c, i64 %x, i64 -43
143  %r = add i64 %s, 42
144  ret i64 %r
145}
146
147define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; cmp-with-sum pattern: x >u (x + 42) detects wrap; select -1 on overflow.
148; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
149; CHECK:       // %bb.0:
150; CHECK-NEXT:    adds x8, x0, #42
151; CHECK-NEXT:    csinv x0, x8, xzr, lo
152; CHECK-NEXT:    ret
153  %a = add i64 %x, 42
154  %c = icmp ugt i64 %x, %a
155  %r = select i1 %c, i64 -1, i64 %a
156  ret i64 %r
157}
158
159define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; cmp-with-notval pattern: x >u -43 detects overflow of x + 42; select -1.
160; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
161; CHECK:       // %bb.0:
162; CHECK-NEXT:    adds x8, x0, #42
163; CHECK-NEXT:    csinv x0, x8, xzr, lo
164; CHECK-NEXT:    ret
165  %a = add i64 %x, 42
166  %c = icmp ugt i64 %x, -43
167  %r = select i1 %c, i64 -1, i64 %a
168  ret i64 %r
169}
170
171define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; min pattern: clamp x to ~y with a select, then add y.
172; CHECK-LABEL: unsigned_sat_variable_i8_using_min:
173; CHECK:       // %bb.0:
174; CHECK-NEXT:    and w8, w0, #0xff
175; CHECK-NEXT:    mvn w9, w1
176; CHECK-NEXT:    cmp w8, w9, uxtb
177; CHECK-NEXT:    csinv w8, w0, w1, lo
178; CHECK-NEXT:    add w0, w8, w1
179; CHECK-NEXT:    ret
180  %noty = xor i8 %y, -1
181  %c = icmp ult i8 %x, %noty
182  %s = select i1 %c, i8 %x, i8 %noty
183  %r = add i8 %s, %y
184  ret i8 %r
185}
186
187define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; cmp-with-sum pattern: x >u (x + y) detects wrap; select -1 on overflow.
188; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
189; CHECK:       // %bb.0:
190; CHECK-NEXT:    and w8, w0, #0xff
191; CHECK-NEXT:    add w8, w8, w1, uxtb
192; CHECK-NEXT:    tst w8, #0x100
193; CHECK-NEXT:    csinv w0, w8, wzr, eq
194; CHECK-NEXT:    ret
195  %a = add i8 %x, %y
196  %c = icmp ugt i8 %x, %a
197  %r = select i1 %c, i8 -1, i8 %a
198  ret i8 %r
199}
200
201define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; cmp-with-notval pattern: x >u ~y detects overflow of x + y; select -1.
202; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
203; CHECK:       // %bb.0:
204; CHECK-NEXT:    and w8, w1, #0xff
205; CHECK-NEXT:    add w9, w0, w1
206; CHECK-NEXT:    add w8, w8, w0, uxtb
207; CHECK-NEXT:    tst w8, #0x100
208; CHECK-NEXT:    csinv w0, w9, wzr, eq
209; CHECK-NEXT:    ret
210  %noty = xor i8 %y, -1
211  %a = add i8 %x, %y
212  %c = icmp ugt i8 %x, %noty
213  %r = select i1 %c, i8 -1, i8 %a
214  ret i8 %r
215}
216
217define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; min pattern: clamp x to ~y with a select, then add y.
218; CHECK-LABEL: unsigned_sat_variable_i16_using_min:
219; CHECK:       // %bb.0:
220; CHECK-NEXT:    and w8, w0, #0xffff
221; CHECK-NEXT:    mvn w9, w1
222; CHECK-NEXT:    cmp w8, w9, uxth
223; CHECK-NEXT:    csinv w8, w0, w1, lo
224; CHECK-NEXT:    add w0, w8, w1
225; CHECK-NEXT:    ret
226  %noty = xor i16 %y, -1
227  %c = icmp ult i16 %x, %noty
228  %s = select i1 %c, i16 %x, i16 %noty
229  %r = add i16 %s, %y
230  ret i16 %r
231}
232
233define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; cmp-with-sum pattern: x >u (x + y) detects wrap; select -1 on overflow.
234; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
235; CHECK:       // %bb.0:
236; CHECK-NEXT:    and w8, w0, #0xffff
237; CHECK-NEXT:    add w8, w8, w1, uxth
238; CHECK-NEXT:    tst w8, #0x10000
239; CHECK-NEXT:    csinv w0, w8, wzr, eq
240; CHECK-NEXT:    ret
241  %a = add i16 %x, %y
242  %c = icmp ugt i16 %x, %a
243  %r = select i1 %c, i16 -1, i16 %a
244  ret i16 %r
245}
246
247define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; cmp-with-notval pattern: x >u ~y detects overflow of x + y; select -1.
248; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
249; CHECK:       // %bb.0:
250; CHECK-NEXT:    and w8, w1, #0xffff
251; CHECK-NEXT:    add w9, w0, w1
252; CHECK-NEXT:    add w8, w8, w0, uxth
253; CHECK-NEXT:    tst w8, #0x10000
254; CHECK-NEXT:    csinv w0, w9, wzr, eq
255; CHECK-NEXT:    ret
256  %noty = xor i16 %y, -1
257  %a = add i16 %x, %y
258  %c = icmp ugt i16 %x, %noty
259  %r = select i1 %c, i16 -1, i16 %a
260  ret i16 %r
261}
262
263define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; min pattern: clamp x to ~y with a select, then add y.
264; CHECK-LABEL: unsigned_sat_variable_i32_using_min:
265; CHECK:       // %bb.0:
266; CHECK-NEXT:    mvn w8, w1
267; CHECK-NEXT:    cmp w0, w8
268; CHECK-NEXT:    csinv w8, w0, w1, lo
269; CHECK-NEXT:    add w0, w8, w1
270; CHECK-NEXT:    ret
271  %noty = xor i32 %y, -1
272  %c = icmp ult i32 %x, %noty
273  %s = select i1 %c, i32 %x, i32 %noty
274  %r = add i32 %s, %y
275  ret i32 %r
276}
277
278define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; cmp-with-sum pattern: x >u (x + y) detects wrap; select -1 on overflow.
279; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
280; CHECK:       // %bb.0:
281; CHECK-NEXT:    adds w8, w0, w1
282; CHECK-NEXT:    csinv w0, w8, wzr, lo
283; CHECK-NEXT:    ret
284  %a = add i32 %x, %y
285  %c = icmp ugt i32 %x, %a
286  %r = select i1 %c, i32 -1, i32 %a
287  ret i32 %r
288}
289
290define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; cmp-with-notval pattern: x >u ~y detects overflow of x + y; select -1.
291; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
292; CHECK:       // %bb.0:
293; CHECK-NEXT:    add w8, w0, w1
294; CHECK-NEXT:    cmn w1, w0
295; CHECK-NEXT:    csinv w0, w8, wzr, lo
296; CHECK-NEXT:    ret
297  %noty = xor i32 %y, -1
298  %a = add i32 %x, %y
299  %c = icmp ugt i32 %x, %noty
300  %r = select i1 %c, i32 -1, i32 %a
301  ret i32 %r
302}
303
304define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; min pattern: clamp x to ~y with a select, then add y.
305; CHECK-LABEL: unsigned_sat_variable_i64_using_min:
306; CHECK:       // %bb.0:
307; CHECK-NEXT:    mvn x8, x1
308; CHECK-NEXT:    cmp x0, x8
309; CHECK-NEXT:    csinv x8, x0, x1, lo
310; CHECK-NEXT:    add x0, x8, x1
311; CHECK-NEXT:    ret
312  %noty = xor i64 %y, -1
313  %c = icmp ult i64 %x, %noty
314  %s = select i1 %c, i64 %x, i64 %noty
315  %r = add i64 %s, %y
316  ret i64 %r
317}
318
319define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; cmp-with-sum pattern: x >u (x + y) detects wrap; select -1 on overflow.
320; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
321; CHECK:       // %bb.0:
322; CHECK-NEXT:    adds x8, x0, x1
323; CHECK-NEXT:    csinv x0, x8, xzr, lo
324; CHECK-NEXT:    ret
325  %a = add i64 %x, %y
326  %c = icmp ugt i64 %x, %a
327  %r = select i1 %c, i64 -1, i64 %a
328  ret i64 %r
329}
330
331define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; cmp-with-notval pattern: x >u ~y detects overflow of x + y; select -1.
332; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
333; CHECK:       // %bb.0:
334; CHECK-NEXT:    add x8, x0, x1
335; CHECK-NEXT:    cmn x1, x0
336; CHECK-NEXT:    csinv x0, x8, xzr, lo
337; CHECK-NEXT:    ret
338  %noty = xor i64 %y, -1
339  %a = add i64 %x, %y
340  %c = icmp ugt i64 %x, %noty
341  %r = select i1 %c, i64 -1, i64 %a
342  ret i64 %r
343}
344
345define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; min pattern (vector): clamp x to splat(-43) with a select, then add splat(42).
346; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min:
347; CHECK:       // %bb.0:
348; CHECK-NEXT:    movi v1.16b, #213
349; CHECK-NEXT:    movi v2.16b, #42
350; CHECK-NEXT:    umin v0.16b, v0.16b, v1.16b
351; CHECK-NEXT:    add v0.16b, v0.16b, v2.16b
352; CHECK-NEXT:    ret
353  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
354  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
355  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
356  ret <16 x i8> %r
357}
358
359define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; cmp-with-sum pattern (vector): x >u (x + 42) detects wrap; select all-ones.
360; CHECK-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
361; CHECK:       // %bb.0:
362; CHECK-NEXT:    movi v1.16b, #42
363; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
364; CHECK-NEXT:    ret
365  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
366  %c = icmp ugt <16 x i8> %x, %a
367  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
368  ret <16 x i8> %r
369}
370
371define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; cmp-with-notval pattern (vector): x >u -43 detects overflow of x + 42; select all-ones.
372; CHECK-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
373; CHECK:       // %bb.0:
374; CHECK-NEXT:    movi v1.16b, #42
375; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
376; CHECK-NEXT:    ret
377  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
378  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
379  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
380  ret <16 x i8> %r
381}
382
383define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; min pattern (vector): clamp x to splat(-43) with a select, then add splat(42).
384; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min:
385; CHECK:       // %bb.0:
386; CHECK-NEXT:    mvni v1.8h, #42
387; CHECK-NEXT:    movi v2.8h, #42
388; CHECK-NEXT:    umin v0.8h, v0.8h, v1.8h
389; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
390; CHECK-NEXT:    ret
391  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
392  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
393  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
394  ret <8 x i16> %r
395}
396
397define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; cmp-with-sum pattern (vector): x >u (x + 42) detects wrap; select all-ones.
398; CHECK-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
399; CHECK:       // %bb.0:
400; CHECK-NEXT:    movi v1.8h, #42
401; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
402; CHECK-NEXT:    ret
403  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
404  %c = icmp ugt <8 x i16> %x, %a
405  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
406  ret <8 x i16> %r
407}
408
409define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; cmp-with-notval pattern (vector): x >u -43 detects overflow of x + 42; select all-ones.
410; CHECK-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
411; CHECK:       // %bb.0:
412; CHECK-NEXT:    movi v1.8h, #42
413; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
414; CHECK-NEXT:    ret
415  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
416  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
417  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
418  ret <8 x i16> %r
419}
420
421define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; min pattern (vector): clamp x to splat(-43) with a select, then add splat(42).
422; CHECK-LABEL: unsigned_sat_constant_v4i32_using_min:
423; CHECK:       // %bb.0:
424; CHECK-NEXT:    mvni v1.4s, #42
425; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
426; CHECK-NEXT:    movi v1.4s, #42
427; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
428; CHECK-NEXT:    ret
429  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
430  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
431  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
432  ret <4 x i32> %r
433}
434
435define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; cmp-with-sum pattern (vector): x >u (x + 42) detects wrap; select all-ones.
436; CHECK-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
437; CHECK:       // %bb.0:
438; CHECK-NEXT:    movi v1.4s, #42
439; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
440; CHECK-NEXT:    ret
441  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
442  %c = icmp ugt <4 x i32> %x, %a
443  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
444  ret <4 x i32> %r
445}
446
447define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; cmp-with-notval pattern (vector): x >u -43 detects overflow of x + 42; select all-ones.
448; CHECK-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
449; CHECK:       // %bb.0:
450; CHECK-NEXT:    movi v1.4s, #42
451; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
452; CHECK-NEXT:    ret
453  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
454  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
455  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
456  ret <4 x i32> %r
457}
458
459define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; min pattern (vector): clamp x to splat(-43) with a select, then add splat(42).
460; CHECK-LABEL: unsigned_sat_constant_v2i64_using_min:
461; CHECK:       // %bb.0:
462; CHECK-NEXT:    mov x8, #-43 // =0xffffffffffffffd5
463; CHECK-NEXT:    dup v1.2d, x8
464; CHECK-NEXT:    mov w8, #42 // =0x2a
465; CHECK-NEXT:    cmhi v2.2d, v1.2d, v0.2d
466; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
467; CHECK-NEXT:    dup v1.2d, x8
468; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
469; CHECK-NEXT:    ret
470  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
471  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
472  %r = add <2 x i64> %s, <i64 42, i64 42>
473  ret <2 x i64> %r
474}
475
476define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; cmp-with-sum pattern (vector): x >u (x + 42) detects wrap; select all-ones.
477; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
478; CHECK:       // %bb.0:
479; CHECK-NEXT:    mov w8, #42 // =0x2a
480; CHECK-NEXT:    dup v1.2d, x8
481; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
482; CHECK-NEXT:    ret
483  %a = add <2 x i64> %x, <i64 42, i64 42>
484  %c = icmp ugt <2 x i64> %x, %a
485  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
486  ret <2 x i64> %r
487}
488
489define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; cmp-with-notval pattern (vector): x >u -43 detects overflow of x + 42; select all-ones.
490; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
491; CHECK:       // %bb.0:
492; CHECK-NEXT:    mov w8, #42 // =0x2a
493; CHECK-NEXT:    dup v1.2d, x8
494; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
495; CHECK-NEXT:    ret
496  %a = add <2 x i64> %x, <i64 42, i64 42>
497  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
498  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
499  ret <2 x i64> %r
500}
501
502define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; min pattern (vector): clamp x to ~y with a select, then add y.
503; CHECK-LABEL: unsigned_sat_variable_v16i8_using_min:
504; CHECK:       // %bb.0:
505; CHECK-NEXT:    mvn v2.16b, v1.16b
506; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
507; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
508; CHECK-NEXT:    ret
509  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
510  %c = icmp ult <16 x i8> %x, %noty
511  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
512  %r = add <16 x i8> %s, %y
513  ret <16 x i8> %r
514}
515
516define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; cmp-with-sum pattern (vector): x >u (x + y) detects wrap; select all-ones.
517; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
518; CHECK:       // %bb.0:
519; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
520; CHECK-NEXT:    ret
521  %a = add <16 x i8> %x, %y
522  %c = icmp ugt <16 x i8> %x, %a
523  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
524  ret <16 x i8> %r
525}
526
527define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; cmp-with-notval pattern (vector): x >u ~y detects overflow of x + y; select all-ones.
528; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
529; CHECK:       // %bb.0:
530; CHECK-NEXT:    mvn v2.16b, v1.16b
531; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
532; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
533; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
534; CHECK-NEXT:    ret
535  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
536  %a = add <16 x i8> %x, %y
537  %c = icmp ugt <16 x i8> %x, %noty
538  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
539  ret <16 x i8> %r
540}
541
542define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; min pattern (vector): clamp x to ~y with a select, then add y.
543; CHECK-LABEL: unsigned_sat_variable_v8i16_using_min:
544; CHECK:       // %bb.0:
545; CHECK-NEXT:    mvn v2.16b, v1.16b
546; CHECK-NEXT:    umin v0.8h, v0.8h, v2.8h
547; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
548; CHECK-NEXT:    ret
549  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
550  %c = icmp ult <8 x i16> %x, %noty
551  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
552  %r = add <8 x i16> %s, %y
553  ret <8 x i16> %r
554}
555
556define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; cmp-with-sum pattern (vector): x >u (x + y) detects wrap; select all-ones.
557; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
558; CHECK:       // %bb.0:
559; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
560; CHECK-NEXT:    ret
561  %a = add <8 x i16> %x, %y
562  %c = icmp ugt <8 x i16> %x, %a
563  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
564  ret <8 x i16> %r
565}
566
567define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; cmp-with-notval pattern (vector): x >u ~y detects overflow of x + y; select all-ones.
568; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
569; CHECK:       // %bb.0:
570; CHECK-NEXT:    mvn v2.16b, v1.16b
571; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
572; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
573; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
574; CHECK-NEXT:    ret
575  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
576  %a = add <8 x i16> %x, %y
577  %c = icmp ugt <8 x i16> %x, %noty
578  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
579  ret <8 x i16> %r
580}
581
582define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; min pattern (vector): clamp x to ~y with a select, then add y.
583; CHECK-LABEL: unsigned_sat_variable_v4i32_using_min:
584; CHECK:       // %bb.0:
585; CHECK-NEXT:    mvn v2.16b, v1.16b
586; CHECK-NEXT:    umin v0.4s, v0.4s, v2.4s
587; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
588; CHECK-NEXT:    ret
589  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
590  %c = icmp ult <4 x i32> %x, %noty
591  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
592  %r = add <4 x i32> %s, %y
593  ret <4 x i32> %r
594}
595
596define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; cmp-with-sum pattern (vector): x >u (x + y) detects wrap; select all-ones.
597; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
598; CHECK:       // %bb.0:
599; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
600; CHECK-NEXT:    ret
601  %a = add <4 x i32> %x, %y
602  %c = icmp ugt <4 x i32> %x, %a
603  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
604  ret <4 x i32> %r
605}
606
607define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; cmp-with-notval pattern (vector): x >u ~y detects overflow of x + y; select all-ones.
608; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
609; CHECK:       // %bb.0:
610; CHECK-NEXT:    mvn v2.16b, v1.16b
611; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
612; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
613; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
614; CHECK-NEXT:    ret
615  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
616  %a = add <4 x i32> %x, %y
617  %c = icmp ugt <4 x i32> %x, %noty
618  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
619  ret <4 x i32> %r
620}
621
622define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; min pattern (vector): clamp x to ~y with a select, then add y.
623; CHECK-LABEL: unsigned_sat_variable_v2i64_using_min:
624; CHECK:       // %bb.0:
625; CHECK-NEXT:    mvn v2.16b, v1.16b
626; CHECK-NEXT:    cmhi v3.2d, v2.2d, v0.2d
627; CHECK-NEXT:    bif v0.16b, v2.16b, v3.16b
628; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
629; CHECK-NEXT:    ret
630  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
631  %c = icmp ult <2 x i64> %x, %noty
632  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
633  %r = add <2 x i64> %s, %y
634  ret <2 x i64> %r
635}
636
637define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; cmp-with-sum pattern (vector): x >u (x + y) detects wrap; select all-ones.
638; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
639; CHECK:       // %bb.0:
640; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
641; CHECK-NEXT:    ret
642  %a = add <2 x i64> %x, %y
643  %c = icmp ugt <2 x i64> %x, %a
644  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
645  ret <2 x i64> %r
646}
647
648define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; cmp-with-notval pattern (vector): x >u ~y detects overflow of x + y; select all-ones.
649; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
650; CHECK:       // %bb.0:
651; CHECK-NEXT:    mvn v2.16b, v1.16b
652; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
653; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
654; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
655; CHECK-NEXT:    ret
656  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
657  %a = add <2 x i64> %x, %y
658  %c = icmp ugt <2 x i64> %x, %noty
659  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
660  ret <2 x i64> %r
661}
662
663