; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2   | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2   | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
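;
; As an illustrative sketch (not part of the test), the three forms for a
; saturating "x + 42" on a uint8_t value in C would be:
;   min:        s = x < 213 ? x : 213;    r = s + 42;           // 213 == 255 - 42
;   cmp_sum:    a = (uint8_t)(x + 42);    r = a < x ? 255 : a;
;   cmp_notval: a = (uint8_t)(x + 42);    r = x > 213 ? 255 : a;
; All three clamp the result at 255 instead of wrapping; the -43 in the IR
; below is 213 reinterpreted as an unsigned 8-bit value.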

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

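; The variable-operand versions below rely on the identity ~y == UMAX - y:
; min(x, ~y) + y cannot wrap, because if x <= ~y the sum is exact, and
; otherwise it clamps to ~y + y == UMAX.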
define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

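; 128-bit vector versions of the same patterns. x86 has native unsigned
; saturating adds for i8/i16 elements (paddusb/paddusw; in C these are the
; _mm_adds_epu8/_mm_adds_epu16 intrinsics), so those cases can collapse to a
; single instruction, as the checks below show. An illustrative C equivalent
; of the v16i8 constant case:
;   __m128i r = _mm_adds_epu8(x, _mm_set1_epi8(42));
; There is no saturating add for i32/i64 elements, so those cases lower to an
; unsigned min (pminud / vpminuq where available) plus a plain add, or to
; compare-based sequences on older subtargets.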
define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm1 = ~xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm3 | (xmm0 ^ xmm2)
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm1 = ~xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm3 | (xmm0 ^ xmm2)
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm3 = ~xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleud %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm3 = ~xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleuq %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
