; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=ANY,SSE,SSE4,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ANY,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=ANY,AVX,AVX512

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
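; For reference, a sketch of the common target (not exercised by the RUN lines
; above): all three patterns below are semantically equivalent to the generic
; saturating-add intrinsic, e.g. for the constant-operand i32 case:
;   %r = call i32 @llvm.uadd.sat.i32(i32 %x, i32 42)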
define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpb $-43, %dil
; ANY-NEXT:    movl $213, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb $42, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb $42, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}
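; Note: for a constant C, "icmp ugt %x, ~C" is exactly the unsigned overflow
; condition for "add %x, C" (since ~C == UMAX - C), which is why the cmp_sum
; and cmp_notval forms above compile to the same add + cmov-of-255 sequence:
; the add sets the carry flag on overflow, and cmovae keeps the sum otherwise.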
define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpw $-43, %di
; ANY-NEXT:    movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw $42, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpl $-43, %edi
; ANY-NEXT:    movl $-43, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl $42, %eax
; ANY-NEXT:    retq
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl $42, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    cmpq $-43, %rdi
; ANY-NEXT:    movq $-43, %rax
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq $42, %rax
; ANY-NEXT:    retq
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq $42, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notb %al
; ANY-NEXT:    cmpb %al, %dil
; ANY-NEXT:    movzbl %al, %eax
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addb %sil, %al
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %sil, %dil
; ANY-NEXT:    movzbl %dil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addb %dil, %sil
; ANY-NEXT:    movzbl %sil, %ecx
; ANY-NEXT:    movl $255, %eax
; ANY-NEXT:    cmovael %ecx, %eax
; ANY-NEXT:    # kill: def $al killed $al killed $eax
; ANY-NEXT:    retq
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpw %ax, %di
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %si, %di
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addw %di, %si
; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
; ANY-NEXT:    cmovael %esi, %eax
; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
; ANY-NEXT:    retq
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movl %esi, %eax
; ANY-NEXT:    notl %eax
; ANY-NEXT:    cmpl %eax, %edi
; ANY-NEXT:    cmovbl %edi, %eax
; ANY-NEXT:    addl %esi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addl %esi, %edi
; ANY-NEXT:    movl $-1, %eax
; ANY-NEXT:    cmovael %edi, %eax
; ANY-NEXT:    retq
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_min:
; ANY:       # %bb.0:
; ANY-NEXT:    movq %rsi, %rax
; ANY-NEXT:    notq %rax
; ANY-NEXT:    cmpq %rax, %rdi
; ANY-NEXT:    cmovbq %rdi, %rax
; ANY-NEXT:    addq %rsi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; ANY:       # %bb.0:
; ANY-NEXT:    addq %rsi, %rdi
; ANY-NEXT:    movq $-1, %rax
; ANY-NEXT:    cmovaeq %rdi, %rax
; ANY-NEXT:    retq
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}
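; The remaining tests repeat the same patterns with 128-bit vector types.
; Expectation sketch: i8/i16 elements can select the legacy saturating adds
; (paddusb/paddusw), while i32/i64 have no such instruction and fall back to
; unsigned-min + add or compare-based sequences.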
define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; SSE-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}
define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; SSE2-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_min:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; SSE-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}
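; Note the SSE2 lowering of the v8i16 min above: there is no pminuw until
; SSE4.1, so min(x, C) is computed as x - usubsat(x, C) via psubusw + psubw.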
define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [42,42,42,42]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [4294967253,4294967253,4294967253,4294967253]
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
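; SSE2 has no unsigned vector compares, so the v4i32 sequences above flip the
; sign bit of the operands (pxor with 0x80000000, folded into constants where
; possible) and then use the signed pcmpgtd to get an unsigned comparison.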
define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [43,44,45,46]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval_nonsplat:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <4 x i32> %x, <i32 43, i32 44, i32 45, i32 46>
  %c = icmp ugt <4 x i32> %x, <i32 -44, i32 -45, i32 -46, i32 -47>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292117,9223372034707292117]
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    movapd {{.*#+}} xmm2 = [18446744073709551573,18446744073709551573]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117]
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}
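; Without pcmpgtq (SSE4.2), the 64-bit unsigned compare above is synthesized
; from 32-bit pieces: pcmpgtd/pcmpeqd on the sign-flipped halves, recombined
; with pshufd shuffles.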
define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; SSE2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [42,42]
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovsxbq {{.*#+}} xmm1 = [42,42]
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE41-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pxor %xmm1, %xmm2
; SSE42-NEXT:    paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_min:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pxor %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    paddb %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    por %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm1 = ~xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm3 | (xmm0 ^ xmm2)
; AVX512-NEXT:    retq
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}
define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    psubusw %xmm2, %xmm3
; SSE2-NEXT:    psubw %xmm3, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminuw %xmm2, %xmm0
; SSE4-NEXT:    paddw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; SSE:       # %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddw %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminuw %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminuw %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm1 = ~xmm1
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm3 | (xmm0 ^ xmm2)
; AVX512-NEXT:    retq
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}
define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    pxor %xmm1, %xmm2
; SSE4-NEXT:    pminud %xmm2, %xmm0
; SSE4-NEXT:    paddd %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddd %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    paddd %xmm1, %xmm3
; SSE4-NEXT:    pxor %xmm2, %xmm1
; SSE4-NEXT:    pminud %xmm0, %xmm1
; SSE4-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE4-NEXT:    pxor %xmm2, %xmm0
; SSE4-NEXT:    por %xmm3, %xmm0
; SSE4-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpminud %xmm3, %xmm0, %xmm3
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm3 = ~xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleud %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}
define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    paddq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [9223372034707292159,9223372034707292159]
; SSE41-NEXT:    pxor %xmm1, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2]
; SSE41-NEXT:    pand %xmm5, %xmm0
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    paddq %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_min:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm3
; SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    pxor %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775807,9223372036854775807]
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm3
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_min:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddq %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddq %xmm0, %xmm1
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    paddq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm2 = ~xmm2
; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    paddq %xmm1, %xmm2
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    paddq %xmm1, %xmm2
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    paddq %xmm1, %xmm2
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    por %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa %xmm1, %xmm3
; AVX512-NEXT:    vpternlogq {{.*#+}} xmm3 = ~xmm3
; AVX512-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpleuq %xmm3, %xmm0, %k1
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT:    vmovdqa %xmm2, %xmm0
; AVX512-NEXT:    retq
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}