1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE4 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE2 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 6; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 7 8define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind { 9; X64-LABEL: ucmp.8.8: 10; X64: # %bb.0: 11; X64-NEXT: cmpb %sil, %dil 12; X64-NEXT: seta %al 13; X64-NEXT: sbbb $0, %al 14; X64-NEXT: retq 15; 16; X86-LABEL: ucmp.8.8: 17; X86: # %bb.0: 18; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 19; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 20; X86-NEXT: seta %al 21; X86-NEXT: sbbb $0, %al 22; X86-NEXT: retl 23 %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) 24 ret i8 %1 25} 26 27define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind { 28; X64-LABEL: ucmp.8.16: 29; X64: # %bb.0: 30; X64-NEXT: cmpw %si, %di 31; X64-NEXT: seta %al 32; X64-NEXT: sbbb $0, %al 33; X64-NEXT: retq 34; 35; X86-LABEL: ucmp.8.16: 36; X86: # %bb.0: 37; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax 38; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax 39; X86-NEXT: seta %al 40; X86-NEXT: sbbb $0, %al 41; X86-NEXT: retl 42 %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) 43 ret i8 %1 44} 45 46define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind { 47; X64-LABEL: ucmp.8.32: 48; X64: # %bb.0: 49; X64-NEXT: cmpl %esi, %edi 50; X64-NEXT: seta %al 51; X64-NEXT: sbbb $0, %al 52; X64-NEXT: retq 53; 54; X86-LABEL: ucmp.8.32: 55; X86: # %bb.0: 56; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 57; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 58; X86-NEXT: seta %al 59; X86-NEXT: sbbb $0, %al 60; X86-NEXT: retl 61 %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) 62 ret i8 %1 63} 64 65define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind { 66; X64-LABEL: ucmp.8.64: 67; X64: # %bb.0: 68; X64-NEXT: cmpq %rsi, %rdi 69; X64-NEXT: seta %al 70; X64-NEXT: sbbb $0, %al 71; X64-NEXT: retq 72; 73; X86-LABEL: ucmp.8.64: 74; X86: # %bb.0: 75; X86-NEXT: pushl %edi 76; X86-NEXT: pushl %esi 77; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 78; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 79; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 80; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 81; X86-NEXT: cmpl %ecx, %esi 82; X86-NEXT: movl %edi, %eax 83; X86-NEXT: sbbl %edx, %eax 84; X86-NEXT: setb %al 85; X86-NEXT: cmpl %esi, %ecx 86; X86-NEXT: sbbl %edi, %edx 87; X86-NEXT: sbbb $0, %al 88; X86-NEXT: popl %esi 89; X86-NEXT: popl %edi 90; X86-NEXT: retl 91 %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) 92 ret i8 %1 93} 94 95define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { 96; X64-LABEL: ucmp.8.128: 97; X64: # %bb.0: 98; X64-NEXT: cmpq %rdi, %rdx 99; X64-NEXT: movq %rcx, %rax 100; X64-NEXT: sbbq %rsi, %rax 101; X64-NEXT: setb %al 102; X64-NEXT: cmpq %rdx, %rdi 103; X64-NEXT: sbbq %rcx, %rsi 104; X64-NEXT: sbbb $0, %al 105; X64-NEXT: retq 106; 107; X86-LABEL: ucmp.8.128: 108; X86: # %bb.0: 109; X86-NEXT: pushl %ebp 110; X86-NEXT: pushl %ebx 111; X86-NEXT: pushl %edi 112; X86-NEXT: pushl %esi 113; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 114; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 115; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 116; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 117; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp 118; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx 119; X86-NEXT: movl %ebp, %eax 120; 
X86-NEXT: sbbl %esi, %eax 121; X86-NEXT: movl %ecx, %eax 122; X86-NEXT: sbbl %edx, %eax 123; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 124; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 125; X86-NEXT: movl %edi, %eax 126; X86-NEXT: sbbl %ecx, %eax 127; X86-NEXT: setb %al 128; X86-NEXT: cmpl %ebx, {{[0-9]+}}(%esp) 129; X86-NEXT: sbbl %ebp, %esi 130; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx 131; X86-NEXT: sbbl %edi, %ecx 132; X86-NEXT: sbbb $0, %al 133; X86-NEXT: popl %esi 134; X86-NEXT: popl %edi 135; X86-NEXT: popl %ebx 136; X86-NEXT: popl %ebp 137; X86-NEXT: retl 138 %1 = call i8 @llvm.ucmp(i128 %x, i128 %y) 139 ret i8 %1 140} 141 142define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { 143; X64-LABEL: ucmp.32.32: 144; X64: # %bb.0: 145; X64-NEXT: cmpl %esi, %edi 146; X64-NEXT: seta %al 147; X64-NEXT: sbbb $0, %al 148; X64-NEXT: movsbl %al, %eax 149; X64-NEXT: retq 150; 151; X86-LABEL: ucmp.32.32: 152; X86: # %bb.0: 153; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 154; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 155; X86-NEXT: seta %al 156; X86-NEXT: sbbb $0, %al 157; X86-NEXT: movsbl %al, %eax 158; X86-NEXT: retl 159 %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) 160 ret i32 %1 161} 162 163define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind { 164; X64-LABEL: ucmp.32.64: 165; X64: # %bb.0: 166; X64-NEXT: cmpq %rsi, %rdi 167; X64-NEXT: seta %al 168; X64-NEXT: sbbb $0, %al 169; X64-NEXT: movsbl %al, %eax 170; X64-NEXT: retq 171; 172; X86-LABEL: ucmp.32.64: 173; X86: # %bb.0: 174; X86-NEXT: pushl %ebx 175; X86-NEXT: pushl %edi 176; X86-NEXT: pushl %esi 177; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 178; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 179; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 180; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 181; X86-NEXT: cmpl %eax, %edx 182; X86-NEXT: movl %esi, %edi 183; X86-NEXT: sbbl %ecx, %edi 184; X86-NEXT: setb %bl 185; X86-NEXT: cmpl %edx, %eax 186; X86-NEXT: sbbl %esi, %ecx 187; X86-NEXT: sbbb $0, %bl 188; X86-NEXT: movsbl %bl, %eax 189; X86-NEXT: popl %esi 190; X86-NEXT: popl %edi 191; X86-NEXT: popl %ebx 192; X86-NEXT: retl 193 %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) 194 ret i32 %1 195} 196 197define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind { 198; X64-LABEL: ucmp.64.64: 199; X64: # %bb.0: 200; X64-NEXT: cmpq %rsi, %rdi 201; X64-NEXT: seta %al 202; X64-NEXT: sbbb $0, %al 203; X64-NEXT: movsbq %al, %rax 204; X64-NEXT: retq 205; 206; X86-LABEL: ucmp.64.64: 207; X86: # %bb.0: 208; X86-NEXT: pushl %ebx 209; X86-NEXT: pushl %edi 210; X86-NEXT: pushl %esi 211; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 212; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 213; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 214; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 215; X86-NEXT: cmpl %eax, %edx 216; X86-NEXT: movl %esi, %edi 217; X86-NEXT: sbbl %ecx, %edi 218; X86-NEXT: setb %bl 219; X86-NEXT: cmpl %edx, %eax 220; X86-NEXT: sbbl %esi, %ecx 221; X86-NEXT: sbbb $0, %bl 222; X86-NEXT: movsbl %bl, %eax 223; X86-NEXT: movl %eax, %edx 224; X86-NEXT: sarl $31, %edx 225; X86-NEXT: popl %esi 226; X86-NEXT: popl %edi 227; X86-NEXT: popl %ebx 228; X86-NEXT: retl 229 %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) 230 ret i64 %1 231} 232 233define i4 @ucmp_narrow_result(i32 %x, i32 %y) nounwind { 234; X64-LABEL: ucmp_narrow_result: 235; X64: # %bb.0: 236; X64-NEXT: cmpl %esi, %edi 237; X64-NEXT: seta %al 238; X64-NEXT: sbbb $0, %al 239; X64-NEXT: retq 240; 241; X86-LABEL: ucmp_narrow_result: 242; X86: # %bb.0: 243; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 244; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 245; X86-NEXT: seta %al 246; X86-NEXT: sbbb $0, %al 247; X86-NEXT: retl 248 %1 
= call i4 @llvm.ucmp(i32 %x, i32 %y) 249 ret i4 %1 250} 251 252define i8 @ucmp_narrow_op(i62 %x, i62 %y) nounwind { 253; SSE-LABEL: ucmp_narrow_op: 254; SSE: # %bb.0: 255; SSE-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF 256; SSE-NEXT: andq %rax, %rsi 257; SSE-NEXT: andq %rax, %rdi 258; SSE-NEXT: cmpq %rsi, %rdi 259; SSE-NEXT: seta %al 260; SSE-NEXT: sbbb $0, %al 261; SSE-NEXT: retq 262; 263; AVX-LABEL: ucmp_narrow_op: 264; AVX: # %bb.0: 265; AVX-NEXT: movb $62, %al 266; AVX-NEXT: bzhiq %rax, %rsi, %rcx 267; AVX-NEXT: bzhiq %rax, %rdi, %rax 268; AVX-NEXT: cmpq %rcx, %rax 269; AVX-NEXT: seta %al 270; AVX-NEXT: sbbb $0, %al 271; AVX-NEXT: retq 272; 273; X86-LABEL: ucmp_narrow_op: 274; X86: # %bb.0: 275; X86-NEXT: pushl %edi 276; X86-NEXT: pushl %esi 277; X86-NEXT: movl $1073741823, %ecx # imm = 0x3FFFFFFF 278; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 279; X86-NEXT: andl %ecx, %edx 280; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx 281; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 282; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 283; X86-NEXT: cmpl %esi, %edi 284; X86-NEXT: movl %ecx, %eax 285; X86-NEXT: sbbl %edx, %eax 286; X86-NEXT: setb %al 287; X86-NEXT: cmpl %edi, %esi 288; X86-NEXT: sbbl %ecx, %edx 289; X86-NEXT: sbbb $0, %al 290; X86-NEXT: popl %esi 291; X86-NEXT: popl %edi 292; X86-NEXT: retl 293 %1 = call i8 @llvm.ucmp(i62 %x, i62 %y) 294 ret i8 %1 295} 296 297define i141 @ucmp_wide_result(i32 %x, i32 %y) nounwind { 298; X64-LABEL: ucmp_wide_result: 299; X64: # %bb.0: 300; X64-NEXT: cmpl %esi, %edi 301; X64-NEXT: seta %al 302; X64-NEXT: sbbb $0, %al 303; X64-NEXT: movsbq %al, %rax 304; X64-NEXT: movq %rax, %rdx 305; X64-NEXT: sarq $63, %rdx 306; X64-NEXT: movl %edx, %ecx 307; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF 308; X64-NEXT: retq 309; 310; X86-LABEL: ucmp_wide_result: 311; X86: # %bb.0: 312; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 313; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 314; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 315; X86-NEXT: seta %cl 316; X86-NEXT: sbbb $0, %cl 317; X86-NEXT: movsbl %cl, %ecx 318; X86-NEXT: movl %ecx, (%eax) 319; X86-NEXT: sarl $31, %ecx 320; X86-NEXT: movl %ecx, 12(%eax) 321; X86-NEXT: movl %ecx, 8(%eax) 322; X86-NEXT: movl %ecx, 4(%eax) 323; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF 324; X86-NEXT: movw %cx, 16(%eax) 325; X86-NEXT: retl $4 326 %1 = call i141 @llvm.ucmp(i32 %x, i32 %y) 327 ret i141 %1 328} 329 330define i8 @ucmp_wide_op(i109 %x, i109 %y) nounwind { 331; SSE-LABEL: ucmp_wide_op: 332; SSE: # %bb.0: 333; SSE-NEXT: movabsq $35184372088831, %rax # imm = 0x1FFFFFFFFFFF 334; SSE-NEXT: andq %rax, %rsi 335; SSE-NEXT: andq %rax, %rcx 336; SSE-NEXT: cmpq %rdi, %rdx 337; SSE-NEXT: movq %rcx, %rax 338; SSE-NEXT: sbbq %rsi, %rax 339; SSE-NEXT: setb %al 340; SSE-NEXT: cmpq %rdx, %rdi 341; SSE-NEXT: sbbq %rcx, %rsi 342; SSE-NEXT: sbbb $0, %al 343; SSE-NEXT: retq 344; 345; AVX-LABEL: ucmp_wide_op: 346; AVX: # %bb.0: 347; AVX-NEXT: movb $45, %al 348; AVX-NEXT: bzhiq %rax, %rsi, %rsi 349; AVX-NEXT: bzhiq %rax, %rcx, %rcx 350; AVX-NEXT: cmpq %rdi, %rdx 351; AVX-NEXT: movq %rcx, %rax 352; AVX-NEXT: sbbq %rsi, %rax 353; AVX-NEXT: setb %al 354; AVX-NEXT: cmpq %rdx, %rdi 355; AVX-NEXT: sbbq %rcx, %rsi 356; AVX-NEXT: sbbb $0, %al 357; AVX-NEXT: retq 358; 359; X86-LABEL: ucmp_wide_op: 360; X86: # %bb.0: 361; X86-NEXT: pushl %ebp 362; X86-NEXT: pushl %ebx 363; X86-NEXT: pushl %edi 364; X86-NEXT: pushl %esi 365; X86-NEXT: movl $8191, %ecx # imm = 0x1FFF 366; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 367; X86-NEXT: andl %ecx, %edx 368; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx 
369; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 370; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp 371; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 372; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp 373; X86-NEXT: sbbl %edi, %eax 374; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 375; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 376; X86-NEXT: movl %ebx, %eax 377; X86-NEXT: sbbl %esi, %eax 378; X86-NEXT: movl %ecx, %eax 379; X86-NEXT: sbbl %edx, %eax 380; X86-NEXT: setb %al 381; X86-NEXT: cmpl %ebp, {{[0-9]+}}(%esp) 382; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi 383; X86-NEXT: sbbl %ebx, %esi 384; X86-NEXT: sbbl %ecx, %edx 385; X86-NEXT: sbbb $0, %al 386; X86-NEXT: popl %esi 387; X86-NEXT: popl %edi 388; X86-NEXT: popl %ebx 389; X86-NEXT: popl %ebp 390; X86-NEXT: retl 391 %1 = call i8 @llvm.ucmp(i109 %x, i109 %y) 392 ret i8 %1 393} 394 395define i41 @ucmp_uncommon_types(i7 %x, i7 %y) nounwind { 396; X64-LABEL: ucmp_uncommon_types: 397; X64: # %bb.0: 398; X64-NEXT: andb $127, %sil 399; X64-NEXT: andb $127, %dil 400; X64-NEXT: cmpb %sil, %dil 401; X64-NEXT: seta %al 402; X64-NEXT: sbbb $0, %al 403; X64-NEXT: movsbq %al, %rax 404; X64-NEXT: retq 405; 406; X86-LABEL: ucmp_uncommon_types: 407; X86: # %bb.0: 408; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 409; X86-NEXT: andb $127, %al 410; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 411; X86-NEXT: andb $127, %cl 412; X86-NEXT: cmpb %al, %cl 413; X86-NEXT: seta %al 414; X86-NEXT: sbbb $0, %al 415; X86-NEXT: movsbl %al, %eax 416; X86-NEXT: movl %eax, %edx 417; X86-NEXT: sarl $31, %edx 418; X86-NEXT: retl 419 %1 = call i41 @llvm.ucmp(i7 %x, i7 %y) 420 ret i41 %1 421} 422 423define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind { 424; SSE4-LABEL: ucmp_normal_vectors: 425; SSE4: # %bb.0: 426; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 427; SSE4-NEXT: pxor %xmm2, %xmm1 428; SSE4-NEXT: pxor %xmm2, %xmm0 429; SSE4-NEXT: movdqa %xmm0, %xmm2 430; SSE4-NEXT: pcmpgtd %xmm1, %xmm2 431; SSE4-NEXT: pcmpgtd %xmm0, %xmm1 432; SSE4-NEXT: psubd %xmm2, %xmm1 433; SSE4-NEXT: movdqa %xmm1, %xmm0 434; SSE4-NEXT: retq 435; 436; SSE2-LABEL: ucmp_normal_vectors: 437; SSE2: # %bb.0: 438; SSE2-NEXT: movdqa %xmm0, %xmm2 439; SSE2-NEXT: pmaxud %xmm1, %xmm2 440; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 441; SSE2-NEXT: pminud %xmm0, %xmm1 442; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 443; SSE2-NEXT: psubd %xmm2, %xmm0 444; SSE2-NEXT: retq 445; 446; AVX2-LABEL: ucmp_normal_vectors: 447; AVX2: # %bb.0: 448; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 449; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2 450; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 451; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 452; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0 453; AVX2-NEXT: retq 454; 455; AVX512-LABEL: ucmp_normal_vectors: 456; AVX512: # %bb.0: 457; AVX512-NEXT: vpcmpltud %xmm1, %xmm0, %k1 458; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k2 459; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1] 460; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 461; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} 462; AVX512-NEXT: retq 463; 464; X86-LABEL: ucmp_normal_vectors: 465; X86: # %bb.0: 466; X86-NEXT: pushl %ebx 467; X86-NEXT: pushl %edi 468; X86-NEXT: pushl %esi 469; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 470; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 471; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 472; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 473; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 474; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx 475; X86-NEXT: seta %dl 476; X86-NEXT: sbbb $0, %dl 477; X86-NEXT: movsbl %dl, %edx 478; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi 
479; X86-NEXT: seta %bl 480; X86-NEXT: sbbb $0, %bl 481; X86-NEXT: movsbl %bl, %edi 482; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi 483; X86-NEXT: seta %bl 484; X86-NEXT: sbbb $0, %bl 485; X86-NEXT: movsbl %bl, %esi 486; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 487; X86-NEXT: seta %cl 488; X86-NEXT: sbbb $0, %cl 489; X86-NEXT: movsbl %cl, %ecx 490; X86-NEXT: movl %ecx, 12(%eax) 491; X86-NEXT: movl %esi, 8(%eax) 492; X86-NEXT: movl %edi, 4(%eax) 493; X86-NEXT: movl %edx, (%eax) 494; X86-NEXT: popl %esi 495; X86-NEXT: popl %edi 496; X86-NEXT: popl %ebx 497; X86-NEXT: retl $4 498 %1 = call <4 x i32> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y) 499 ret <4 x i32> %1 500} 501 502define <4 x i8> @ucmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind { 503; SSE4-LABEL: ucmp_narrow_vec_result: 504; SSE4: # %bb.0: 505; SSE4-NEXT: movd %xmm1, %eax 506; SSE4-NEXT: movd %xmm0, %ecx 507; SSE4-NEXT: cmpl %eax, %ecx 508; SSE4-NEXT: seta %al 509; SSE4-NEXT: sbbb $0, %al 510; SSE4-NEXT: movzbl %al, %eax 511; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 512; SSE4-NEXT: movd %xmm2, %ecx 513; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 514; SSE4-NEXT: movd %xmm2, %edx 515; SSE4-NEXT: cmpl %ecx, %edx 516; SSE4-NEXT: seta %cl 517; SSE4-NEXT: sbbb $0, %cl 518; SSE4-NEXT: movzbl %cl, %ecx 519; SSE4-NEXT: shll $8, %ecx 520; SSE4-NEXT: orl %eax, %ecx 521; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 522; SSE4-NEXT: movd %xmm2, %eax 523; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 524; SSE4-NEXT: movd %xmm2, %edx 525; SSE4-NEXT: cmpl %eax, %edx 526; SSE4-NEXT: seta %al 527; SSE4-NEXT: sbbb $0, %al 528; SSE4-NEXT: movzbl %al, %eax 529; SSE4-NEXT: shll $16, %eax 530; SSE4-NEXT: orl %ecx, %eax 531; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 532; SSE4-NEXT: movd %xmm1, %ecx 533; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 534; SSE4-NEXT: movd %xmm0, %edx 535; SSE4-NEXT: cmpl %ecx, %edx 536; SSE4-NEXT: seta %cl 537; SSE4-NEXT: sbbb $0, %cl 538; SSE4-NEXT: movzbl %cl, %ecx 539; SSE4-NEXT: shll $24, %ecx 540; SSE4-NEXT: orl %eax, %ecx 541; SSE4-NEXT: movd %ecx, %xmm0 542; SSE4-NEXT: retq 543; 544; SSE2-LABEL: ucmp_narrow_vec_result: 545; SSE2: # %bb.0: 546; SSE2-NEXT: pextrd $1, %xmm1, %eax 547; SSE2-NEXT: pextrd $1, %xmm0, %ecx 548; SSE2-NEXT: cmpl %eax, %ecx 549; SSE2-NEXT: seta %al 550; SSE2-NEXT: sbbb $0, %al 551; SSE2-NEXT: movzbl %al, %eax 552; SSE2-NEXT: movd %xmm1, %ecx 553; SSE2-NEXT: movd %xmm0, %edx 554; SSE2-NEXT: cmpl %ecx, %edx 555; SSE2-NEXT: seta %cl 556; SSE2-NEXT: sbbb $0, %cl 557; SSE2-NEXT: movzbl %cl, %ecx 558; SSE2-NEXT: movd %ecx, %xmm2 559; SSE2-NEXT: pinsrb $1, %eax, %xmm2 560; SSE2-NEXT: pextrd $2, %xmm1, %eax 561; SSE2-NEXT: pextrd $2, %xmm0, %ecx 562; SSE2-NEXT: cmpl %eax, %ecx 563; SSE2-NEXT: seta %al 564; SSE2-NEXT: sbbb $0, %al 565; SSE2-NEXT: movzbl %al, %eax 566; SSE2-NEXT: pinsrb $2, %eax, %xmm2 567; SSE2-NEXT: pextrd $3, %xmm1, %eax 568; SSE2-NEXT: pextrd $3, %xmm0, %ecx 569; SSE2-NEXT: cmpl %eax, %ecx 570; SSE2-NEXT: seta %al 571; SSE2-NEXT: sbbb $0, %al 572; SSE2-NEXT: movzbl %al, %eax 573; SSE2-NEXT: pinsrb $3, %eax, %xmm2 574; SSE2-NEXT: movdqa %xmm2, %xmm0 575; SSE2-NEXT: retq 576; 577; AVX-LABEL: ucmp_narrow_vec_result: 578; AVX: # %bb.0: 579; AVX-NEXT: vpextrd $1, %xmm1, %eax 580; AVX-NEXT: vpextrd $1, %xmm0, %ecx 581; AVX-NEXT: cmpl %eax, %ecx 582; AVX-NEXT: seta %al 583; AVX-NEXT: sbbb $0, %al 584; AVX-NEXT: vmovd %xmm1, %ecx 585; AVX-NEXT: vmovd %xmm0, %edx 586; AVX-NEXT: cmpl %ecx, %edx 587; AVX-NEXT: seta %cl 588; AVX-NEXT: sbbb $0, %cl 589; AVX-NEXT: vmovd %ecx, 
%xmm2 590; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 591; AVX-NEXT: vpextrd $2, %xmm1, %eax 592; AVX-NEXT: vpextrd $2, %xmm0, %ecx 593; AVX-NEXT: cmpl %eax, %ecx 594; AVX-NEXT: seta %al 595; AVX-NEXT: sbbb $0, %al 596; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 597; AVX-NEXT: vpextrd $3, %xmm1, %eax 598; AVX-NEXT: vpextrd $3, %xmm0, %ecx 599; AVX-NEXT: cmpl %eax, %ecx 600; AVX-NEXT: seta %al 601; AVX-NEXT: sbbb $0, %al 602; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm0 603; AVX-NEXT: retq 604; 605; X86-LABEL: ucmp_narrow_vec_result: 606; X86: # %bb.0: 607; X86-NEXT: pushl %ebx 608; X86-NEXT: pushl %edi 609; X86-NEXT: pushl %esi 610; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 611; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 612; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 613; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 614; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 615; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 616; X86-NEXT: seta %cl 617; X86-NEXT: sbbb $0, %cl 618; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi 619; X86-NEXT: seta %ch 620; X86-NEXT: sbbb $0, %ch 621; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi 622; X86-NEXT: seta %bl 623; X86-NEXT: sbbb $0, %bl 624; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx 625; X86-NEXT: seta %dl 626; X86-NEXT: sbbb $0, %dl 627; X86-NEXT: movb %dl, 3(%eax) 628; X86-NEXT: movb %bl, 2(%eax) 629; X86-NEXT: movb %ch, 1(%eax) 630; X86-NEXT: movb %cl, (%eax) 631; X86-NEXT: popl %esi 632; X86-NEXT: popl %edi 633; X86-NEXT: popl %ebx 634; X86-NEXT: retl $4 635 %1 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y) 636 ret <4 x i8> %1 637} 638 639define <4 x i32> @ucmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind { 640; SSE4-LABEL: ucmp_narrow_vec_op: 641; SSE4: # %bb.0: 642; SSE4-NEXT: pxor %xmm2, %xmm2 643; SSE4-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 644; SSE4-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 645; SSE4-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 646; SSE4-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 647; SSE4-NEXT: movdqa %xmm0, %xmm2 648; SSE4-NEXT: pcmpgtd %xmm1, %xmm2 649; SSE4-NEXT: pcmpgtd %xmm0, %xmm1 650; SSE4-NEXT: psubd %xmm2, %xmm1 651; SSE4-NEXT: movdqa %xmm1, %xmm0 652; SSE4-NEXT: retq 653; 654; SSE2-LABEL: ucmp_narrow_vec_op: 655; SSE2: # %bb.0: 656; SSE2-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 657; SSE2-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 658; SSE2-NEXT: movdqa %xmm0, %xmm2 659; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 660; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 661; SSE2-NEXT: psubd %xmm2, %xmm1 662; SSE2-NEXT: movdqa %xmm1, %xmm0 663; SSE2-NEXT: retq 664; 665; AVX2-LABEL: ucmp_narrow_vec_op: 666; AVX2: # %bb.0: 667; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 668; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 669; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm2 670; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 671; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0 672; AVX2-NEXT: retq 673; 674; AVX512-LABEL: ucmp_narrow_vec_op: 675; AVX512: # %bb.0: 676; 
AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 677; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 678; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 679; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k2 680; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1] 681; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 682; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} 683; AVX512-NEXT: retq 684; 685; X86-LABEL: ucmp_narrow_vec_op: 686; X86: # %bb.0: 687; X86-NEXT: pushl %ebx 688; X86-NEXT: pushl %edi 689; X86-NEXT: pushl %esi 690; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 691; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 692; X86-NEXT: movb {{[0-9]+}}(%esp), %ch 693; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 694; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx 695; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl 696; X86-NEXT: seta %dl 697; X86-NEXT: sbbb $0, %dl 698; X86-NEXT: movsbl %dl, %edx 699; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl 700; X86-NEXT: seta %bl 701; X86-NEXT: sbbb $0, %bl 702; X86-NEXT: movsbl %bl, %esi 703; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch 704; X86-NEXT: seta %ch 705; X86-NEXT: sbbb $0, %ch 706; X86-NEXT: movsbl %ch, %edi 707; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl 708; X86-NEXT: seta %cl 709; X86-NEXT: sbbb $0, %cl 710; X86-NEXT: movsbl %cl, %ecx 711; X86-NEXT: movl %ecx, 12(%eax) 712; X86-NEXT: movl %edi, 8(%eax) 713; X86-NEXT: movl %esi, 4(%eax) 714; X86-NEXT: movl %edx, (%eax) 715; X86-NEXT: popl %esi 716; X86-NEXT: popl %edi 717; X86-NEXT: popl %ebx 718; X86-NEXT: retl $4 719 %1 = call <4 x i32> @llvm.ucmp(<4 x i8> %x, <4 x i8> %y) 720 ret <4 x i32> %1 721} 722 723define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { 724; SSE4-LABEL: ucmp_wide_vec_result: 725; SSE4: # %bb.0: 726; SSE4-NEXT: movdqa %xmm1, %xmm3 727; SSE4-NEXT: pxor %xmm5, %xmm5 728; SSE4-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] 729; SSE4-NEXT: movdqa %xmm1, %xmm4 730; SSE4-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] 731; SSE4-NEXT: movdqa %xmm0, %xmm2 732; SSE4-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] 733; SSE4-NEXT: movdqa %xmm2, %xmm6 734; SSE4-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 735; SSE4-NEXT: movdqa %xmm6, %xmm7 736; SSE4-NEXT: pcmpgtd %xmm4, %xmm7 737; SSE4-NEXT: pcmpgtd %xmm6, %xmm4 738; SSE4-NEXT: psubd %xmm7, %xmm4 739; SSE4-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] 740; SSE4-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] 741; SSE4-NEXT: movdqa %xmm2, %xmm6 742; SSE4-NEXT: pcmpgtd %xmm1, %xmm6 743; SSE4-NEXT: pcmpgtd %xmm2, %xmm1 744; SSE4-NEXT: psubd %xmm6, %xmm1 745; SSE4-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm5[8],xmm3[9],xmm5[9],xmm3[10],xmm5[10],xmm3[11],xmm5[11],xmm3[12],xmm5[12],xmm3[13],xmm5[13],xmm3[14],xmm5[14],xmm3[15],xmm5[15] 746; SSE4-NEXT: movdqa %xmm3, %xmm2 747; SSE4-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3] 748; SSE4-NEXT: punpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm5[8],xmm0[9],xmm5[9],xmm0[10],xmm5[10],xmm0[11],xmm5[11],xmm0[12],xmm5[12],xmm0[13],xmm5[13],xmm0[14],xmm5[14],xmm0[15],xmm5[15] 749; SSE4-NEXT: movdqa %xmm0, %xmm6 750; SSE4-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 751; SSE4-NEXT: movdqa %xmm6, %xmm7 752; SSE4-NEXT: pcmpgtd %xmm2, %xmm7 753; SSE4-NEXT: pcmpgtd %xmm6, %xmm2 754; SSE4-NEXT: psubd %xmm7, %xmm2 755; SSE4-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 756; SSE4-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 757; SSE4-NEXT: movdqa %xmm0, %xmm5 758; SSE4-NEXT: pcmpgtd %xmm3, %xmm5 759; SSE4-NEXT: pcmpgtd %xmm0, %xmm3 760; SSE4-NEXT: psubd %xmm5, %xmm3 761; SSE4-NEXT: movdqa %xmm4, %xmm0 762; SSE4-NEXT: retq 763; 764; SSE2-LABEL: ucmp_wide_vec_result: 765; SSE2: # %bb.0: 766; SSE2-NEXT: movdqa %xmm0, %xmm4 767; SSE2-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 768; SSE2-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero 769; SSE2-NEXT: movdqa %xmm2, %xmm3 770; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 771; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 772; SSE2-NEXT: psubd %xmm3, %xmm0 773; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 774; SSE2-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 775; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,1,1] 776; SSE2-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 777; SSE2-NEXT: movdqa %xmm2, %xmm3 778; SSE2-NEXT: pcmpgtd %xmm5, %xmm3 779; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 780; SSE2-NEXT: psubd %xmm3, %xmm5 781; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 782; SSE2-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 783; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3] 784; SSE2-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 785; SSE2-NEXT: movdqa %xmm3, %xmm6 786; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 787; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 788; SSE2-NEXT: psubd %xmm6, %xmm2 789; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 790; SSE2-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 791; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3] 792; SSE2-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 793; SSE2-NEXT: movdqa %xmm1, %xmm4 794; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 795; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 796; SSE2-NEXT: psubd %xmm4, %xmm3 797; SSE2-NEXT: movdqa %xmm5, %xmm1 798; SSE2-NEXT: retq 799; 800; AVX2-LABEL: ucmp_wide_vec_result: 801; AVX2: # %bb.0: 802; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero 803; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 804; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm4 805; AVX2-NEXT: vpcmpgtd %ymm3, 
%ymm2, %ymm2 806; AVX2-NEXT: vpsubd %ymm4, %ymm2, %ymm2 807; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 808; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero 809; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 810; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 811; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm3 812; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 813; AVX2-NEXT: vpsubd %ymm3, %ymm0, %ymm1 814; AVX2-NEXT: vmovdqa %ymm2, %ymm0 815; AVX2-NEXT: retq 816; 817; AVX512-LABEL: ucmp_wide_vec_result: 818; AVX512: # %bb.0: 819; AVX512-NEXT: vpcmpltub %xmm1, %xmm0, %k1 820; AVX512-NEXT: vpcmpnleub %xmm1, %xmm0, %k2 821; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 822; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 823; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} 824; AVX512-NEXT: retq 825; 826; X86-LABEL: ucmp_wide_vec_result: 827; X86: # %bb.0: 828; X86-NEXT: pushl %ebp 829; X86-NEXT: pushl %ebx 830; X86-NEXT: pushl %edi 831; X86-NEXT: pushl %esi 832; X86-NEXT: subl $12, %esp 833; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx 834; X86-NEXT: movb {{[0-9]+}}(%esp), %ah 835; X86-NEXT: movb {{[0-9]+}}(%esp), %ch 836; X86-NEXT: movb {{[0-9]+}}(%esp), %dh 837; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 838; X86-NEXT: movb {{[0-9]+}}(%esp), %bh 839; X86-NEXT: movb {{[0-9]+}}(%esp), %al 840; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 841; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl 842; X86-NEXT: seta %cl 843; X86-NEXT: sbbb $0, %cl 844; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 845; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 846; X86-NEXT: seta %al 847; X86-NEXT: sbbb $0, %al 848; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 849; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh 850; X86-NEXT: seta %al 851; X86-NEXT: sbbb $0, %al 852; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 853; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl 854; X86-NEXT: seta %al 855; X86-NEXT: sbbb $0, %al 856; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 857; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh 858; X86-NEXT: seta %al 859; X86-NEXT: sbbb $0, %al 860; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 861; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch 862; X86-NEXT: seta %al 863; X86-NEXT: sbbb $0, %al 864; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 865; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah 866; X86-NEXT: seta %al 867; X86-NEXT: sbbb $0, %al 868; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 869; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl 870; X86-NEXT: seta %bl 871; X86-NEXT: sbbb $0, %bl 872; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 873; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 874; X86-NEXT: seta %al 875; X86-NEXT: sbbb $0, %al 876; X86-NEXT: movb %al, (%esp) # 1-byte Spill 877; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 878; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 879; X86-NEXT: seta %bh 880; X86-NEXT: sbbb $0, %bh 881; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 882; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 883; X86-NEXT: seta %al 884; X86-NEXT: sbbb $0, %al 885; X86-NEXT: movsbl %al, %eax 886; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 887; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 888; X86-NEXT: cmpb 
{{[0-9]+}}(%esp), %al 889; X86-NEXT: seta %al 890; X86-NEXT: sbbb $0, %al 891; X86-NEXT: movsbl %al, %edi 892; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 893; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 894; X86-NEXT: seta %al 895; X86-NEXT: sbbb $0, %al 896; X86-NEXT: movsbl %al, %ebp 897; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 898; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 899; X86-NEXT: seta %al 900; X86-NEXT: sbbb $0, %al 901; X86-NEXT: movsbl %al, %esi 902; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 903; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 904; X86-NEXT: seta %al 905; X86-NEXT: sbbb $0, %al 906; X86-NEXT: movsbl %al, %edx 907; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 908; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 909; X86-NEXT: seta %al 910; X86-NEXT: sbbb $0, %al 911; X86-NEXT: movsbl %al, %ecx 912; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 913; X86-NEXT: movl %ecx, 60(%eax) 914; X86-NEXT: movl %edx, 56(%eax) 915; X86-NEXT: movl %esi, 52(%eax) 916; X86-NEXT: movl %ebp, 48(%eax) 917; X86-NEXT: movl %edi, 44(%eax) 918; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 919; X86-NEXT: movl %ecx, 40(%eax) 920; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload 921; X86-NEXT: movsbl %bh, %ecx 922; X86-NEXT: movl %ecx, 36(%eax) 923; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 924; X86-NEXT: movsbl (%esp), %edx # 1-byte Folded Reload 925; X86-NEXT: movl %edx, 32(%eax) 926; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload 927; X86-NEXT: movsbl %bl, %edi 928; X86-NEXT: movl %edi, 28(%eax) 929; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload 930; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload 931; X86-NEXT: movl %ebx, 24(%eax) 932; X86-NEXT: movl %edi, 20(%eax) 933; X86-NEXT: movl %edx, 16(%eax) 934; X86-NEXT: movl %ecx, 12(%eax) 935; X86-NEXT: movl %esi, 8(%eax) 936; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 937; X86-NEXT: movl %ecx, 4(%eax) 938; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 939; X86-NEXT: movl %ecx, (%eax) 940; X86-NEXT: addl $12, %esp 941; X86-NEXT: popl %esi 942; X86-NEXT: popl %edi 943; X86-NEXT: popl %ebx 944; X86-NEXT: popl %ebp 945; X86-NEXT: retl $4 946 %1 = call <16 x i32> @llvm.ucmp(<16 x i8> %x, <16 x i8> %y) 947 ret <16 x i32> %1 948} 949 950define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind { 951; SSE4-LABEL: ucmp_wide_vec_op: 952; SSE4: # %bb.0: 953; SSE4-NEXT: pushq %rbp 954; SSE4-NEXT: pushq %r15 955; SSE4-NEXT: pushq %r14 956; SSE4-NEXT: pushq %r13 957; SSE4-NEXT: pushq %r12 958; SSE4-NEXT: pushq %rbx 959; SSE4-NEXT: pshufd {{.*#+}} xmm8 = xmm7[3,3,3,3] 960; SSE4-NEXT: movd %xmm8, %eax 961; SSE4-NEXT: pshufd {{.*#+}} xmm8 = xmm3[3,3,3,3] 962; SSE4-NEXT: movd %xmm8, %ecx 963; SSE4-NEXT: cmpl %eax, %ecx 964; SSE4-NEXT: seta %al 965; SSE4-NEXT: sbbb $0, %al 966; SSE4-NEXT: pshufd {{.*#+}} xmm8 = xmm7[2,3,2,3] 967; SSE4-NEXT: movd %xmm8, %ecx 968; SSE4-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3] 969; SSE4-NEXT: movd %xmm8, %edx 970; SSE4-NEXT: cmpl %ecx, %edx 971; SSE4-NEXT: seta %cl 972; SSE4-NEXT: sbbb $0, %cl 973; SSE4-NEXT: movd %xmm7, %edx 974; SSE4-NEXT: movd %xmm3, %esi 975; SSE4-NEXT: cmpl %edx, %esi 976; SSE4-NEXT: seta %dl 977; SSE4-NEXT: sbbb $0, %dl 978; SSE4-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,1,1] 979; SSE4-NEXT: movd %xmm7, %esi 980; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1] 981; SSE4-NEXT: movd %xmm3, %edi 982; SSE4-NEXT: cmpl %esi, %edi 983; SSE4-NEXT: seta %sil 
984; SSE4-NEXT: movzbl %al, %eax 985; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 986; SSE4-NEXT: sbbb $0, %sil 987; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm6[3,3,3,3] 988; SSE4-NEXT: movd %xmm3, %edi 989; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3] 990; SSE4-NEXT: movd %xmm3, %r8d 991; SSE4-NEXT: cmpl %edi, %r8d 992; SSE4-NEXT: seta %dil 993; SSE4-NEXT: sbbb $0, %dil 994; SSE4-NEXT: movzbl %cl, %eax 995; SSE4-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 996; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,2,3] 997; SSE4-NEXT: movd %xmm3, %r8d 998; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 999; SSE4-NEXT: movd %xmm3, %r9d 1000; SSE4-NEXT: cmpl %r8d, %r9d 1001; SSE4-NEXT: seta %r8b 1002; SSE4-NEXT: movzbl %dl, %edx 1003; SSE4-NEXT: sbbb $0, %r8b 1004; SSE4-NEXT: movd %xmm6, %r9d 1005; SSE4-NEXT: movd %xmm2, %r10d 1006; SSE4-NEXT: cmpl %r9d, %r10d 1007; SSE4-NEXT: seta %r9b 1008; SSE4-NEXT: movzbl %sil, %esi 1009; SSE4-NEXT: sbbb $0, %r9b 1010; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,1,1] 1011; SSE4-NEXT: movd %xmm3, %r10d 1012; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] 1013; SSE4-NEXT: movd %xmm2, %r11d 1014; SSE4-NEXT: cmpl %r10d, %r11d 1015; SSE4-NEXT: seta %r10b 1016; SSE4-NEXT: sbbb $0, %r10b 1017; SSE4-NEXT: movzbl %dil, %edi 1018; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm5[3,3,3,3] 1019; SSE4-NEXT: movd %xmm2, %r11d 1020; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] 1021; SSE4-NEXT: movd %xmm2, %ebx 1022; SSE4-NEXT: cmpl %r11d, %ebx 1023; SSE4-NEXT: seta %r11b 1024; SSE4-NEXT: movzbl %r8b, %r8d 1025; SSE4-NEXT: sbbb $0, %r11b 1026; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3] 1027; SSE4-NEXT: movd %xmm2, %ebx 1028; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 1029; SSE4-NEXT: movd %xmm2, %ebp 1030; SSE4-NEXT: cmpl %ebx, %ebp 1031; SSE4-NEXT: seta %bpl 1032; SSE4-NEXT: sbbb $0, %bpl 1033; SSE4-NEXT: movzbl %r9b, %r9d 1034; SSE4-NEXT: movd %xmm5, %ebx 1035; SSE4-NEXT: movd %xmm1, %r14d 1036; SSE4-NEXT: cmpl %ebx, %r14d 1037; SSE4-NEXT: seta %r14b 1038; SSE4-NEXT: sbbb $0, %r14b 1039; SSE4-NEXT: movzbl %r10b, %r10d 1040; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,1,1] 1041; SSE4-NEXT: movd %xmm2, %ebx 1042; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] 1043; SSE4-NEXT: movd %xmm1, %r15d 1044; SSE4-NEXT: cmpl %ebx, %r15d 1045; SSE4-NEXT: seta %bl 1046; SSE4-NEXT: movzbl %r11b, %r11d 1047; SSE4-NEXT: sbbb $0, %bl 1048; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3] 1049; SSE4-NEXT: movd %xmm1, %r15d 1050; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] 1051; SSE4-NEXT: movd %xmm1, %r12d 1052; SSE4-NEXT: cmpl %r15d, %r12d 1053; SSE4-NEXT: seta %r12b 1054; SSE4-NEXT: sbbb $0, %r12b 1055; SSE4-NEXT: movzbl %bpl, %ebp 1056; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,2,3] 1057; SSE4-NEXT: movd %xmm1, %r15d 1058; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1059; SSE4-NEXT: movd %xmm1, %r13d 1060; SSE4-NEXT: cmpl %r15d, %r13d 1061; SSE4-NEXT: seta %r13b 1062; SSE4-NEXT: movzbl %r14b, %r15d 1063; SSE4-NEXT: sbbb $0, %r13b 1064; SSE4-NEXT: movd %xmm4, %r14d 1065; SSE4-NEXT: movd %xmm0, %eax 1066; SSE4-NEXT: cmpl %r14d, %eax 1067; SSE4-NEXT: seta %r14b 1068; SSE4-NEXT: sbbb $0, %r14b 1069; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,1,1] 1070; SSE4-NEXT: movd %xmm1, %eax 1071; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1072; SSE4-NEXT: movd %xmm0, %ecx 1073; SSE4-NEXT: cmpl %eax, %ecx 1074; SSE4-NEXT: movzbl %bl, %eax 1075; SSE4-NEXT: movzbl %r12b, %ecx 1076; SSE4-NEXT: movzbl %r13b, %ebx 1077; SSE4-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
4-byte Folded Reload 1078; SSE4-NEXT: # xmm0 = mem[0],zero,zero,zero 1079; SSE4-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Folded Reload 1080; SSE4-NEXT: # xmm2 = mem[0],zero,zero,zero 1081; SSE4-NEXT: movd %edx, %xmm3 1082; SSE4-NEXT: movd %esi, %xmm4 1083; SSE4-NEXT: movd %edi, %xmm5 1084; SSE4-NEXT: movd %r8d, %xmm6 1085; SSE4-NEXT: movd %r9d, %xmm1 1086; SSE4-NEXT: movd %r10d, %xmm7 1087; SSE4-NEXT: movd %r11d, %xmm8 1088; SSE4-NEXT: movd %ebp, %xmm9 1089; SSE4-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 1090; SSE4-NEXT: movd %r15d, %xmm10 1091; SSE4-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1092; SSE4-NEXT: movd %eax, %xmm0 1093; SSE4-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1094; SSE4-NEXT: movd %ecx, %xmm2 1095; SSE4-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 1096; SSE4-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3],xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7] 1097; SSE4-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] 1098; SSE4-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1099; SSE4-NEXT: punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] 1100; SSE4-NEXT: punpcklbw {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3],xmm10[4],xmm0[4],xmm10[5],xmm0[5],xmm10[6],xmm0[6],xmm10[7],xmm0[7] 1101; SSE4-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3] 1102; SSE4-NEXT: movd %ebx, %xmm3 1103; SSE4-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1104; SSE4-NEXT: movzbl %r14b, %eax 1105; SSE4-NEXT: seta %cl 1106; SSE4-NEXT: sbbb $0, %cl 1107; SSE4-NEXT: movd %eax, %xmm0 1108; SSE4-NEXT: movzbl %cl, %eax 1109; SSE4-NEXT: movd %eax, %xmm2 1110; SSE4-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1111; SSE4-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1112; SSE4-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1] 1113; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1114; SSE4-NEXT: popq %rbx 1115; SSE4-NEXT: popq %r12 1116; SSE4-NEXT: popq %r13 1117; SSE4-NEXT: popq %r14 1118; SSE4-NEXT: popq %r15 1119; SSE4-NEXT: popq %rbp 1120; SSE4-NEXT: retq 1121; 1122; SSE2-LABEL: ucmp_wide_vec_op: 1123; SSE2: # %bb.0: 1124; SSE2-NEXT: pextrd $1, %xmm4, %eax 1125; SSE2-NEXT: movdqa %xmm0, %xmm8 1126; SSE2-NEXT: pextrd $1, %xmm0, %ecx 1127; SSE2-NEXT: cmpl %eax, %ecx 1128; SSE2-NEXT: seta %al 1129; SSE2-NEXT: sbbb $0, %al 1130; SSE2-NEXT: movzbl %al, %eax 1131; SSE2-NEXT: movd %xmm4, %ecx 1132; SSE2-NEXT: movd %xmm0, %edx 1133; SSE2-NEXT: cmpl %ecx, %edx 1134; SSE2-NEXT: seta %cl 1135; SSE2-NEXT: sbbb $0, %cl 1136; SSE2-NEXT: movzbl %cl, %ecx 1137; SSE2-NEXT: movd %ecx, %xmm0 1138; SSE2-NEXT: 
pinsrb $1, %eax, %xmm0 1139; SSE2-NEXT: pextrd $2, %xmm4, %eax 1140; SSE2-NEXT: pextrd $2, %xmm8, %ecx 1141; SSE2-NEXT: cmpl %eax, %ecx 1142; SSE2-NEXT: seta %al 1143; SSE2-NEXT: sbbb $0, %al 1144; SSE2-NEXT: movzbl %al, %eax 1145; SSE2-NEXT: pinsrb $2, %eax, %xmm0 1146; SSE2-NEXT: pextrd $3, %xmm4, %eax 1147; SSE2-NEXT: pextrd $3, %xmm8, %ecx 1148; SSE2-NEXT: cmpl %eax, %ecx 1149; SSE2-NEXT: seta %al 1150; SSE2-NEXT: sbbb $0, %al 1151; SSE2-NEXT: movzbl %al, %eax 1152; SSE2-NEXT: pinsrb $3, %eax, %xmm0 1153; SSE2-NEXT: movd %xmm5, %eax 1154; SSE2-NEXT: movd %xmm1, %ecx 1155; SSE2-NEXT: cmpl %eax, %ecx 1156; SSE2-NEXT: seta %al 1157; SSE2-NEXT: sbbb $0, %al 1158; SSE2-NEXT: movzbl %al, %eax 1159; SSE2-NEXT: pinsrb $4, %eax, %xmm0 1160; SSE2-NEXT: pextrd $1, %xmm5, %eax 1161; SSE2-NEXT: pextrd $1, %xmm1, %ecx 1162; SSE2-NEXT: cmpl %eax, %ecx 1163; SSE2-NEXT: seta %al 1164; SSE2-NEXT: sbbb $0, %al 1165; SSE2-NEXT: movzbl %al, %eax 1166; SSE2-NEXT: pinsrb $5, %eax, %xmm0 1167; SSE2-NEXT: pextrd $2, %xmm5, %eax 1168; SSE2-NEXT: pextrd $2, %xmm1, %ecx 1169; SSE2-NEXT: cmpl %eax, %ecx 1170; SSE2-NEXT: seta %al 1171; SSE2-NEXT: sbbb $0, %al 1172; SSE2-NEXT: movzbl %al, %eax 1173; SSE2-NEXT: pinsrb $6, %eax, %xmm0 1174; SSE2-NEXT: pextrd $3, %xmm5, %eax 1175; SSE2-NEXT: pextrd $3, %xmm1, %ecx 1176; SSE2-NEXT: cmpl %eax, %ecx 1177; SSE2-NEXT: seta %al 1178; SSE2-NEXT: sbbb $0, %al 1179; SSE2-NEXT: movzbl %al, %eax 1180; SSE2-NEXT: pinsrb $7, %eax, %xmm0 1181; SSE2-NEXT: movd %xmm6, %eax 1182; SSE2-NEXT: movd %xmm2, %ecx 1183; SSE2-NEXT: cmpl %eax, %ecx 1184; SSE2-NEXT: seta %al 1185; SSE2-NEXT: sbbb $0, %al 1186; SSE2-NEXT: movzbl %al, %eax 1187; SSE2-NEXT: pinsrb $8, %eax, %xmm0 1188; SSE2-NEXT: pextrd $1, %xmm6, %eax 1189; SSE2-NEXT: pextrd $1, %xmm2, %ecx 1190; SSE2-NEXT: cmpl %eax, %ecx 1191; SSE2-NEXT: seta %al 1192; SSE2-NEXT: sbbb $0, %al 1193; SSE2-NEXT: movzbl %al, %eax 1194; SSE2-NEXT: pinsrb $9, %eax, %xmm0 1195; SSE2-NEXT: pextrd $2, %xmm6, %eax 1196; SSE2-NEXT: pextrd $2, %xmm2, %ecx 1197; SSE2-NEXT: cmpl %eax, %ecx 1198; SSE2-NEXT: seta %al 1199; SSE2-NEXT: sbbb $0, %al 1200; SSE2-NEXT: movzbl %al, %eax 1201; SSE2-NEXT: pinsrb $10, %eax, %xmm0 1202; SSE2-NEXT: pextrd $3, %xmm6, %eax 1203; SSE2-NEXT: pextrd $3, %xmm2, %ecx 1204; SSE2-NEXT: cmpl %eax, %ecx 1205; SSE2-NEXT: seta %al 1206; SSE2-NEXT: sbbb $0, %al 1207; SSE2-NEXT: movzbl %al, %eax 1208; SSE2-NEXT: pinsrb $11, %eax, %xmm0 1209; SSE2-NEXT: movd %xmm7, %eax 1210; SSE2-NEXT: movd %xmm3, %ecx 1211; SSE2-NEXT: cmpl %eax, %ecx 1212; SSE2-NEXT: seta %al 1213; SSE2-NEXT: sbbb $0, %al 1214; SSE2-NEXT: movzbl %al, %eax 1215; SSE2-NEXT: pinsrb $12, %eax, %xmm0 1216; SSE2-NEXT: pextrd $1, %xmm7, %eax 1217; SSE2-NEXT: pextrd $1, %xmm3, %ecx 1218; SSE2-NEXT: cmpl %eax, %ecx 1219; SSE2-NEXT: seta %al 1220; SSE2-NEXT: sbbb $0, %al 1221; SSE2-NEXT: movzbl %al, %eax 1222; SSE2-NEXT: pinsrb $13, %eax, %xmm0 1223; SSE2-NEXT: pextrd $2, %xmm7, %eax 1224; SSE2-NEXT: pextrd $2, %xmm3, %ecx 1225; SSE2-NEXT: cmpl %eax, %ecx 1226; SSE2-NEXT: seta %al 1227; SSE2-NEXT: sbbb $0, %al 1228; SSE2-NEXT: movzbl %al, %eax 1229; SSE2-NEXT: pinsrb $14, %eax, %xmm0 1230; SSE2-NEXT: pextrd $3, %xmm7, %eax 1231; SSE2-NEXT: pextrd $3, %xmm3, %ecx 1232; SSE2-NEXT: cmpl %eax, %ecx 1233; SSE2-NEXT: seta %al 1234; SSE2-NEXT: sbbb $0, %al 1235; SSE2-NEXT: movzbl %al, %eax 1236; SSE2-NEXT: pinsrb $15, %eax, %xmm0 1237; SSE2-NEXT: retq 1238; 1239; AVX2-LABEL: ucmp_wide_vec_op: 1240; AVX2: # %bb.0: 1241; AVX2-NEXT: vpextrd $1, %xmm2, %eax 1242; AVX2-NEXT: vpextrd $1, 
%xmm0, %ecx 1243; AVX2-NEXT: cmpl %eax, %ecx 1244; AVX2-NEXT: seta %al 1245; AVX2-NEXT: sbbb $0, %al 1246; AVX2-NEXT: vmovd %xmm2, %ecx 1247; AVX2-NEXT: vmovd %xmm0, %edx 1248; AVX2-NEXT: cmpl %ecx, %edx 1249; AVX2-NEXT: seta %cl 1250; AVX2-NEXT: sbbb $0, %cl 1251; AVX2-NEXT: vmovd %ecx, %xmm4 1252; AVX2-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 1253; AVX2-NEXT: vpextrd $2, %xmm2, %eax 1254; AVX2-NEXT: vpextrd $2, %xmm0, %ecx 1255; AVX2-NEXT: cmpl %eax, %ecx 1256; AVX2-NEXT: seta %al 1257; AVX2-NEXT: sbbb $0, %al 1258; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 1259; AVX2-NEXT: vpextrd $3, %xmm2, %eax 1260; AVX2-NEXT: vpextrd $3, %xmm0, %ecx 1261; AVX2-NEXT: cmpl %eax, %ecx 1262; AVX2-NEXT: seta %al 1263; AVX2-NEXT: sbbb $0, %al 1264; AVX2-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 1265; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 1266; AVX2-NEXT: vmovd %xmm2, %eax 1267; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1268; AVX2-NEXT: vmovd %xmm0, %ecx 1269; AVX2-NEXT: cmpl %eax, %ecx 1270; AVX2-NEXT: seta %al 1271; AVX2-NEXT: sbbb $0, %al 1272; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 1273; AVX2-NEXT: vpextrd $1, %xmm2, %eax 1274; AVX2-NEXT: vpextrd $1, %xmm0, %ecx 1275; AVX2-NEXT: cmpl %eax, %ecx 1276; AVX2-NEXT: seta %al 1277; AVX2-NEXT: sbbb $0, %al 1278; AVX2-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 1279; AVX2-NEXT: vpextrd $2, %xmm2, %eax 1280; AVX2-NEXT: vpextrd $2, %xmm0, %ecx 1281; AVX2-NEXT: cmpl %eax, %ecx 1282; AVX2-NEXT: seta %al 1283; AVX2-NEXT: sbbb $0, %al 1284; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 1285; AVX2-NEXT: vpextrd $3, %xmm2, %eax 1286; AVX2-NEXT: vpextrd $3, %xmm0, %ecx 1287; AVX2-NEXT: cmpl %eax, %ecx 1288; AVX2-NEXT: seta %al 1289; AVX2-NEXT: sbbb $0, %al 1290; AVX2-NEXT: vpinsrb $7, %eax, %xmm4, %xmm0 1291; AVX2-NEXT: vmovd %xmm3, %eax 1292; AVX2-NEXT: vmovd %xmm1, %ecx 1293; AVX2-NEXT: cmpl %eax, %ecx 1294; AVX2-NEXT: seta %al 1295; AVX2-NEXT: sbbb $0, %al 1296; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1297; AVX2-NEXT: vpextrd $1, %xmm3, %eax 1298; AVX2-NEXT: vpextrd $1, %xmm1, %ecx 1299; AVX2-NEXT: cmpl %eax, %ecx 1300; AVX2-NEXT: seta %al 1301; AVX2-NEXT: sbbb $0, %al 1302; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 1303; AVX2-NEXT: vpextrd $2, %xmm3, %eax 1304; AVX2-NEXT: vpextrd $2, %xmm1, %ecx 1305; AVX2-NEXT: cmpl %eax, %ecx 1306; AVX2-NEXT: seta %al 1307; AVX2-NEXT: sbbb $0, %al 1308; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1309; AVX2-NEXT: vpextrd $3, %xmm3, %eax 1310; AVX2-NEXT: vpextrd $3, %xmm1, %ecx 1311; AVX2-NEXT: cmpl %eax, %ecx 1312; AVX2-NEXT: seta %al 1313; AVX2-NEXT: sbbb $0, %al 1314; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 1315; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm2 1316; AVX2-NEXT: vmovd %xmm2, %eax 1317; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 1318; AVX2-NEXT: vmovd %xmm1, %ecx 1319; AVX2-NEXT: cmpl %eax, %ecx 1320; AVX2-NEXT: seta %al 1321; AVX2-NEXT: sbbb $0, %al 1322; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1323; AVX2-NEXT: vpextrd $1, %xmm2, %eax 1324; AVX2-NEXT: vpextrd $1, %xmm1, %ecx 1325; AVX2-NEXT: cmpl %eax, %ecx 1326; AVX2-NEXT: seta %al 1327; AVX2-NEXT: sbbb $0, %al 1328; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 1329; AVX2-NEXT: vpextrd $2, %xmm2, %eax 1330; AVX2-NEXT: vpextrd $2, %xmm1, %ecx 1331; AVX2-NEXT: cmpl %eax, %ecx 1332; AVX2-NEXT: seta %al 1333; AVX2-NEXT: sbbb $0, %al 1334; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1335; AVX2-NEXT: vpextrd $3, %xmm2, %eax 1336; AVX2-NEXT: vpextrd $3, %xmm1, %ecx 1337; AVX2-NEXT: cmpl %eax, %ecx 1338; AVX2-NEXT: seta %al 1339; AVX2-NEXT: sbbb $0, %al 1340; AVX2-NEXT: vpinsrb $15, %eax, 
%xmm0, %xmm0 1341; AVX2-NEXT: vzeroupper 1342; AVX2-NEXT: retq 1343; 1344; AVX512-LABEL: ucmp_wide_vec_op: 1345; AVX512: # %bb.0: 1346; AVX512-NEXT: vpcmpltud %zmm1, %zmm0, %k1 1347; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k2 1348; AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1349; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1350; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 1351; AVX512-NEXT: vzeroupper 1352; AVX512-NEXT: retq 1353; 1354; X86-LABEL: ucmp_wide_vec_op: 1355; X86: # %bb.0: 1356; X86-NEXT: pushl %ebp 1357; X86-NEXT: pushl %ebx 1358; X86-NEXT: pushl %edi 1359; X86-NEXT: pushl %esi 1360; X86-NEXT: subl $12, %esp 1361; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1362; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 1363; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1364; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1365; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 1366; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp 1367; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp 1368; X86-NEXT: seta %al 1369; X86-NEXT: sbbb $0, %al 1370; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1371; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx 1372; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 1373; X86-NEXT: seta %al 1374; X86-NEXT: sbbb $0, %al 1375; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1376; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx 1377; X86-NEXT: seta %al 1378; X86-NEXT: sbbb $0, %al 1379; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1380; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx 1381; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1382; X86-NEXT: seta %al 1383; X86-NEXT: sbbb $0, %al 1384; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1385; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx 1386; X86-NEXT: seta %al 1387; X86-NEXT: sbbb $0, %al 1388; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1389; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 1390; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1391; X86-NEXT: seta %al 1392; X86-NEXT: sbbb $0, %al 1393; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1394; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 1395; X86-NEXT: seta %al 1396; X86-NEXT: sbbb $0, %al 1397; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1398; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi 1399; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1400; X86-NEXT: seta %al 1401; X86-NEXT: sbbb $0, %al 1402; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1403; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 1404; X86-NEXT: seta %al 1405; X86-NEXT: sbbb $0, %al 1406; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1407; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi 1408; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1409; X86-NEXT: seta %al 1410; X86-NEXT: sbbb $0, %al 1411; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1412; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 1413; X86-NEXT: seta %bh 1414; X86-NEXT: sbbb $0, %bh 1415; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1416; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1417; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1418; X86-NEXT: seta %bl 1419; X86-NEXT: sbbb $0, %bl 1420; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1421; X86-NEXT: seta %dh 1422; X86-NEXT: sbbb $0, %dh 1423; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1424; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1425; X86-NEXT: seta %ch 1426; X86-NEXT: sbbb $0, %ch 1427; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1428; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1429; X86-NEXT: seta %dl 1430; X86-NEXT: sbbb $0, %dl 1431; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1432; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax 1433; 
X86-NEXT: seta %cl 1434; X86-NEXT: sbbb $0, %cl 1435; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1436; X86-NEXT: movb %cl, 15(%eax) 1437; X86-NEXT: movb %dl, 14(%eax) 1438; X86-NEXT: movb %ch, 13(%eax) 1439; X86-NEXT: movb %dh, 12(%eax) 1440; X86-NEXT: movb %bl, 11(%eax) 1441; X86-NEXT: movb %bh, 10(%eax) 1442; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1443; X86-NEXT: movb %cl, 9(%eax) 1444; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1445; X86-NEXT: movb %cl, 8(%eax) 1446; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1447; X86-NEXT: movb %cl, 7(%eax) 1448; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1449; X86-NEXT: movb %cl, 6(%eax) 1450; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1451; X86-NEXT: movb %cl, 5(%eax) 1452; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1453; X86-NEXT: movb %cl, 4(%eax) 1454; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1455; X86-NEXT: movb %cl, 3(%eax) 1456; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1457; X86-NEXT: movb %cl, 2(%eax) 1458; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1459; X86-NEXT: movb %cl, 1(%eax) 1460; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 1461; X86-NEXT: movb %cl, (%eax) 1462; X86-NEXT: addl $12, %esp 1463; X86-NEXT: popl %esi 1464; X86-NEXT: popl %edi 1465; X86-NEXT: popl %ebx 1466; X86-NEXT: popl %ebp 1467; X86-NEXT: retl $4 1468 %1 = call <16 x i8> @llvm.ucmp(<16 x i32> %x, <16 x i32> %y) 1469 ret <16 x i8> %1 1470} 1471 1472define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind { 1473; SSE4-LABEL: ucmp_uncommon_vectors: 1474; SSE4: # %bb.0: 1475; SSE4-NEXT: pushq %rbp 1476; SSE4-NEXT: pushq %r15 1477; SSE4-NEXT: pushq %r14 1478; SSE4-NEXT: pushq %r13 1479; SSE4-NEXT: pushq %r12 1480; SSE4-NEXT: pushq %rbx 1481; SSE4-NEXT: subq $120, %rsp 1482; SSE4-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1483; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1484; SSE4-NEXT: andl $127, %eax 1485; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1486; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1487; SSE4-NEXT: andl $127, %eax 1488; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1489; SSE4-NEXT: andl $127, %edx 1490; SSE4-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1491; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1492; SSE4-NEXT: andl $127, %eax 1493; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1494; SSE4-NEXT: andl $127, %r8d 1495; SSE4-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1496; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1497; SSE4-NEXT: andl $127, %eax 1498; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1499; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1500; SSE4-NEXT: andl $127, %eax 1501; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1502; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1503; SSE4-NEXT: andl $127, %eax 1504; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1505; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1506; SSE4-NEXT: andl $127, %eax 1507; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1508; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1509; SSE4-NEXT: andl $127, %eax 1510; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1511; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1512; SSE4-NEXT: andl $127, %eax 1513; SSE4-NEXT: movq 
%rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1514; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1515; SSE4-NEXT: andl $127, %eax 1516; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1517; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1518; SSE4-NEXT: andl $127, %eax 1519; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1520; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1521; SSE4-NEXT: andl $127, %eax 1522; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1523; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1524; SSE4-NEXT: andl $127, %eax 1525; SSE4-NEXT: movq %rax, (%rsp) # 8-byte Spill 1526; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1527; SSE4-NEXT: andl $127, %eax 1528; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1529; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1530; SSE4-NEXT: andl $127, %eax 1531; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1532; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1533; SSE4-NEXT: andl $127, %eax 1534; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1535; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1536; SSE4-NEXT: andl $127, %eax 1537; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1538; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1539; SSE4-NEXT: andl $127, %eax 1540; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1541; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1542; SSE4-NEXT: andl $127, %eax 1543; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1544; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1545; SSE4-NEXT: andl $127, %eax 1546; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1547; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1548; SSE4-NEXT: andl $127, %eax 1549; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1550; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1551; SSE4-NEXT: andl $127, %eax 1552; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1553; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r10 1554; SSE4-NEXT: andl $127, %r10d 1555; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1556; SSE4-NEXT: andl $127, %eax 1557; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1558; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1559; SSE4-NEXT: andl $127, %ecx 1560; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r8 1561; SSE4-NEXT: andl $127, %r8d 1562; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rbx 1563; SSE4-NEXT: andl $127, %ebx 1564; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rdx 1565; SSE4-NEXT: andl $127, %edx 1566; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r13 1567; SSE4-NEXT: andl $127, %r13d 1568; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r11 1569; SSE4-NEXT: andl $127, %r11d 1570; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r14 1571; SSE4-NEXT: andl $127, %r14d 1572; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r12 1573; SSE4-NEXT: andl $127, %r12d 1574; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1575; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rbp 1576; SSE4-NEXT: cmpq %rax, %rbp 1577; SSE4-NEXT: movq %r12, %r15 1578; SSE4-NEXT: sbbq %r14, %r15 1579; SSE4-NEXT: setb %r15b 1580; SSE4-NEXT: cmpq %rbp, %rax 1581; SSE4-NEXT: sbbq %r12, %r14 1582; SSE4-NEXT: sbbb $0, %r15b 1583; SSE4-NEXT: movb %r15b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1584; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1585; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r14 1586; SSE4-NEXT: cmpq %rax, %r14 1587; SSE4-NEXT: movq %r11, %r15 1588; SSE4-NEXT: sbbq %r13, %r15 1589; SSE4-NEXT: setb %bpl 1590; SSE4-NEXT: cmpq %r14, %rax 1591; SSE4-NEXT: sbbq %r11, %r13 1592; SSE4-NEXT: sbbb $0, %bpl 1593; SSE4-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1594; 
SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1595; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r11 1596; SSE4-NEXT: cmpq %rax, %r11 1597; SSE4-NEXT: movq %rdx, %r14 1598; SSE4-NEXT: sbbq %rbx, %r14 1599; SSE4-NEXT: setb %bpl 1600; SSE4-NEXT: cmpq %r11, %rax 1601; SSE4-NEXT: sbbq %rdx, %rbx 1602; SSE4-NEXT: sbbb $0, %bpl 1603; SSE4-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1604; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1605; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rdx 1606; SSE4-NEXT: cmpq %rax, %rdx 1607; SSE4-NEXT: movq %r8, %r11 1608; SSE4-NEXT: sbbq %rcx, %r11 1609; SSE4-NEXT: setb %r11b 1610; SSE4-NEXT: cmpq %rdx, %rax 1611; SSE4-NEXT: sbbq %r8, %rcx 1612; SSE4-NEXT: sbbb $0, %r11b 1613; SSE4-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1614; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1615; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1616; SSE4-NEXT: cmpq %rax, %rcx 1617; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1618; SSE4-NEXT: movq %r8, %rdx 1619; SSE4-NEXT: sbbq %r10, %rdx 1620; SSE4-NEXT: setb %dl 1621; SSE4-NEXT: cmpq %rcx, %rax 1622; SSE4-NEXT: sbbq %r8, %r10 1623; SSE4-NEXT: sbbb $0, %dl 1624; SSE4-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1625; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1626; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1627; SSE4-NEXT: cmpq %rax, %rcx 1628; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 1629; SSE4-NEXT: movq %r11, %rdx 1630; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1631; SSE4-NEXT: sbbq %r8, %rdx 1632; SSE4-NEXT: setb %r10b 1633; SSE4-NEXT: cmpq %rcx, %rax 1634; SSE4-NEXT: sbbq %r11, %r8 1635; SSE4-NEXT: sbbb $0, %r10b 1636; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1637; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1638; SSE4-NEXT: cmpq %rax, %rcx 1639; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 1640; SSE4-NEXT: movq %r11, %rdx 1641; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1642; SSE4-NEXT: sbbq %r8, %rdx 1643; SSE4-NEXT: setb %dl 1644; SSE4-NEXT: cmpq %rcx, %rax 1645; SSE4-NEXT: sbbq %r11, %r8 1646; SSE4-NEXT: sbbb $0, %dl 1647; SSE4-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1648; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1649; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1650; SSE4-NEXT: cmpq %rax, %rcx 1651; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 1652; SSE4-NEXT: movq %r11, %rdx 1653; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1654; SSE4-NEXT: sbbq %r8, %rdx 1655; SSE4-NEXT: setb %bpl 1656; SSE4-NEXT: cmpq %rcx, %rax 1657; SSE4-NEXT: sbbq %r11, %r8 1658; SSE4-NEXT: sbbb $0, %bpl 1659; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1660; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1661; SSE4-NEXT: cmpq %rax, %rcx 1662; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 1663; SSE4-NEXT: movq %r11, %rdx 1664; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1665; SSE4-NEXT: sbbq %r8, %rdx 1666; SSE4-NEXT: setb %dl 1667; SSE4-NEXT: cmpq %rcx, %rax 1668; SSE4-NEXT: sbbq %r11, %r8 1669; SSE4-NEXT: sbbb $0, %dl 1670; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1671; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1672; SSE4-NEXT: cmpq %rax, %rcx 1673; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 1674; SSE4-NEXT: movq %r14, %r8 1675; SSE4-NEXT: movq (%rsp), %rbx # 8-byte Reload 1676; SSE4-NEXT: sbbq %rbx, %r8 1677; SSE4-NEXT: setb %r11b 1678; SSE4-NEXT: cmpq %rcx, %rax 1679; SSE4-NEXT: sbbq %r14, %rbx 1680; SSE4-NEXT: sbbb $0, %r11b 1681; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1682; SSE4-NEXT: 
movq {{[0-9]+}}(%rsp), %rcx 1683; SSE4-NEXT: cmpq %rax, %rcx 1684; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 1685; SSE4-NEXT: movq %r14, %rbx 1686; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1687; SSE4-NEXT: sbbq %r8, %rbx 1688; SSE4-NEXT: setb %bl 1689; SSE4-NEXT: cmpq %rcx, %rax 1690; SSE4-NEXT: sbbq %r14, %r8 1691; SSE4-NEXT: sbbb $0, %bl 1692; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1693; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r14 1694; SSE4-NEXT: cmpq %rax, %r14 1695; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 1696; SSE4-NEXT: movq %r15, %rcx 1697; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1698; SSE4-NEXT: sbbq %r8, %rcx 1699; SSE4-NEXT: setb %cl 1700; SSE4-NEXT: cmpq %r14, %rax 1701; SSE4-NEXT: sbbq %r15, %r8 1702; SSE4-NEXT: sbbb $0, %cl 1703; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1704; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r15 1705; SSE4-NEXT: cmpq %rax, %r15 1706; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 1707; SSE4-NEXT: movq %r12, %r14 1708; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1709; SSE4-NEXT: sbbq %r8, %r14 1710; SSE4-NEXT: setb %r14b 1711; SSE4-NEXT: cmpq %r15, %rax 1712; SSE4-NEXT: sbbq %r12, %r8 1713; SSE4-NEXT: sbbb $0, %r14b 1714; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1715; SSE4-NEXT: cmpq %r9, %rax 1716; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 1717; SSE4-NEXT: movq %r12, %r15 1718; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1719; SSE4-NEXT: sbbq %r8, %r15 1720; SSE4-NEXT: setb %r15b 1721; SSE4-NEXT: cmpq %rax, %r9 1722; SSE4-NEXT: sbbq %r12, %r8 1723; SSE4-NEXT: sbbb $0, %r15b 1724; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %rax 1725; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 1726; SSE4-NEXT: cmpq %r12, %rax 1727; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 1728; SSE4-NEXT: movq %r13, %r9 1729; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1730; SSE4-NEXT: sbbq %r8, %r9 1731; SSE4-NEXT: setb %r9b 1732; SSE4-NEXT: cmpq %rax, %r12 1733; SSE4-NEXT: sbbq %r13, %r8 1734; SSE4-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1735; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r12 1736; SSE4-NEXT: sbbb $0, %r9b 1737; SSE4-NEXT: cmpq %rsi, %r12 1738; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1739; SSE4-NEXT: movq %r8, %rdi 1740; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 1741; SSE4-NEXT: sbbq %rax, %rdi 1742; SSE4-NEXT: setb %dil 1743; SSE4-NEXT: cmpq %r12, %rsi 1744; SSE4-NEXT: sbbq %r8, %rax 1745; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r12 1746; SSE4-NEXT: movq {{[0-9]+}}(%rsp), %r13 1747; SSE4-NEXT: sbbb $0, %dil 1748; SSE4-NEXT: cmpq %r12, %r13 1749; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 1750; SSE4-NEXT: movq %r8, %rsi 1751; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 1752; SSE4-NEXT: sbbq %rax, %rsi 1753; SSE4-NEXT: setb %sil 1754; SSE4-NEXT: cmpq %r13, %r12 1755; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload 1756; SSE4-NEXT: movd %r12d, %xmm1 1757; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload 1758; SSE4-NEXT: movd %r12d, %xmm2 1759; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload 1760; SSE4-NEXT: movd %r12d, %xmm3 1761; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload 1762; SSE4-NEXT: movd %r12d, %xmm4 1763; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload 1764; 
SSE4-NEXT: movd %r12d, %xmm5 1765; SSE4-NEXT: movzbl %r10b, %r10d 1766; SSE4-NEXT: movd %r10d, %xmm6 1767; SSE4-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 1-byte Folded Reload 1768; SSE4-NEXT: movd %r10d, %xmm7 1769; SSE4-NEXT: movzbl %bpl, %r10d 1770; SSE4-NEXT: movd %r10d, %xmm0 1771; SSE4-NEXT: movzbl %dl, %edx 1772; SSE4-NEXT: movd %edx, %xmm8 1773; SSE4-NEXT: movzbl %r11b, %edx 1774; SSE4-NEXT: movd %edx, %xmm9 1775; SSE4-NEXT: movzbl %bl, %edx 1776; SSE4-NEXT: movd %edx, %xmm10 1777; SSE4-NEXT: movzbl %cl, %ecx 1778; SSE4-NEXT: movd %ecx, %xmm11 1779; SSE4-NEXT: movzbl %r14b, %ecx 1780; SSE4-NEXT: movd %ecx, %xmm12 1781; SSE4-NEXT: movzbl %r15b, %ecx 1782; SSE4-NEXT: movd %ecx, %xmm13 1783; SSE4-NEXT: movzbl %r9b, %ecx 1784; SSE4-NEXT: movd %ecx, %xmm14 1785; SSE4-NEXT: movzbl %dil, %ecx 1786; SSE4-NEXT: movd %ecx, %xmm15 1787; SSE4-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 1788; SSE4-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1789; SSE4-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 1790; SSE4-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 1791; SSE4-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7] 1792; SSE4-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] 1793; SSE4-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 1794; SSE4-NEXT: punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] 1795; SSE4-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] 1796; SSE4-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm9[0],xmm11[1],xmm9[1],xmm11[2],xmm9[2],xmm11[3],xmm9[3] 1797; SSE4-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7] 1798; SSE4-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7] 1799; SSE4-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm13[0],xmm15[1],xmm13[1],xmm15[2],xmm13[2],xmm15[3],xmm13[3] 1800; SSE4-NEXT: punpckldq {{.*#+}} xmm15 = xmm15[0],xmm11[0],xmm15[1],xmm11[1] 1801; SSE4-NEXT: sbbq %r8, %rax 1802; SSE4-NEXT: sbbb $0, %sil 1803; SSE4-NEXT: punpcklqdq {{.*#+}} xmm15 = xmm15[0],xmm0[0] 1804; SSE4-NEXT: movzbl %sil, %ecx 1805; SSE4-NEXT: andl $3, %ecx 1806; SSE4-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 1807; SSE4-NEXT: movb %cl, 4(%rax) 1808; SSE4-NEXT: movdqa %xmm15, -{{[0-9]+}}(%rsp) 1809; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx 1810; SSE4-NEXT: andl $3, %ecx 1811; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1812; SSE4-NEXT: andl $3, %edx 1813; SSE4-NEXT: leaq (%rdx,%rcx,4), %rcx 1814; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1815; SSE4-NEXT: andl $3, %edx 1816; SSE4-NEXT: shll $4, %edx 1817; SSE4-NEXT: orq %rcx, %rdx 1818; 
SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx 1819; SSE4-NEXT: andl $3, %ecx 1820; SSE4-NEXT: shll $6, %ecx 1821; SSE4-NEXT: orq %rdx, %rcx 1822; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1823; SSE4-NEXT: andl $3, %edx 1824; SSE4-NEXT: shll $8, %edx 1825; SSE4-NEXT: orq %rcx, %rdx 1826; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx 1827; SSE4-NEXT: andl $3, %ecx 1828; SSE4-NEXT: shll $10, %ecx 1829; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi 1830; SSE4-NEXT: andl $3, %esi 1831; SSE4-NEXT: shll $12, %esi 1832; SSE4-NEXT: orq %rcx, %rsi 1833; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi 1834; SSE4-NEXT: andl $3, %edi 1835; SSE4-NEXT: shll $14, %edi 1836; SSE4-NEXT: orq %rsi, %rdi 1837; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx 1838; SSE4-NEXT: andl $3, %ecx 1839; SSE4-NEXT: shll $16, %ecx 1840; SSE4-NEXT: orq %rdi, %rcx 1841; SSE4-NEXT: orq %rdx, %rcx 1842; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1843; SSE4-NEXT: andl $3, %edx 1844; SSE4-NEXT: shll $18, %edx 1845; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi 1846; SSE4-NEXT: andl $3, %esi 1847; SSE4-NEXT: shll $20, %esi 1848; SSE4-NEXT: orq %rdx, %rsi 1849; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1850; SSE4-NEXT: andl $3, %edx 1851; SSE4-NEXT: shll $22, %edx 1852; SSE4-NEXT: orq %rsi, %rdx 1853; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi 1854; SSE4-NEXT: andl $3, %esi 1855; SSE4-NEXT: shll $24, %esi 1856; SSE4-NEXT: orq %rdx, %rsi 1857; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx 1858; SSE4-NEXT: andl $3, %edx 1859; SSE4-NEXT: shlq $26, %rdx 1860; SSE4-NEXT: orq %rsi, %rdx 1861; SSE4-NEXT: orq %rcx, %rdx 1862; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx 1863; SSE4-NEXT: andl $3, %ecx 1864; SSE4-NEXT: shlq $28, %rcx 1865; SSE4-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi 1866; SSE4-NEXT: andl $3, %esi 1867; SSE4-NEXT: shlq $30, %rsi 1868; SSE4-NEXT: orq %rcx, %rsi 1869; SSE4-NEXT: orq %rdx, %rsi 1870; SSE4-NEXT: movl %esi, (%rax) 1871; SSE4-NEXT: addq $120, %rsp 1872; SSE4-NEXT: popq %rbx 1873; SSE4-NEXT: popq %r12 1874; SSE4-NEXT: popq %r13 1875; SSE4-NEXT: popq %r14 1876; SSE4-NEXT: popq %r15 1877; SSE4-NEXT: popq %rbp 1878; SSE4-NEXT: retq 1879; 1880; SSE2-LABEL: ucmp_uncommon_vectors: 1881; SSE2: # %bb.0: 1882; SSE2-NEXT: pushq %rbp 1883; SSE2-NEXT: pushq %r15 1884; SSE2-NEXT: pushq %r14 1885; SSE2-NEXT: pushq %r13 1886; SSE2-NEXT: pushq %r12 1887; SSE2-NEXT: pushq %rbx 1888; SSE2-NEXT: subq $88, %rsp 1889; SSE2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1890; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1891; SSE2-NEXT: andl $127, %eax 1892; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1893; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1894; SSE2-NEXT: andl $127, %eax 1895; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1896; SSE2-NEXT: andl $127, %r8d 1897; SSE2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1898; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1899; SSE2-NEXT: andl $127, %eax 1900; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1901; SSE2-NEXT: andl $127, %edx 1902; SSE2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1903; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1904; SSE2-NEXT: andl $127, %eax 1905; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1906; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1907; SSE2-NEXT: andl $127, %eax 1908; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1909; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1910; SSE2-NEXT: andl $127, %eax 1911; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1912; SSE2-NEXT: movq 
{{[0-9]+}}(%rsp), %rax 1913; SSE2-NEXT: andl $127, %eax 1914; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1915; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1916; SSE2-NEXT: andl $127, %eax 1917; SSE2-NEXT: movq %rax, (%rsp) # 8-byte Spill 1918; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1919; SSE2-NEXT: andl $127, %eax 1920; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1921; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1922; SSE2-NEXT: andl $127, %eax 1923; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1924; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1925; SSE2-NEXT: andl $127, %eax 1926; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1927; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1928; SSE2-NEXT: andl $127, %eax 1929; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1930; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1931; SSE2-NEXT: andl $127, %eax 1932; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1933; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1934; SSE2-NEXT: andl $127, %eax 1935; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1936; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1937; SSE2-NEXT: andl $127, %eax 1938; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1939; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1940; SSE2-NEXT: andl $127, %eax 1941; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1942; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1943; SSE2-NEXT: andl $127, %eax 1944; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1945; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1946; SSE2-NEXT: andl $127, %eax 1947; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1948; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1949; SSE2-NEXT: andl $127, %eax 1950; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1951; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1952; SSE2-NEXT: andl $127, %eax 1953; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1954; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1955; SSE2-NEXT: andl $127, %eax 1956; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1957; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1958; SSE2-NEXT: andl $127, %eax 1959; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1960; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 1961; SSE2-NEXT: andl $127, %ecx 1962; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1963; SSE2-NEXT: andl $127, %eax 1964; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1965; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rbx 1966; SSE2-NEXT: andl $127, %ebx 1967; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 1968; SSE2-NEXT: andl $127, %edx 1969; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10 1970; SSE2-NEXT: andl $127, %r10d 1971; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r14 1972; SSE2-NEXT: andl $127, %r14d 1973; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rbp 1974; SSE2-NEXT: andl $127, %ebp 1975; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r13 1976; SSE2-NEXT: andl $127, %r13d 1977; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11 1978; SSE2-NEXT: andl $127, %r11d 1979; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r15 1980; SSE2-NEXT: andl $127, %r15d 1981; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 1982; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r12 1983; SSE2-NEXT: cmpq %rax, %r12 1984; SSE2-NEXT: movq %r15, %r8 1985; SSE2-NEXT: sbbq %r11, %r8 1986; SSE2-NEXT: setb %r8b 1987; SSE2-NEXT: cmpq %r12, %rax 1988; SSE2-NEXT: sbbq %r15, %r11 1989; SSE2-NEXT: sbbb $0, %r8b 1990; SSE2-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1991; SSE2-NEXT: movq 
{{[0-9]+}}(%rsp), %rax 1992; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r8 1993; SSE2-NEXT: cmpq %rax, %r8 1994; SSE2-NEXT: movq %r13, %r11 1995; SSE2-NEXT: sbbq %rbp, %r11 1996; SSE2-NEXT: setb %r11b 1997; SSE2-NEXT: cmpq %r8, %rax 1998; SSE2-NEXT: sbbq %r13, %rbp 1999; SSE2-NEXT: sbbb $0, %r11b 2000; SSE2-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2001; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2002; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r8 2003; SSE2-NEXT: cmpq %rax, %r8 2004; SSE2-NEXT: movq %r14, %r11 2005; SSE2-NEXT: sbbq %r10, %r11 2006; SSE2-NEXT: setb %r11b 2007; SSE2-NEXT: cmpq %r8, %rax 2008; SSE2-NEXT: sbbq %r14, %r10 2009; SSE2-NEXT: sbbb $0, %r11b 2010; SSE2-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2011; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2012; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r8 2013; SSE2-NEXT: cmpq %rax, %r8 2014; SSE2-NEXT: movq %rdx, %r10 2015; SSE2-NEXT: sbbq %rbx, %r10 2016; SSE2-NEXT: setb %r10b 2017; SSE2-NEXT: cmpq %r8, %rax 2018; SSE2-NEXT: sbbq %rdx, %rbx 2019; SSE2-NEXT: sbbb $0, %r10b 2020; SSE2-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2021; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2022; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2023; SSE2-NEXT: cmpq %rax, %rdx 2024; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2025; SSE2-NEXT: movq %r10, %r8 2026; SSE2-NEXT: sbbq %rcx, %r8 2027; SSE2-NEXT: setb %r8b 2028; SSE2-NEXT: cmpq %rdx, %rax 2029; SSE2-NEXT: sbbq %r10, %rcx 2030; SSE2-NEXT: sbbb $0, %r8b 2031; SSE2-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2032; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2033; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2034; SSE2-NEXT: cmpq %rax, %rcx 2035; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2036; SSE2-NEXT: movq %r10, %rdx 2037; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 2038; SSE2-NEXT: sbbq %r8, %rdx 2039; SSE2-NEXT: setb %dl 2040; SSE2-NEXT: cmpq %rcx, %rax 2041; SSE2-NEXT: sbbq %r10, %r8 2042; SSE2-NEXT: sbbb $0, %dl 2043; SSE2-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2044; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2045; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2046; SSE2-NEXT: cmpq %rax, %rcx 2047; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2048; SSE2-NEXT: movq %r10, %rdx 2049; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 2050; SSE2-NEXT: sbbq %r8, %rdx 2051; SSE2-NEXT: setb %dl 2052; SSE2-NEXT: cmpq %rcx, %rax 2053; SSE2-NEXT: sbbq %r10, %r8 2054; SSE2-NEXT: sbbb $0, %dl 2055; SSE2-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2056; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2057; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2058; SSE2-NEXT: cmpq %rax, %rcx 2059; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2060; SSE2-NEXT: movq %r11, %rdx 2061; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2062; SSE2-NEXT: sbbq %r10, %rdx 2063; SSE2-NEXT: setb %r8b 2064; SSE2-NEXT: cmpq %rcx, %rax 2065; SSE2-NEXT: sbbq %r11, %r10 2066; SSE2-NEXT: sbbb $0, %r8b 2067; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2068; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2069; SSE2-NEXT: cmpq %rax, %rcx 2070; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2071; SSE2-NEXT: movq %rbx, %rdx 2072; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2073; SSE2-NEXT: sbbq %r10, %rdx 2074; SSE2-NEXT: setb %r11b 2075; SSE2-NEXT: cmpq %rcx, %rax 2076; SSE2-NEXT: sbbq %rbx, %r10 2077; SSE2-NEXT: sbbb $0, %r11b 2078; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2079; 
SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2080; SSE2-NEXT: cmpq %rax, %rcx 2081; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2082; SSE2-NEXT: movq %rbx, %rdx 2083; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2084; SSE2-NEXT: sbbq %r10, %rdx 2085; SSE2-NEXT: setb %dl 2086; SSE2-NEXT: cmpq %rcx, %rax 2087; SSE2-NEXT: sbbq %rbx, %r10 2088; SSE2-NEXT: sbbb $0, %dl 2089; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2090; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2091; SSE2-NEXT: cmpq %rax, %rcx 2092; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2093; SSE2-NEXT: movq %r14, %r10 2094; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2095; SSE2-NEXT: sbbq %rbx, %r10 2096; SSE2-NEXT: setb %r10b 2097; SSE2-NEXT: cmpq %rcx, %rax 2098; SSE2-NEXT: sbbq %r14, %rbx 2099; SSE2-NEXT: sbbb $0, %r10b 2100; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2101; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rbx 2102; SSE2-NEXT: cmpq %rax, %rbx 2103; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2104; SSE2-NEXT: movq %r15, %rcx 2105; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2106; SSE2-NEXT: sbbq %r14, %rcx 2107; SSE2-NEXT: setb %cl 2108; SSE2-NEXT: cmpq %rbx, %rax 2109; SSE2-NEXT: sbbq %r15, %r14 2110; SSE2-NEXT: sbbb $0, %cl 2111; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2112; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r14 2113; SSE2-NEXT: cmpq %rax, %r14 2114; SSE2-NEXT: movq (%rsp), %r12 # 8-byte Reload 2115; SSE2-NEXT: movq %r12, %rbx 2116; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2117; SSE2-NEXT: sbbq %r15, %rbx 2118; SSE2-NEXT: setb %bl 2119; SSE2-NEXT: cmpq %r14, %rax 2120; SSE2-NEXT: sbbq %r12, %r15 2121; SSE2-NEXT: sbbb $0, %bl 2122; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2123; SSE2-NEXT: cmpq %r9, %rax 2124; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 2125; SSE2-NEXT: movq %r12, %r14 2126; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2127; SSE2-NEXT: sbbq %r15, %r14 2128; SSE2-NEXT: setb %bpl 2129; SSE2-NEXT: cmpq %rax, %r9 2130; SSE2-NEXT: sbbq %r12, %r15 2131; SSE2-NEXT: sbbb $0, %bpl 2132; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2133; SSE2-NEXT: cmpq %rsi, %rax 2134; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2135; SSE2-NEXT: movq %r15, %r9 2136; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2137; SSE2-NEXT: sbbq %r14, %r9 2138; SSE2-NEXT: setb %r9b 2139; SSE2-NEXT: cmpq %rax, %rsi 2140; SSE2-NEXT: sbbq %r15, %r14 2141; SSE2-NEXT: movq %rdi, %rax 2142; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rsi 2143; SSE2-NEXT: sbbb $0, %r9b 2144; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2145; SSE2-NEXT: cmpq %r15, %rsi 2146; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 2147; SSE2-NEXT: movq %r12, %rdi 2148; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2149; SSE2-NEXT: sbbq %r14, %rdi 2150; SSE2-NEXT: setb %dil 2151; SSE2-NEXT: cmpq %rsi, %r15 2152; SSE2-NEXT: sbbq %r12, %r14 2153; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rsi 2154; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r14 2155; SSE2-NEXT: sbbb $0, %dil 2156; SSE2-NEXT: cmpq %rsi, %r14 2157; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 2158; SSE2-NEXT: movq %r13, %r15 2159; SSE2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload 2160; SSE2-NEXT: sbbq %r12, %r15 2161; SSE2-NEXT: setb %r15b 2162; SSE2-NEXT: cmpq %r14, %rsi 2163; SSE2-NEXT: sbbq %r13, %r12 2164; SSE2-NEXT: sbbb $0, %r15b 2165; SSE2-NEXT: movzbl %r15b, %esi 2166; SSE2-NEXT: 
andl $3, %esi 2167; SSE2-NEXT: movb %sil, 4(%rax) 2168; SSE2-NEXT: movzbl %dil, %esi 2169; SSE2-NEXT: movzbl %r9b, %edi 2170; SSE2-NEXT: andl $3, %esi 2171; SSE2-NEXT: andl $3, %edi 2172; SSE2-NEXT: leaq (%rdi,%rsi,4), %rsi 2173; SSE2-NEXT: movzbl %bpl, %edi 2174; SSE2-NEXT: andl $3, %edi 2175; SSE2-NEXT: shll $4, %edi 2176; SSE2-NEXT: orq %rsi, %rdi 2177; SSE2-NEXT: movzbl %bl, %r9d 2178; SSE2-NEXT: andl $3, %r9d 2179; SSE2-NEXT: shll $6, %r9d 2180; SSE2-NEXT: orq %rdi, %r9 2181; SSE2-NEXT: movzbl %cl, %esi 2182; SSE2-NEXT: andl $3, %esi 2183; SSE2-NEXT: shll $8, %esi 2184; SSE2-NEXT: orq %r9, %rsi 2185; SSE2-NEXT: movzbl %dl, %ecx 2186; SSE2-NEXT: movzbl %r10b, %edx 2187; SSE2-NEXT: andl $3, %edx 2188; SSE2-NEXT: shll $10, %edx 2189; SSE2-NEXT: andl $3, %ecx 2190; SSE2-NEXT: shll $12, %ecx 2191; SSE2-NEXT: orq %rdx, %rcx 2192; SSE2-NEXT: movzbl %r11b, %edx 2193; SSE2-NEXT: andl $3, %edx 2194; SSE2-NEXT: shll $14, %edx 2195; SSE2-NEXT: orq %rcx, %rdx 2196; SSE2-NEXT: movzbl %r8b, %ecx 2197; SSE2-NEXT: andl $3, %ecx 2198; SSE2-NEXT: shll $16, %ecx 2199; SSE2-NEXT: orq %rdx, %rcx 2200; SSE2-NEXT: orq %rsi, %rcx 2201; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2202; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2203; SSE2-NEXT: andl $3, %esi 2204; SSE2-NEXT: shll $18, %esi 2205; SSE2-NEXT: andl $3, %edx 2206; SSE2-NEXT: shll $20, %edx 2207; SSE2-NEXT: orq %rsi, %rdx 2208; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2209; SSE2-NEXT: andl $3, %esi 2210; SSE2-NEXT: shll $22, %esi 2211; SSE2-NEXT: orq %rdx, %rsi 2212; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2213; SSE2-NEXT: andl $3, %edx 2214; SSE2-NEXT: shll $24, %edx 2215; SSE2-NEXT: orq %rsi, %rdx 2216; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2217; SSE2-NEXT: andl $3, %esi 2218; SSE2-NEXT: shlq $26, %rsi 2219; SSE2-NEXT: orq %rdx, %rsi 2220; SSE2-NEXT: orq %rcx, %rsi 2221; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 2222; SSE2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2223; SSE2-NEXT: andl $3, %edx 2224; SSE2-NEXT: shlq $28, %rdx 2225; SSE2-NEXT: andl $3, %ecx 2226; SSE2-NEXT: shlq $30, %rcx 2227; SSE2-NEXT: orq %rdx, %rcx 2228; SSE2-NEXT: orq %rsi, %rcx 2229; SSE2-NEXT: movl %ecx, (%rax) 2230; SSE2-NEXT: addq $88, %rsp 2231; SSE2-NEXT: popq %rbx 2232; SSE2-NEXT: popq %r12 2233; SSE2-NEXT: popq %r13 2234; SSE2-NEXT: popq %r14 2235; SSE2-NEXT: popq %r15 2236; SSE2-NEXT: popq %rbp 2237; SSE2-NEXT: retq 2238; 2239; AVX2-LABEL: ucmp_uncommon_vectors: 2240; AVX2: # %bb.0: 2241; AVX2-NEXT: pushq %rbp 2242; AVX2-NEXT: pushq %r15 2243; AVX2-NEXT: pushq %r14 2244; AVX2-NEXT: pushq %r13 2245; AVX2-NEXT: pushq %r12 2246; AVX2-NEXT: pushq %rbx 2247; AVX2-NEXT: subq $88, %rsp 2248; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2249; AVX2-NEXT: andl $127, %eax 2250; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2251; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2252; AVX2-NEXT: andl $127, %eax 2253; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2254; AVX2-NEXT: andl $127, %r8d 2255; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2256; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2257; AVX2-NEXT: andl $127, %eax 2258; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2259; AVX2-NEXT: andl $127, %edx 2260; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2261; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2262; 
AVX2-NEXT: andl $127, %eax 2263; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2264; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2265; AVX2-NEXT: andl $127, %eax 2266; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2267; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2268; AVX2-NEXT: andl $127, %eax 2269; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2270; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2271; AVX2-NEXT: andl $127, %eax 2272; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2273; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2274; AVX2-NEXT: andl $127, %eax 2275; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2276; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2277; AVX2-NEXT: andl $127, %eax 2278; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill 2279; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2280; AVX2-NEXT: andl $127, %eax 2281; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2282; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2283; AVX2-NEXT: andl $127, %eax 2284; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2285; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2286; AVX2-NEXT: andl $127, %eax 2287; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2288; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2289; AVX2-NEXT: andl $127, %eax 2290; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2291; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2292; AVX2-NEXT: andl $127, %eax 2293; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2294; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2295; AVX2-NEXT: andl $127, %eax 2296; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2297; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2298; AVX2-NEXT: andl $127, %eax 2299; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2300; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2301; AVX2-NEXT: andl $127, %eax 2302; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2303; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2304; AVX2-NEXT: andl $127, %eax 2305; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2306; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2307; AVX2-NEXT: andl $127, %eax 2308; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2309; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2310; AVX2-NEXT: andl $127, %eax 2311; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2312; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2313; AVX2-NEXT: andl $127, %eax 2314; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2315; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2316; AVX2-NEXT: andl $127, %eax 2317; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2318; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2319; AVX2-NEXT: andl $127, %eax 2320; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2321; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2322; AVX2-NEXT: andl $127, %eax 2323; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2324; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15 2325; AVX2-NEXT: andl $127, %r15d 2326; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2327; AVX2-NEXT: andl $127, %eax 2328; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r14 2329; AVX2-NEXT: andl $127, %r14d 2330; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2331; AVX2-NEXT: andl $127, %edx 2332; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbp 2333; AVX2-NEXT: andl $127, %ebp 2334; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 2335; AVX2-NEXT: andl $127, %r8d 2336; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r12 2337; AVX2-NEXT: andl $127, %r12d 2338; 
AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r13 2339; AVX2-NEXT: andl $127, %r13d 2340; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx 2341; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 2342; AVX2-NEXT: cmpq %rbx, %r11 2343; AVX2-NEXT: movq %r13, %r10 2344; AVX2-NEXT: sbbq %r12, %r10 2345; AVX2-NEXT: setb %r10b 2346; AVX2-NEXT: cmpq %r11, %rbx 2347; AVX2-NEXT: sbbq %r13, %r12 2348; AVX2-NEXT: sbbb $0, %r10b 2349; AVX2-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2350; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 2351; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 2352; AVX2-NEXT: cmpq %r10, %r11 2353; AVX2-NEXT: movq %r8, %rbx 2354; AVX2-NEXT: sbbq %rbp, %rbx 2355; AVX2-NEXT: setb %bl 2356; AVX2-NEXT: cmpq %r11, %r10 2357; AVX2-NEXT: sbbq %r8, %rbp 2358; AVX2-NEXT: sbbb $0, %bl 2359; AVX2-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2360; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 2361; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 2362; AVX2-NEXT: cmpq %r8, %r10 2363; AVX2-NEXT: movq %rdx, %r11 2364; AVX2-NEXT: sbbq %r14, %r11 2365; AVX2-NEXT: setb %r11b 2366; AVX2-NEXT: cmpq %r10, %r8 2367; AVX2-NEXT: sbbq %rdx, %r14 2368; AVX2-NEXT: sbbb $0, %r11b 2369; AVX2-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2370; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2371; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r8 2372; AVX2-NEXT: cmpq %rdx, %r8 2373; AVX2-NEXT: movq %rax, %r10 2374; AVX2-NEXT: sbbq %r15, %r10 2375; AVX2-NEXT: setb %r10b 2376; AVX2-NEXT: cmpq %r8, %rdx 2377; AVX2-NEXT: sbbq %rax, %r15 2378; AVX2-NEXT: sbbb $0, %r10b 2379; AVX2-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2380; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2381; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2382; AVX2-NEXT: cmpq %rax, %rdx 2383; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2384; AVX2-NEXT: movq %r11, %r8 2385; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2386; AVX2-NEXT: sbbq %r10, %r8 2387; AVX2-NEXT: setb %r8b 2388; AVX2-NEXT: cmpq %rdx, %rax 2389; AVX2-NEXT: sbbq %r11, %r10 2390; AVX2-NEXT: sbbb $0, %r8b 2391; AVX2-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2392; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2393; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2394; AVX2-NEXT: cmpq %rax, %rdx 2395; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2396; AVX2-NEXT: movq %r11, %r8 2397; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2398; AVX2-NEXT: sbbq %r10, %r8 2399; AVX2-NEXT: setb %r8b 2400; AVX2-NEXT: cmpq %rdx, %rax 2401; AVX2-NEXT: sbbq %r11, %r10 2402; AVX2-NEXT: sbbb $0, %r8b 2403; AVX2-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2404; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2405; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2406; AVX2-NEXT: cmpq %rax, %rdx 2407; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2408; AVX2-NEXT: movq %r11, %r8 2409; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2410; AVX2-NEXT: sbbq %r10, %r8 2411; AVX2-NEXT: setb %r8b 2412; AVX2-NEXT: cmpq %rdx, %rax 2413; AVX2-NEXT: sbbq %r11, %r10 2414; AVX2-NEXT: sbbb $0, %r8b 2415; AVX2-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2416; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2417; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2418; AVX2-NEXT: cmpq %rax, %rdx 2419; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2420; AVX2-NEXT: movq %r11, %r8 2421; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2422; AVX2-NEXT: sbbq %r10, %r8 2423; AVX2-NEXT: setb %r12b 2424; AVX2-NEXT: cmpq %rdx, %rax 2425; AVX2-NEXT: sbbq 
%r11, %r10 2426; AVX2-NEXT: sbbb $0, %r12b 2427; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2428; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2429; AVX2-NEXT: cmpq %rax, %rdx 2430; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2431; AVX2-NEXT: movq %r11, %r8 2432; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2433; AVX2-NEXT: sbbq %r10, %r8 2434; AVX2-NEXT: setb %r8b 2435; AVX2-NEXT: cmpq %rdx, %rax 2436; AVX2-NEXT: sbbq %r11, %r10 2437; AVX2-NEXT: sbbb $0, %r8b 2438; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2439; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 2440; AVX2-NEXT: cmpq %rax, %r10 2441; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2442; AVX2-NEXT: movq %rbx, %rdx 2443; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2444; AVX2-NEXT: sbbq %r11, %rdx 2445; AVX2-NEXT: setb %dl 2446; AVX2-NEXT: cmpq %r10, %rax 2447; AVX2-NEXT: sbbq %rbx, %r11 2448; AVX2-NEXT: sbbb $0, %dl 2449; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2450; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 2451; AVX2-NEXT: cmpq %rax, %r11 2452; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2453; AVX2-NEXT: movq %r14, %r10 2454; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2455; AVX2-NEXT: sbbq %rbx, %r10 2456; AVX2-NEXT: setb %r10b 2457; AVX2-NEXT: cmpq %r11, %rax 2458; AVX2-NEXT: sbbq %r14, %rbx 2459; AVX2-NEXT: sbbb $0, %r10b 2460; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2461; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rbx 2462; AVX2-NEXT: cmpq %rax, %rbx 2463; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2464; AVX2-NEXT: movq %r15, %r11 2465; AVX2-NEXT: movq (%rsp), %r14 # 8-byte Reload 2466; AVX2-NEXT: sbbq %r14, %r11 2467; AVX2-NEXT: setb %r11b 2468; AVX2-NEXT: cmpq %rbx, %rax 2469; AVX2-NEXT: sbbq %r15, %r14 2470; AVX2-NEXT: sbbb $0, %r11b 2471; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2472; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r14 2473; AVX2-NEXT: cmpq %rax, %r14 2474; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 2475; AVX2-NEXT: movq %r13, %rbx 2476; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2477; AVX2-NEXT: sbbq %r15, %rbx 2478; AVX2-NEXT: setb %bl 2479; AVX2-NEXT: cmpq %r14, %rax 2480; AVX2-NEXT: sbbq %r13, %r15 2481; AVX2-NEXT: sbbb $0, %bl 2482; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2483; AVX2-NEXT: cmpq %r9, %rax 2484; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 2485; AVX2-NEXT: movq %r13, %r14 2486; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2487; AVX2-NEXT: sbbq %r15, %r14 2488; AVX2-NEXT: setb %bpl 2489; AVX2-NEXT: cmpq %rax, %r9 2490; AVX2-NEXT: sbbq %r13, %r15 2491; AVX2-NEXT: sbbb $0, %bpl 2492; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2493; AVX2-NEXT: cmpq %rsi, %rax 2494; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2495; AVX2-NEXT: movq %r15, %r9 2496; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2497; AVX2-NEXT: sbbq %r14, %r9 2498; AVX2-NEXT: setb %r9b 2499; AVX2-NEXT: cmpq %rax, %rsi 2500; AVX2-NEXT: sbbq %r15, %r14 2501; AVX2-NEXT: sbbb $0, %r9b 2502; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2503; AVX2-NEXT: cmpq %rcx, %rax 2504; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2505; AVX2-NEXT: movq %r15, %rsi 2506; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload 2507; AVX2-NEXT: sbbq %r14, %rsi 2508; AVX2-NEXT: setb %sil 2509; AVX2-NEXT: cmpq %rax, %rcx 2510; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax 2511; AVX2-NEXT: sbbq %r15, %r14 2512; AVX2-NEXT: sbbb $0, %sil 2513; AVX2-NEXT: 
movq {{[0-9]+}}(%rsp), %rcx 2514; AVX2-NEXT: cmpq %rax, %rcx 2515; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 2516; AVX2-NEXT: movq %r13, %r14 2517; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload 2518; AVX2-NEXT: sbbq %r15, %r14 2519; AVX2-NEXT: setb %r14b 2520; AVX2-NEXT: cmpq %rcx, %rax 2521; AVX2-NEXT: sbbq %r13, %r15 2522; AVX2-NEXT: movq %rdi, %rax 2523; AVX2-NEXT: sbbb $0, %r14b 2524; AVX2-NEXT: movzbl %r14b, %ecx 2525; AVX2-NEXT: andl $3, %ecx 2526; AVX2-NEXT: movb %cl, 4(%rdi) 2527; AVX2-NEXT: movzbl %sil, %ecx 2528; AVX2-NEXT: andl $3, %ecx 2529; AVX2-NEXT: movzbl %r9b, %esi 2530; AVX2-NEXT: andl $3, %esi 2531; AVX2-NEXT: leaq (%rsi,%rcx,4), %rcx 2532; AVX2-NEXT: movzbl %bpl, %esi 2533; AVX2-NEXT: andl $3, %esi 2534; AVX2-NEXT: shll $4, %esi 2535; AVX2-NEXT: orq %rcx, %rsi 2536; AVX2-NEXT: movzbl %bl, %ecx 2537; AVX2-NEXT: andl $3, %ecx 2538; AVX2-NEXT: shll $6, %ecx 2539; AVX2-NEXT: orq %rsi, %rcx 2540; AVX2-NEXT: movzbl %r11b, %esi 2541; AVX2-NEXT: andl $3, %esi 2542; AVX2-NEXT: shll $8, %esi 2543; AVX2-NEXT: orq %rcx, %rsi 2544; AVX2-NEXT: movzbl %r10b, %ecx 2545; AVX2-NEXT: andl $3, %ecx 2546; AVX2-NEXT: shll $10, %ecx 2547; AVX2-NEXT: movzbl %dl, %edx 2548; AVX2-NEXT: andl $3, %edx 2549; AVX2-NEXT: shll $12, %edx 2550; AVX2-NEXT: orq %rcx, %rdx 2551; AVX2-NEXT: movzbl %r8b, %edi 2552; AVX2-NEXT: andl $3, %edi 2553; AVX2-NEXT: shll $14, %edi 2554; AVX2-NEXT: orq %rdx, %rdi 2555; AVX2-NEXT: movzbl %r12b, %ecx 2556; AVX2-NEXT: andl $3, %ecx 2557; AVX2-NEXT: shll $16, %ecx 2558; AVX2-NEXT: orq %rdi, %rcx 2559; AVX2-NEXT: orq %rsi, %rcx 2560; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2561; AVX2-NEXT: andl $3, %edx 2562; AVX2-NEXT: shll $18, %edx 2563; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2564; AVX2-NEXT: andl $3, %esi 2565; AVX2-NEXT: shll $20, %esi 2566; AVX2-NEXT: orq %rdx, %rsi 2567; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2568; AVX2-NEXT: andl $3, %edx 2569; AVX2-NEXT: shll $22, %edx 2570; AVX2-NEXT: orq %rsi, %rdx 2571; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2572; AVX2-NEXT: andl $3, %esi 2573; AVX2-NEXT: shll $24, %esi 2574; AVX2-NEXT: orq %rdx, %rsi 2575; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload 2576; AVX2-NEXT: andl $3, %edx 2577; AVX2-NEXT: shlq $26, %rdx 2578; AVX2-NEXT: orq %rsi, %rdx 2579; AVX2-NEXT: orq %rcx, %rdx 2580; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 2581; AVX2-NEXT: andl $3, %ecx 2582; AVX2-NEXT: shlq $28, %rcx 2583; AVX2-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload 2584; AVX2-NEXT: andl $3, %esi 2585; AVX2-NEXT: shlq $30, %rsi 2586; AVX2-NEXT: orq %rcx, %rsi 2587; AVX2-NEXT: orq %rdx, %rsi 2588; AVX2-NEXT: movl %esi, (%rax) 2589; AVX2-NEXT: addq $88, %rsp 2590; AVX2-NEXT: popq %rbx 2591; AVX2-NEXT: popq %r12 2592; AVX2-NEXT: popq %r13 2593; AVX2-NEXT: popq %r14 2594; AVX2-NEXT: popq %r15 2595; AVX2-NEXT: popq %rbp 2596; AVX2-NEXT: retq 2597; 2598; AVX512-LABEL: ucmp_uncommon_vectors: 2599; AVX512: # %bb.0: 2600; AVX512-NEXT: pushq %rbp 2601; AVX512-NEXT: pushq %r15 2602; AVX512-NEXT: pushq %r14 2603; AVX512-NEXT: pushq %r13 2604; AVX512-NEXT: pushq %r12 2605; AVX512-NEXT: pushq %rbx 2606; AVX512-NEXT: subq $88, %rsp 2607; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2608; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2609; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 
8-byte Spill 2610; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2611; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2612; AVX512-NEXT: andl $127, %eax 2613; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2614; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2615; AVX512-NEXT: andl $127, %eax 2616; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2617; AVX512-NEXT: andl $127, %r8d 2618; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2619; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2620; AVX512-NEXT: andl $127, %eax 2621; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2622; AVX512-NEXT: andl $127, %edx 2623; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2624; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2625; AVX512-NEXT: andl $127, %eax 2626; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2627; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2628; AVX512-NEXT: andl $127, %eax 2629; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill 2630; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2631; AVX512-NEXT: andl $127, %eax 2632; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2633; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2634; AVX512-NEXT: andl $127, %eax 2635; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2636; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2637; AVX512-NEXT: andl $127, %eax 2638; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2639; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2640; AVX512-NEXT: andl $127, %eax 2641; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2642; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2643; AVX512-NEXT: andl $127, %eax 2644; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2645; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2646; AVX512-NEXT: andl $127, %eax 2647; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2648; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2649; AVX512-NEXT: andl $127, %eax 2650; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2651; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2652; AVX512-NEXT: andl $127, %eax 2653; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2654; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2655; AVX512-NEXT: andl $127, %eax 2656; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2657; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2658; AVX512-NEXT: andl $127, %eax 2659; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2660; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2661; AVX512-NEXT: andl $127, %eax 2662; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2663; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2664; AVX512-NEXT: andl $127, %eax 2665; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2666; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2667; AVX512-NEXT: andl $127, %eax 2668; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2669; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2670; AVX512-NEXT: andl $127, %eax 2671; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2672; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2673; AVX512-NEXT: andl $127, %eax 2674; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2675; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbp 2676; AVX512-NEXT: andl $127, %ebp 2677; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 2678; AVX512-NEXT: andl $127, %r12d 2679; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 2680; AVX512-NEXT: andl 
$127, %r13d 2681; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 2682; AVX512-NEXT: andl $127, %r15d 2683; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 2684; AVX512-NEXT: andl $127, %r10d 2685; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rbx 2686; AVX512-NEXT: andl $127, %ebx 2687; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 2688; AVX512-NEXT: andl $127, %r8d 2689; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9 2690; AVX512-NEXT: andl $127, %r9d 2691; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi 2692; AVX512-NEXT: andl $127, %esi 2693; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi 2694; AVX512-NEXT: andl $127, %edi 2695; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2696; AVX512-NEXT: andl $127, %eax 2697; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2698; AVX512-NEXT: andl $127, %edx 2699; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 2700; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 2701; AVX512-NEXT: cmpq %r14, %r11 2702; AVX512-NEXT: movq %rdx, %rcx 2703; AVX512-NEXT: sbbq %rax, %rcx 2704; AVX512-NEXT: setb %cl 2705; AVX512-NEXT: cmpq %r11, %r14 2706; AVX512-NEXT: sbbq %rdx, %rax 2707; AVX512-NEXT: sbbb $0, %cl 2708; AVX512-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2709; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2710; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2711; AVX512-NEXT: cmpq %rax, %rcx 2712; AVX512-NEXT: movq %rdi, %rdx 2713; AVX512-NEXT: sbbq %rsi, %rdx 2714; AVX512-NEXT: setb %dl 2715; AVX512-NEXT: cmpq %rcx, %rax 2716; AVX512-NEXT: sbbq %rdi, %rsi 2717; AVX512-NEXT: sbbb $0, %dl 2718; AVX512-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2719; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2720; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2721; AVX512-NEXT: cmpq %rax, %rcx 2722; AVX512-NEXT: movq %r9, %rdx 2723; AVX512-NEXT: sbbq %r8, %rdx 2724; AVX512-NEXT: setb %dl 2725; AVX512-NEXT: cmpq %rcx, %rax 2726; AVX512-NEXT: sbbq %r9, %r8 2727; AVX512-NEXT: sbbb $0, %dl 2728; AVX512-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2729; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2730; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2731; AVX512-NEXT: cmpq %rax, %rcx 2732; AVX512-NEXT: movq %rbx, %rdx 2733; AVX512-NEXT: sbbq %r10, %rdx 2734; AVX512-NEXT: setb %dl 2735; AVX512-NEXT: cmpq %rcx, %rax 2736; AVX512-NEXT: sbbq %rbx, %r10 2737; AVX512-NEXT: sbbb $0, %dl 2738; AVX512-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2739; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2740; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2741; AVX512-NEXT: cmpq %rax, %rcx 2742; AVX512-NEXT: movq %r15, %rdx 2743; AVX512-NEXT: sbbq %r13, %rdx 2744; AVX512-NEXT: setb %dl 2745; AVX512-NEXT: cmpq %rcx, %rax 2746; AVX512-NEXT: sbbq %r15, %r13 2747; AVX512-NEXT: sbbb $0, %dl 2748; AVX512-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2749; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2750; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2751; AVX512-NEXT: cmpq %rax, %rcx 2752; AVX512-NEXT: movq %r12, %rdx 2753; AVX512-NEXT: sbbq %rbp, %rdx 2754; AVX512-NEXT: setb %dl 2755; AVX512-NEXT: cmpq %rcx, %rax 2756; AVX512-NEXT: sbbq %r12, %rbp 2757; AVX512-NEXT: sbbb $0, %dl 2758; AVX512-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 2759; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2760; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2761; AVX512-NEXT: cmpq %rax, %rcx 2762; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 2763; AVX512-NEXT: movq %rdi, %rdx 2764; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 2765; AVX512-NEXT: sbbq %rsi, %rdx 2766; AVX512-NEXT: setb %r13b 2767; AVX512-NEXT: cmpq %rcx, %rax 2768; 
AVX512-NEXT: sbbq %rdi, %rsi 2769; AVX512-NEXT: sbbb $0, %r13b 2770; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax 2771; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2772; AVX512-NEXT: cmpq %rax, %rcx 2773; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 2774; AVX512-NEXT: movq %rdi, %rdx 2775; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 2776; AVX512-NEXT: sbbq %rsi, %rdx 2777; AVX512-NEXT: setb %bpl 2778; AVX512-NEXT: cmpq %rcx, %rax 2779; AVX512-NEXT: sbbq %rdi, %rsi 2780; AVX512-NEXT: sbbb $0, %bpl 2781; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx 2782; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2783; AVX512-NEXT: cmpq %rcx, %rdx 2784; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 2785; AVX512-NEXT: movq %rdi, %rax 2786; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 2787; AVX512-NEXT: sbbq %rsi, %rax 2788; AVX512-NEXT: setb %r9b 2789; AVX512-NEXT: cmpq %rdx, %rcx 2790; AVX512-NEXT: sbbq %rdi, %rsi 2791; AVX512-NEXT: sbbb $0, %r9b 2792; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx 2793; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi 2794; AVX512-NEXT: cmpq %rdx, %rsi 2795; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 2796; AVX512-NEXT: movq %rdi, %rcx 2797; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 2798; AVX512-NEXT: sbbq %rax, %rcx 2799; AVX512-NEXT: setb %cl 2800; AVX512-NEXT: cmpq %rsi, %rdx 2801; AVX512-NEXT: sbbq %rdi, %rax 2802; AVX512-NEXT: sbbb $0, %cl 2803; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi 2804; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi 2805; AVX512-NEXT: cmpq %rsi, %rdi 2806; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload 2807; AVX512-NEXT: movq %r8, %rdx 2808; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 2809; AVX512-NEXT: sbbq %rax, %rdx 2810; AVX512-NEXT: setb %dl 2811; AVX512-NEXT: cmpq %rdi, %rsi 2812; AVX512-NEXT: sbbq %r8, %rax 2813; AVX512-NEXT: sbbb $0, %dl 2814; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdi 2815; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 2816; AVX512-NEXT: cmpq %rdi, %r8 2817; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload 2818; AVX512-NEXT: movq %r10, %rsi 2819; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 2820; AVX512-NEXT: sbbq %rax, %rsi 2821; AVX512-NEXT: setb %sil 2822; AVX512-NEXT: cmpq %r8, %rdi 2823; AVX512-NEXT: sbbq %r10, %rax 2824; AVX512-NEXT: sbbb $0, %sil 2825; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r8 2826; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 2827; AVX512-NEXT: cmpq %r8, %r10 2828; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload 2829; AVX512-NEXT: movq %r11, %rdi 2830; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 2831; AVX512-NEXT: sbbq %rax, %rdi 2832; AVX512-NEXT: setb %dil 2833; AVX512-NEXT: cmpq %r10, %r8 2834; AVX512-NEXT: sbbq %r11, %rax 2835; AVX512-NEXT: sbbb $0, %dil 2836; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 2837; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 2838; AVX512-NEXT: cmpq %rax, %r10 2839; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2840; AVX512-NEXT: movq %rbx, %r8 2841; AVX512-NEXT: movq (%rsp), %r11 # 8-byte Reload 2842; AVX512-NEXT: sbbq %r11, %r8 2843; AVX512-NEXT: setb %r8b 2844; AVX512-NEXT: cmpq %r10, %rax 2845; AVX512-NEXT: sbbq %rbx, %r11 2846; AVX512-NEXT: sbbb $0, %r8b 2847; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 2848; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 2849; AVX512-NEXT: cmpq %rbx, %r11 2850; AVX512-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; AVX512-NEXT: movq %r14, %r10
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: sbbq %rax, %r10
; AVX512-NEXT: setb %r10b
; AVX512-NEXT: cmpq %r11, %rbx
; AVX512-NEXT: sbbq %r14, %rax
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: sbbb $0, %r10b
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
; AVX512-NEXT: cmpq %r15, %r11
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: movq %rax, %rbx
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; AVX512-NEXT: sbbq %r14, %rbx
; AVX512-NEXT: setb %bl
; AVX512-NEXT: cmpq %r11, %r15
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: sbbq %rax, %r14
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX512-NEXT: sbbb $0, %bl
; AVX512-NEXT: cmpq %r11, %r14
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: movq %rax, %r15
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; AVX512-NEXT: sbbq %r12, %r15
; AVX512-NEXT: setb %r15b
; AVX512-NEXT: cmpq %r14, %r11
; AVX512-NEXT: sbbq %rax, %r12
; AVX512-NEXT: sbbb $0, %r15b
; AVX512-NEXT: movzbl %r15b, %r11d
; AVX512-NEXT: andl $3, %r11d
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; AVX512-NEXT: movb %r11b, 4(%r14)
; AVX512-NEXT: movzbl %bl, %r11d
; AVX512-NEXT: andl $3, %r11d
; AVX512-NEXT: movzbl %r10b, %r10d
; AVX512-NEXT: andl $3, %r10d
; AVX512-NEXT: leaq (%r10,%r11,4), %r10
; AVX512-NEXT: movzbl %r8b, %r8d
; AVX512-NEXT: andl $3, %r8d
; AVX512-NEXT: shll $4, %r8d
; AVX512-NEXT: orq %r10, %r8
; AVX512-NEXT: movzbl %dil, %edi
; AVX512-NEXT: andl $3, %edi
; AVX512-NEXT: shll $6, %edi
; AVX512-NEXT: orq %r8, %rdi
; AVX512-NEXT: movzbl %sil, %esi
; AVX512-NEXT: andl $3, %esi
; AVX512-NEXT: shll $8, %esi
; AVX512-NEXT: orq %rdi, %rsi
; AVX512-NEXT: movzbl %dl, %edx
; AVX512-NEXT: andl $3, %edx
; AVX512-NEXT: shll $10, %edx
; AVX512-NEXT: movzbl %cl, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: shll $12, %ecx
; AVX512-NEXT: orq %rdx, %rcx
; AVX512-NEXT: movzbl %r9b, %edx
; AVX512-NEXT: andl $3, %edx
; AVX512-NEXT: shll $14, %edx
; AVX512-NEXT: orq %rcx, %rdx
; AVX512-NEXT: movzbl %bpl, %eax
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: shll $16, %eax
; AVX512-NEXT: orq %rdx, %rax
; AVX512-NEXT: orq %rsi, %rax
; AVX512-NEXT: movzbl %r13b, %ecx
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: shll $18, %ecx
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %edx
; AVX512-NEXT: shll $20, %edx
; AVX512-NEXT: orq %rcx, %rdx
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: shll $22, %ecx
; AVX512-NEXT: orq %rdx, %rcx
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %edx
; AVX512-NEXT: shll $24, %edx
; AVX512-NEXT: orq %rcx, %rdx
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %ecx
; AVX512-NEXT: shlq $26, %rcx
; AVX512-NEXT: orq %rdx, %rcx
; AVX512-NEXT: orq %rax, %rcx
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %eax
; AVX512-NEXT: shlq $28, %rax
; AVX512-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 1-byte Folded Reload
; AVX512-NEXT: andl $3, %edx
; AVX512-NEXT: shlq $30, %rdx
; AVX512-NEXT: orq %rax, %rdx
; AVX512-NEXT: orq %rcx, %rdx
; AVX512-NEXT: movq %r14, %rax
; AVX512-NEXT: movl %edx, (%r14)
; AVX512-NEXT: addq $88, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13
; AVX512-NEXT: popq %r14
; AVX512-NEXT: popq %r15
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
;
; X86-LABEL: ucmp_uncommon_vectors:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $132, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: andl $127, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andl $127, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: andl $127, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %edi
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %cl
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %cl
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %cl
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: movl $0, %edx
; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: setb %dl
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %ebp, %ebx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %dl
; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sbbl %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %eax
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: setb %bl
; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: sbbb $0, %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: setb %bh
; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: sbbl %eax, %edx
; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: sbbb $0, %bh
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %esi, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: sbbl %edx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: andl $3, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movb %cl, 4(%edi)
; X86-NEXT: movzbl %bh, %ebp
; X86-NEXT: movzbl %bl, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; X86-NEXT: andl $3, %ebp
; X86-NEXT: andl $3, %ecx
; X86-NEXT: leal (%ecx,%ebp,4), %ecx
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: andl $3, %ebx
; X86-NEXT: shll $6, %ebx
; X86-NEXT: orl %eax, %ebx
; X86-NEXT: andl $3, %esi
; X86-NEXT: shll $8, %esi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: andl $3, %edx
; X86-NEXT: shll $10, %edx
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $12, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: andl $3, %ecx
; X86-NEXT: shll $14, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: andl $3, %esi
; X86-NEXT: shll $18, %esi
; X86-NEXT: orl %eax, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $20, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
; X86-NEXT: orl %edx, %eax
; X86-NEXT: andl $3, %ecx
; X86-NEXT: shll $22, %ecx
; X86-NEXT: andl $3, %esi
; X86-NEXT: shll $24, %esi
; X86-NEXT: orl %ecx, %esi
; X86-NEXT: andl $3, %ebx
; X86-NEXT: shll $26, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: andl $3, %ecx
; X86-NEXT: shll $28, %ecx
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; X86-NEXT: shll $30, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: movl %edx, (%edi)
; X86-NEXT: movl %edi, %eax
; X86-NEXT: addl $132, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %1 = call <17 x i2> @llvm.ucmp(<17 x i71> %x, <17 x i71> %y)
  ret <17 x i2> %1
}