; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86

define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
; X64-LABEL: scmp.8.8:
; X64: # %bb.0:
; X64-NEXT: cmpb %sil, %dil
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: setl %cl
; X86-NEXT: setg %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
  ret i8 %1
}

define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
; X64-LABEL: scmp.8.16:
; X64: # %bb.0:
; X64-NEXT: cmpw %si, %di
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
; X86-NEXT: setl %cl
; X86-NEXT: setg %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
  ret i8 %1
}

define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.8.32:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setl %cl
; X86-NEXT: setg %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
  ret i8 %1
}

define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.8.64:
; X64: # %bb.0:
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
  ret i8 %1
}

define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X64-LABEL: scmp.8.128:
; X64: # %bb.0:
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: sbbq %rcx, %rax
; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: sbbl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: sbbl %ebp, %ecx
; X86-NEXT: setl %cl
; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: setl %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
  ret i8 %1
}

define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.32.32:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: setl %al
; X64-NEXT: setg %cl
; X64-NEXT: subb %al, %cl
; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setl %al
; X86-NEXT: setg %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movsbl %cl, %eax
; X86-NEXT: retl
  %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
  ret i32 %1
}

define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.32.64:
; X64: # %bb.0:
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setl %al
; X64-NEXT: setg %cl
; X64-NEXT: subb %al, %cl
; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
  ret i32 %1
}

define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.64.64:
; X64: # %bb.0:
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setl %al
; X64-NEXT: setg %cl
; X64-NEXT: subb %al, %cl
; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: retq
;
; X86-LABEL: scmp.64.64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
  ret i64 %1
}
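
; The scalar expansions above lower llvm.scmp(x, y) into a compare followed by
; setl/setg and a byte subtract, i.e. (x > y) - (x < y), which yields -1, 0 or 1;
; wider results are then sign-extended (movsbl/movsbq). The tests that follow
; exercise non-power-of-two result and operand widths as well as vector forms.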

define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_narrow_result:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_result:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setl %cl
; X86-NEXT: setg %al
; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
  %1 = call i4 @llvm.scmp(i32 %x, i32 %y)
  ret i4 %1
}

define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
; X64-LABEL: scmp_narrow_op:
; X64: # %bb.0:
; X64-NEXT: shlq $2, %rsi
; X64-NEXT: sarq $2, %rsi
; X64-NEXT: shlq $2, %rdi
; X64-NEXT: sarq $2, %rdi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_op:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $2, %eax
; X86-NEXT: sarl $2, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shll $2, %esi
; X86-NEXT: sarl $2, %esi
; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i62 %x, i62 %y)
  ret i8 %1
}

define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_wide_result:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: setl %al
; X64-NEXT: setg %cl
; X64-NEXT: subb %al, %cl
; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: sarq $63, %rdx
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_result:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: setl %cl
; X86-NEXT: setg %dl
; X86-NEXT: subb %cl, %dl
; X86-NEXT: movsbl %dl, %ecx
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
; X86-NEXT: movw %cx, 16(%eax)
; X86-NEXT: retl $4
  %1 = call i141 @llvm.scmp(i32 %x, i32 %y)
  ret i141 %1
}

define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
; X64-LABEL: scmp_wide_op:
; X64: # %bb.0:
; X64-NEXT: shlq $19, %rcx
; X64-NEXT: sarq $19, %rcx
; X64-NEXT: shlq $19, %rsi
; X64-NEXT: sarq $19, %rsi
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: sbbq %rcx, %rax
; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_op:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $19, %eax
; X86-NEXT: sarl $19, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $19, %ecx
; X86-NEXT: sarl $19, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: setl %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
  %1 = call i8 @llvm.scmp(i109 %x, i109 %y)
  ret i8 %1
}

define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind {
; X64-LABEL: scmp_uncommon_types:
; X64: # %bb.0:
; X64-NEXT: addb %sil, %sil
; X64-NEXT: sarb %sil
; X64-NEXT: addb %dil, %dil
; X64-NEXT: sarb %dil
; X64-NEXT: cmpb %sil, %dil
; X64-NEXT: setl %al
; X64-NEXT: setg %cl
; X64-NEXT: subb %al, %cl
; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: retq
;
; X86-LABEL: scmp_uncommon_types:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addb %cl, %cl
; X86-NEXT: sarb %cl
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setl %al
; X86-NEXT: setg %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movsbl %cl, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: retl
  %1 = call i41 @llvm.scmp(i7 %x, i7 %y)
  ret i41 %1
}

define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE-LABEL: scmp_normal_vectors:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm1
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX2-LABEL: scmp_normal_vectors:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: scmp_normal_vectors:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k2
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1]
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; AVX512-NEXT: retq
;
; X86-LABEL: scmp_normal_vectors:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
; X86-NEXT: setl %dl
; X86-NEXT: setg %dh
; X86-NEXT: subb %dl, %dh
; X86-NEXT: movsbl %dh, %edx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
; X86-NEXT: setl %bl
; X86-NEXT: setg %bh
; X86-NEXT: subb %bl, %bh
; X86-NEXT: movsbl %bh, %edi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setl %bl
; X86-NEXT: setg %bh
; X86-NEXT: subb %bl, %bh
490; X86-NEXT: movsbl %bh, %esi 491; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 492; X86-NEXT: setl %cl 493; X86-NEXT: setg %ch 494; X86-NEXT: subb %cl, %ch 495; X86-NEXT: movsbl %ch, %ecx 496; X86-NEXT: movl %ecx, 12(%eax) 497; X86-NEXT: movl %esi, 8(%eax) 498; X86-NEXT: movl %edi, 4(%eax) 499; X86-NEXT: movl %edx, (%eax) 500; X86-NEXT: popl %esi 501; X86-NEXT: popl %edi 502; X86-NEXT: popl %ebx 503; X86-NEXT: retl $4 504 %1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y) 505 ret <4 x i32> %1 506} 507 508define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind { 509; SSE2-LABEL: scmp_narrow_vec_result: 510; SSE2: # %bb.0: 511; SSE2-NEXT: movd %xmm1, %eax 512; SSE2-NEXT: movd %xmm0, %ecx 513; SSE2-NEXT: cmpl %eax, %ecx 514; SSE2-NEXT: setl %al 515; SSE2-NEXT: setg %cl 516; SSE2-NEXT: subb %al, %cl 517; SSE2-NEXT: movzbl %cl, %eax 518; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 519; SSE2-NEXT: movd %xmm2, %ecx 520; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 521; SSE2-NEXT: movd %xmm2, %edx 522; SSE2-NEXT: cmpl %ecx, %edx 523; SSE2-NEXT: setl %cl 524; SSE2-NEXT: setg %dl 525; SSE2-NEXT: subb %cl, %dl 526; SSE2-NEXT: movzbl %dl, %ecx 527; SSE2-NEXT: shll $8, %ecx 528; SSE2-NEXT: orl %eax, %ecx 529; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 530; SSE2-NEXT: movd %xmm2, %eax 531; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 532; SSE2-NEXT: movd %xmm2, %edx 533; SSE2-NEXT: cmpl %eax, %edx 534; SSE2-NEXT: setl %al 535; SSE2-NEXT: setg %dl 536; SSE2-NEXT: subb %al, %dl 537; SSE2-NEXT: movzbl %dl, %eax 538; SSE2-NEXT: shll $16, %eax 539; SSE2-NEXT: orl %ecx, %eax 540; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 541; SSE2-NEXT: movd %xmm1, %ecx 542; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 543; SSE2-NEXT: movd %xmm0, %edx 544; SSE2-NEXT: cmpl %ecx, %edx 545; SSE2-NEXT: setl %cl 546; SSE2-NEXT: setg %dl 547; SSE2-NEXT: subb %cl, %dl 548; SSE2-NEXT: movzbl %dl, %ecx 549; SSE2-NEXT: shll $24, %ecx 550; SSE2-NEXT: orl %eax, %ecx 551; SSE2-NEXT: movd %ecx, %xmm0 552; SSE2-NEXT: retq 553; 554; SSE4-LABEL: scmp_narrow_vec_result: 555; SSE4: # %bb.0: 556; SSE4-NEXT: pextrd $1, %xmm1, %eax 557; SSE4-NEXT: pextrd $1, %xmm0, %ecx 558; SSE4-NEXT: cmpl %eax, %ecx 559; SSE4-NEXT: setl %al 560; SSE4-NEXT: setg %cl 561; SSE4-NEXT: subb %al, %cl 562; SSE4-NEXT: movzbl %cl, %eax 563; SSE4-NEXT: movd %xmm1, %ecx 564; SSE4-NEXT: movd %xmm0, %edx 565; SSE4-NEXT: cmpl %ecx, %edx 566; SSE4-NEXT: setl %cl 567; SSE4-NEXT: setg %dl 568; SSE4-NEXT: subb %cl, %dl 569; SSE4-NEXT: movzbl %dl, %ecx 570; SSE4-NEXT: movd %ecx, %xmm2 571; SSE4-NEXT: pinsrb $1, %eax, %xmm2 572; SSE4-NEXT: pextrd $2, %xmm1, %eax 573; SSE4-NEXT: pextrd $2, %xmm0, %ecx 574; SSE4-NEXT: cmpl %eax, %ecx 575; SSE4-NEXT: setl %al 576; SSE4-NEXT: setg %cl 577; SSE4-NEXT: subb %al, %cl 578; SSE4-NEXT: movzbl %cl, %eax 579; SSE4-NEXT: pinsrb $2, %eax, %xmm2 580; SSE4-NEXT: pextrd $3, %xmm1, %eax 581; SSE4-NEXT: pextrd $3, %xmm0, %ecx 582; SSE4-NEXT: cmpl %eax, %ecx 583; SSE4-NEXT: setl %al 584; SSE4-NEXT: setg %cl 585; SSE4-NEXT: subb %al, %cl 586; SSE4-NEXT: movzbl %cl, %eax 587; SSE4-NEXT: pinsrb $3, %eax, %xmm2 588; SSE4-NEXT: movdqa %xmm2, %xmm0 589; SSE4-NEXT: retq 590; 591; AVX-LABEL: scmp_narrow_vec_result: 592; AVX: # %bb.0: 593; AVX-NEXT: vpextrd $1, %xmm1, %eax 594; AVX-NEXT: vpextrd $1, %xmm0, %ecx 595; AVX-NEXT: cmpl %eax, %ecx 596; AVX-NEXT: setl %al 597; AVX-NEXT: setg %cl 598; AVX-NEXT: subb %al, %cl 599; AVX-NEXT: vmovd %xmm1, %eax 600; AVX-NEXT: vmovd %xmm0, %edx 601; AVX-NEXT: cmpl %eax, %edx 
602; AVX-NEXT: setl %al 603; AVX-NEXT: setg %dl 604; AVX-NEXT: subb %al, %dl 605; AVX-NEXT: vmovd %edx, %xmm2 606; AVX-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2 607; AVX-NEXT: vpextrd $2, %xmm1, %eax 608; AVX-NEXT: vpextrd $2, %xmm0, %ecx 609; AVX-NEXT: cmpl %eax, %ecx 610; AVX-NEXT: setl %al 611; AVX-NEXT: setg %cl 612; AVX-NEXT: subb %al, %cl 613; AVX-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 614; AVX-NEXT: vpextrd $3, %xmm1, %eax 615; AVX-NEXT: vpextrd $3, %xmm0, %ecx 616; AVX-NEXT: cmpl %eax, %ecx 617; AVX-NEXT: setl %al 618; AVX-NEXT: setg %cl 619; AVX-NEXT: subb %al, %cl 620; AVX-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm0 621; AVX-NEXT: retq 622; 623; X86-LABEL: scmp_narrow_vec_result: 624; X86: # %bb.0: 625; X86-NEXT: pushl %ebx 626; X86-NEXT: pushl %edi 627; X86-NEXT: pushl %esi 628; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 629; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 630; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 631; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 632; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 633; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx 634; X86-NEXT: setl %ch 635; X86-NEXT: setg %cl 636; X86-NEXT: subb %ch, %cl 637; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi 638; X86-NEXT: setl %ch 639; X86-NEXT: setg %bl 640; X86-NEXT: subb %ch, %bl 641; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi 642; X86-NEXT: setl %ch 643; X86-NEXT: setg %bh 644; X86-NEXT: subb %ch, %bh 645; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx 646; X86-NEXT: setl %dl 647; X86-NEXT: setg %ch 648; X86-NEXT: subb %dl, %ch 649; X86-NEXT: movb %ch, 3(%eax) 650; X86-NEXT: movb %bh, 2(%eax) 651; X86-NEXT: movb %bl, 1(%eax) 652; X86-NEXT: movb %cl, (%eax) 653; X86-NEXT: popl %esi 654; X86-NEXT: popl %edi 655; X86-NEXT: popl %ebx 656; X86-NEXT: retl $4 657 %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y) 658 ret <4 x i8> %1 659} 660 661define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind { 662; SSE2-LABEL: scmp_narrow_vec_op: 663; SSE2: # %bb.0: 664; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 665; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 666; SSE2-NEXT: psrad $24, %xmm1 667; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 668; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 669; SSE2-NEXT: psrad $24, %xmm0 670; SSE2-NEXT: movdqa %xmm0, %xmm2 671; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 672; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 673; SSE2-NEXT: psubd %xmm2, %xmm1 674; SSE2-NEXT: movdqa %xmm1, %xmm0 675; SSE2-NEXT: retq 676; 677; SSE4-LABEL: scmp_narrow_vec_op: 678; SSE4: # %bb.0: 679; SSE4-NEXT: pmovsxbd %xmm1, %xmm1 680; SSE4-NEXT: pmovsxbd %xmm0, %xmm0 681; SSE4-NEXT: movdqa %xmm0, %xmm2 682; SSE4-NEXT: pcmpgtd %xmm1, %xmm2 683; SSE4-NEXT: pcmpgtd %xmm0, %xmm1 684; SSE4-NEXT: psubd %xmm2, %xmm1 685; SSE4-NEXT: movdqa %xmm1, %xmm0 686; SSE4-NEXT: retq 687; 688; AVX2-LABEL: scmp_narrow_vec_op: 689; AVX2: # %bb.0: 690; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1 691; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0 692; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm2 693; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 694; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0 695; AVX2-NEXT: retq 696; 697; AVX512-LABEL: scmp_narrow_vec_op: 698; AVX512: # %bb.0: 699; AVX512-NEXT: vpmovsxbd %xmm0, %xmm0 700; AVX512-NEXT: vpmovsxbd %xmm1, %xmm1 701; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 702; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k2 703; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k2} {z} = [1,1,1,1] 704; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 705; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} 706; AVX512-NEXT: retq 707; 708; 
X86-LABEL: scmp_narrow_vec_op: 709; X86: # %bb.0: 710; X86-NEXT: pushl %ebx 711; X86-NEXT: pushl %edi 712; X86-NEXT: pushl %esi 713; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 714; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 715; X86-NEXT: movb {{[0-9]+}}(%esp), %ch 716; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 717; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx 718; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl 719; X86-NEXT: setl %dl 720; X86-NEXT: setg %dh 721; X86-NEXT: subb %dl, %dh 722; X86-NEXT: movsbl %dh, %edx 723; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl 724; X86-NEXT: setl %bl 725; X86-NEXT: setg %bh 726; X86-NEXT: subb %bl, %bh 727; X86-NEXT: movsbl %bh, %esi 728; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch 729; X86-NEXT: setl %ch 730; X86-NEXT: setg %bl 731; X86-NEXT: subb %ch, %bl 732; X86-NEXT: movsbl %bl, %edi 733; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl 734; X86-NEXT: setl %cl 735; X86-NEXT: setg %ch 736; X86-NEXT: subb %cl, %ch 737; X86-NEXT: movsbl %ch, %ecx 738; X86-NEXT: movl %ecx, 12(%eax) 739; X86-NEXT: movl %edi, 8(%eax) 740; X86-NEXT: movl %esi, 4(%eax) 741; X86-NEXT: movl %edx, (%eax) 742; X86-NEXT: popl %esi 743; X86-NEXT: popl %edi 744; X86-NEXT: popl %ebx 745; X86-NEXT: retl $4 746 %1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y) 747 ret <4 x i32> %1 748} 749 750define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { 751; SSE2-LABEL: scmp_wide_vec_result: 752; SSE2: # %bb.0: 753; SSE2-NEXT: movdqa %xmm1, %xmm2 754; SSE2-NEXT: movdqa %xmm0, %xmm3 755; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 756; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 757; SSE2-NEXT: psrad $24, %xmm0 758; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 759; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 760; SSE2-NEXT: psrad $24, %xmm5 761; SSE2-NEXT: movdqa %xmm5, %xmm6 762; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 763; SSE2-NEXT: pcmpgtd %xmm5, %xmm0 764; SSE2-NEXT: psubd %xmm6, %xmm0 765; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 766; SSE2-NEXT: psrad $24, %xmm1 767; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7] 768; SSE2-NEXT: psrad $24, %xmm4 769; SSE2-NEXT: movdqa %xmm4, %xmm5 770; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 771; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 772; SSE2-NEXT: psubd %xmm5, %xmm1 773; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] 774; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 775; SSE2-NEXT: psrad $24, %xmm2 776; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm3[8],xmm5[9],xmm3[9],xmm5[10],xmm3[10],xmm5[11],xmm3[11],xmm5[12],xmm3[12],xmm5[13],xmm3[13],xmm5[14],xmm3[14],xmm5[15],xmm3[15] 777; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] 778; SSE2-NEXT: psrad $24, %xmm3 779; SSE2-NEXT: movdqa %xmm3, %xmm6 780; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 781; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 782; SSE2-NEXT: psubd %xmm6, %xmm2 783; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 784; SSE2-NEXT: psrad $24, %xmm3 785; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 786; SSE2-NEXT: 
psrad $24, %xmm4 787; SSE2-NEXT: movdqa %xmm4, %xmm5 788; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 789; SSE2-NEXT: pcmpgtd %xmm4, %xmm3 790; SSE2-NEXT: psubd %xmm5, %xmm3 791; SSE2-NEXT: retq 792; 793; SSE4-LABEL: scmp_wide_vec_result: 794; SSE4: # %bb.0: 795; SSE4-NEXT: movdqa %xmm0, %xmm4 796; SSE4-NEXT: pmovsxbd %xmm1, %xmm0 797; SSE4-NEXT: pmovsxbd %xmm4, %xmm2 798; SSE4-NEXT: movdqa %xmm2, %xmm3 799; SSE4-NEXT: pcmpgtd %xmm0, %xmm3 800; SSE4-NEXT: pcmpgtd %xmm2, %xmm0 801; SSE4-NEXT: psubd %xmm3, %xmm0 802; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 803; SSE4-NEXT: pmovsxbd %xmm2, %xmm5 804; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,1,1] 805; SSE4-NEXT: pmovsxbd %xmm2, %xmm2 806; SSE4-NEXT: movdqa %xmm2, %xmm3 807; SSE4-NEXT: pcmpgtd %xmm5, %xmm3 808; SSE4-NEXT: pcmpgtd %xmm2, %xmm5 809; SSE4-NEXT: psubd %xmm3, %xmm5 810; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 811; SSE4-NEXT: pmovsxbd %xmm2, %xmm2 812; SSE4-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3] 813; SSE4-NEXT: pmovsxbd %xmm3, %xmm3 814; SSE4-NEXT: movdqa %xmm3, %xmm6 815; SSE4-NEXT: pcmpgtd %xmm2, %xmm6 816; SSE4-NEXT: pcmpgtd %xmm3, %xmm2 817; SSE4-NEXT: psubd %xmm6, %xmm2 818; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 819; SSE4-NEXT: pmovsxbd %xmm1, %xmm3 820; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3] 821; SSE4-NEXT: pmovsxbd %xmm1, %xmm1 822; SSE4-NEXT: movdqa %xmm1, %xmm4 823; SSE4-NEXT: pcmpgtd %xmm3, %xmm4 824; SSE4-NEXT: pcmpgtd %xmm1, %xmm3 825; SSE4-NEXT: psubd %xmm4, %xmm3 826; SSE4-NEXT: movdqa %xmm5, %xmm1 827; SSE4-NEXT: retq 828; 829; AVX2-LABEL: scmp_wide_vec_result: 830; AVX2: # %bb.0: 831; AVX2-NEXT: vpmovsxbd %xmm1, %ymm2 832; AVX2-NEXT: vpmovsxbd %xmm0, %ymm3 833; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm4 834; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2 835; AVX2-NEXT: vpsubd %ymm4, %ymm2, %ymm2 836; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 837; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 838; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 839; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 840; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm3 841; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 842; AVX2-NEXT: vpsubd %ymm3, %ymm0, %ymm1 843; AVX2-NEXT: vmovdqa %ymm2, %ymm0 844; AVX2-NEXT: retq 845; 846; AVX512-LABEL: scmp_wide_vec_result: 847; AVX512: # %bb.0: 848; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 849; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k2 850; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 851; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 852; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} 853; AVX512-NEXT: retq 854; 855; X86-LABEL: scmp_wide_vec_result: 856; X86: # %bb.0: 857; X86-NEXT: pushl %ebp 858; X86-NEXT: pushl %ebx 859; X86-NEXT: pushl %edi 860; X86-NEXT: pushl %esi 861; X86-NEXT: subl $16, %esp 862; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx 863; X86-NEXT: movb {{[0-9]+}}(%esp), %ah 864; X86-NEXT: movb {{[0-9]+}}(%esp), %ch 865; X86-NEXT: movb {{[0-9]+}}(%esp), %dh 866; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 867; X86-NEXT: movb {{[0-9]+}}(%esp), %bh 868; X86-NEXT: movb {{[0-9]+}}(%esp), %al 869; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 870; X86-NEXT: setl %al 871; X86-NEXT: setg %cl 872; X86-NEXT: subb %al, %cl 873; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 874; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh 875; X86-NEXT: setl %al 876; X86-NEXT: setg %cl 877; X86-NEXT: subb %al, %cl 878; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 879; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl 880; X86-NEXT: setl %al 881; X86-NEXT: setg %cl 882; X86-NEXT: subb %al, %cl 883; 
X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 884; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh 885; X86-NEXT: setl %al 886; X86-NEXT: setg %cl 887; X86-NEXT: subb %al, %cl 888; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 889; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch 890; X86-NEXT: setl %al 891; X86-NEXT: setg %cl 892; X86-NEXT: subb %al, %cl 893; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 894; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah 895; X86-NEXT: setl %al 896; X86-NEXT: setg %cl 897; X86-NEXT: subb %al, %cl 898; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 899; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl 900; X86-NEXT: setl %al 901; X86-NEXT: setg %cl 902; X86-NEXT: subb %al, %cl 903; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 904; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 905; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 906; X86-NEXT: setl %al 907; X86-NEXT: setg %bh 908; X86-NEXT: subb %al, %bh 909; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 910; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 911; X86-NEXT: setl %al 912; X86-NEXT: setg %bl 913; X86-NEXT: subb %al, %bl 914; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 915; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 916; X86-NEXT: setl %al 917; X86-NEXT: setg %dh 918; X86-NEXT: subb %al, %dh 919; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 920; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 921; X86-NEXT: setl %al 922; X86-NEXT: setg %dl 923; X86-NEXT: subb %al, %dl 924; X86-NEXT: movsbl %dl, %eax 925; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 926; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 927; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 928; X86-NEXT: setl %al 929; X86-NEXT: setg %dl 930; X86-NEXT: subb %al, %dl 931; X86-NEXT: movsbl %dl, %eax 932; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 933; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 934; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 935; X86-NEXT: setl %al 936; X86-NEXT: setg %dl 937; X86-NEXT: subb %al, %dl 938; X86-NEXT: movsbl %dl, %ebp 939; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 940; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 941; X86-NEXT: setl %al 942; X86-NEXT: setg %dl 943; X86-NEXT: subb %al, %dl 944; X86-NEXT: movsbl %dl, %edi 945; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 946; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 947; X86-NEXT: setl %al 948; X86-NEXT: setg %ah 949; X86-NEXT: subb %al, %ah 950; X86-NEXT: movsbl %ah, %esi 951; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 952; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al 953; X86-NEXT: setl %al 954; X86-NEXT: setg %dl 955; X86-NEXT: subb %al, %dl 956; X86-NEXT: movsbl %dl, %ecx 957; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 958; X86-NEXT: movl %ecx, 60(%eax) 959; X86-NEXT: movl %esi, 56(%eax) 960; X86-NEXT: movl %edi, 52(%eax) 961; X86-NEXT: movl %ebp, 48(%eax) 962; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 963; X86-NEXT: movl %ecx, 44(%eax) 964; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 965; X86-NEXT: movl %ecx, 40(%eax) 966; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 967; X86-NEXT: movsbl %dh, %edx 968; X86-NEXT: movl %edx, 36(%eax) 969; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload 970; X86-NEXT: movsbl %bl, %esi 971; X86-NEXT: movl %esi, 32(%eax) 972; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload 973; X86-NEXT: movsbl %bh, %edi 974; X86-NEXT: movl %edi, 28(%eax) 975; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload 976; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte 
Folded Reload 977; X86-NEXT: movl %ebx, 24(%eax) 978; X86-NEXT: movl %edi, 20(%eax) 979; X86-NEXT: movl %esi, 16(%eax) 980; X86-NEXT: movl %edx, 12(%eax) 981; X86-NEXT: movl %ecx, 8(%eax) 982; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 983; X86-NEXT: movl %ecx, 4(%eax) 984; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload 985; X86-NEXT: movl %ecx, (%eax) 986; X86-NEXT: addl $16, %esp 987; X86-NEXT: popl %esi 988; X86-NEXT: popl %edi 989; X86-NEXT: popl %ebx 990; X86-NEXT: popl %ebp 991; X86-NEXT: retl $4 992 %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y) 993 ret <16 x i32> %1 994} 995 996define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind { 997; SSE2-LABEL: scmp_wide_vec_op: 998; SSE2: # %bb.0: 999; SSE2-NEXT: movq %xmm7, %rax 1000; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1001; SSE2-NEXT: setl %al 1002; SSE2-NEXT: setg %cl 1003; SSE2-NEXT: subb %al, %cl 1004; SSE2-NEXT: movzbl %cl, %eax 1005; SSE2-NEXT: movd %eax, %xmm8 1006; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3] 1007; SSE2-NEXT: movq %xmm7, %rax 1008; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1009; SSE2-NEXT: setl %al 1010; SSE2-NEXT: setg %cl 1011; SSE2-NEXT: subb %al, %cl 1012; SSE2-NEXT: movzbl %cl, %eax 1013; SSE2-NEXT: movd %eax, %xmm7 1014; SSE2-NEXT: movq %xmm6, %rax 1015; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1016; SSE2-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] 1017; SSE2-NEXT: setl %al 1018; SSE2-NEXT: setg %cl 1019; SSE2-NEXT: subb %al, %cl 1020; SSE2-NEXT: movzbl %cl, %eax 1021; SSE2-NEXT: movd %eax, %xmm7 1022; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3] 1023; SSE2-NEXT: movq %xmm6, %rax 1024; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1025; SSE2-NEXT: setl %al 1026; SSE2-NEXT: setg %cl 1027; SSE2-NEXT: subb %al, %cl 1028; SSE2-NEXT: movzbl %cl, %eax 1029; SSE2-NEXT: movd %eax, %xmm6 1030; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] 1031; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] 1032; SSE2-NEXT: movq %xmm5, %rax 1033; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1034; SSE2-NEXT: setl %al 1035; SSE2-NEXT: setg %cl 1036; SSE2-NEXT: subb %al, %cl 1037; SSE2-NEXT: movzbl %cl, %eax 1038; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] 1039; SSE2-NEXT: movq %xmm5, %rcx 1040; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1041; SSE2-NEXT: movd %eax, %xmm6 1042; SSE2-NEXT: setl %al 1043; SSE2-NEXT: setg %cl 1044; SSE2-NEXT: subb %al, %cl 1045; SSE2-NEXT: movzbl %cl, %eax 1046; SSE2-NEXT: movq %xmm4, %rcx 1047; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1048; SSE2-NEXT: movd %eax, %xmm8 1049; SSE2-NEXT: setl %al 1050; SSE2-NEXT: setg %cl 1051; SSE2-NEXT: subb %al, %cl 1052; SSE2-NEXT: movzbl %cl, %eax 1053; SSE2-NEXT: movd %eax, %xmm5 1054; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] 1055; SSE2-NEXT: movq %xmm4, %rax 1056; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1057; SSE2-NEXT: setl %al 1058; SSE2-NEXT: setg %cl 1059; SSE2-NEXT: subb %al, %cl 1060; SSE2-NEXT: movzbl %cl, %eax 1061; SSE2-NEXT: movd %eax, %xmm4 1062; SSE2-NEXT: movq %xmm3, %rax 1063; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1064; SSE2-NEXT: setl %al 1065; SSE2-NEXT: setg %cl 1066; SSE2-NEXT: subb %al, %cl 1067; SSE2-NEXT: movzbl %cl, %eax 1068; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] 1069; 
SSE2-NEXT: movq %xmm3, %rcx 1070; SSE2-NEXT: movd %eax, %xmm3 1071; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1072; SSE2-NEXT: setl %al 1073; SSE2-NEXT: setg %cl 1074; SSE2-NEXT: subb %al, %cl 1075; SSE2-NEXT: movzbl %cl, %eax 1076; SSE2-NEXT: movq %xmm2, %rcx 1077; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1078; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 1079; SSE2-NEXT: movq %xmm2, %rcx 1080; SSE2-NEXT: movd %eax, %xmm2 1081; SSE2-NEXT: setl %al 1082; SSE2-NEXT: setg %dl 1083; SSE2-NEXT: subb %al, %dl 1084; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1085; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3],xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7] 1086; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 1087; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] 1088; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1] 1089; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1090; SSE2-NEXT: movzbl %dl, %eax 1091; SSE2-NEXT: movd %eax, %xmm2 1092; SSE2-NEXT: setl %al 1093; SSE2-NEXT: setg %cl 1094; SSE2-NEXT: subb %al, %cl 1095; SSE2-NEXT: movzbl %cl, %eax 1096; SSE2-NEXT: movd %eax, %xmm4 1097; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 1098; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] 1099; SSE2-NEXT: movq %xmm1, %rax 1100; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1101; SSE2-NEXT: setl %al 1102; SSE2-NEXT: setg %cl 1103; SSE2-NEXT: subb %al, %cl 1104; SSE2-NEXT: movzbl %cl, %eax 1105; SSE2-NEXT: movd %eax, %xmm3 1106; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1107; SSE2-NEXT: movq %xmm1, %rax 1108; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1109; SSE2-NEXT: setl %al 1110; SSE2-NEXT: setg %cl 1111; SSE2-NEXT: subb %al, %cl 1112; SSE2-NEXT: movzbl %cl, %eax 1113; SSE2-NEXT: movd %eax, %xmm1 1114; SSE2-NEXT: movq %xmm0, %rax 1115; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1116; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 1117; SSE2-NEXT: setl %al 1118; SSE2-NEXT: setg %cl 1119; SSE2-NEXT: subb %al, %cl 1120; SSE2-NEXT: movzbl %cl, %eax 1121; SSE2-NEXT: movd %eax, %xmm1 1122; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1123; SSE2-NEXT: movq %xmm0, %rax 1124; SSE2-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1125; SSE2-NEXT: setl %al 1126; SSE2-NEXT: setg %cl 1127; SSE2-NEXT: subb %al, %cl 1128; SSE2-NEXT: movzbl %cl, %eax 1129; SSE2-NEXT: movd %eax, %xmm0 1130; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1131; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 1132; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1133; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] 1134; SSE2-NEXT: movdqa %xmm1, %xmm0 1135; SSE2-NEXT: retq 1136; 1137; SSE4-LABEL: scmp_wide_vec_op: 1138; SSE4: # %bb.0: 1139; SSE4-NEXT: pextrq $1, %xmm0, %rax 1140; SSE4-NEXT: cmpq 
{{[0-9]+}}(%rsp), %rax 1141; SSE4-NEXT: setl %al 1142; SSE4-NEXT: setg %cl 1143; SSE4-NEXT: subb %al, %cl 1144; SSE4-NEXT: movzbl %cl, %eax 1145; SSE4-NEXT: movq %xmm0, %rcx 1146; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx 1147; SSE4-NEXT: setl %cl 1148; SSE4-NEXT: setg %dl 1149; SSE4-NEXT: subb %cl, %dl 1150; SSE4-NEXT: movzbl %dl, %ecx 1151; SSE4-NEXT: movd %ecx, %xmm0 1152; SSE4-NEXT: pinsrb $1, %eax, %xmm0 1153; SSE4-NEXT: movq %xmm1, %rax 1154; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1155; SSE4-NEXT: setl %al 1156; SSE4-NEXT: setg %cl 1157; SSE4-NEXT: subb %al, %cl 1158; SSE4-NEXT: movzbl %cl, %eax 1159; SSE4-NEXT: pinsrb $2, %eax, %xmm0 1160; SSE4-NEXT: pextrq $1, %xmm1, %rax 1161; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1162; SSE4-NEXT: setl %al 1163; SSE4-NEXT: setg %cl 1164; SSE4-NEXT: subb %al, %cl 1165; SSE4-NEXT: movzbl %cl, %eax 1166; SSE4-NEXT: pinsrb $3, %eax, %xmm0 1167; SSE4-NEXT: movq %xmm2, %rax 1168; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1169; SSE4-NEXT: setl %al 1170; SSE4-NEXT: setg %cl 1171; SSE4-NEXT: subb %al, %cl 1172; SSE4-NEXT: movzbl %cl, %eax 1173; SSE4-NEXT: pinsrb $4, %eax, %xmm0 1174; SSE4-NEXT: pextrq $1, %xmm2, %rax 1175; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1176; SSE4-NEXT: setl %al 1177; SSE4-NEXT: setg %cl 1178; SSE4-NEXT: subb %al, %cl 1179; SSE4-NEXT: movzbl %cl, %eax 1180; SSE4-NEXT: pinsrb $5, %eax, %xmm0 1181; SSE4-NEXT: movq %xmm3, %rax 1182; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1183; SSE4-NEXT: setl %al 1184; SSE4-NEXT: setg %cl 1185; SSE4-NEXT: subb %al, %cl 1186; SSE4-NEXT: movzbl %cl, %eax 1187; SSE4-NEXT: pinsrb $6, %eax, %xmm0 1188; SSE4-NEXT: pextrq $1, %xmm3, %rax 1189; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1190; SSE4-NEXT: setl %al 1191; SSE4-NEXT: setg %cl 1192; SSE4-NEXT: subb %al, %cl 1193; SSE4-NEXT: movzbl %cl, %eax 1194; SSE4-NEXT: pinsrb $7, %eax, %xmm0 1195; SSE4-NEXT: movq %xmm4, %rax 1196; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1197; SSE4-NEXT: setl %al 1198; SSE4-NEXT: setg %cl 1199; SSE4-NEXT: subb %al, %cl 1200; SSE4-NEXT: movzbl %cl, %eax 1201; SSE4-NEXT: pinsrb $8, %eax, %xmm0 1202; SSE4-NEXT: pextrq $1, %xmm4, %rax 1203; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1204; SSE4-NEXT: setl %al 1205; SSE4-NEXT: setg %cl 1206; SSE4-NEXT: subb %al, %cl 1207; SSE4-NEXT: movzbl %cl, %eax 1208; SSE4-NEXT: pinsrb $9, %eax, %xmm0 1209; SSE4-NEXT: movq %xmm5, %rax 1210; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1211; SSE4-NEXT: setl %al 1212; SSE4-NEXT: setg %cl 1213; SSE4-NEXT: subb %al, %cl 1214; SSE4-NEXT: movzbl %cl, %eax 1215; SSE4-NEXT: pinsrb $10, %eax, %xmm0 1216; SSE4-NEXT: pextrq $1, %xmm5, %rax 1217; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1218; SSE4-NEXT: setl %al 1219; SSE4-NEXT: setg %cl 1220; SSE4-NEXT: subb %al, %cl 1221; SSE4-NEXT: movzbl %cl, %eax 1222; SSE4-NEXT: pinsrb $11, %eax, %xmm0 1223; SSE4-NEXT: movq %xmm6, %rax 1224; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1225; SSE4-NEXT: setl %al 1226; SSE4-NEXT: setg %cl 1227; SSE4-NEXT: subb %al, %cl 1228; SSE4-NEXT: movzbl %cl, %eax 1229; SSE4-NEXT: pinsrb $12, %eax, %xmm0 1230; SSE4-NEXT: pextrq $1, %xmm6, %rax 1231; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1232; SSE4-NEXT: setl %al 1233; SSE4-NEXT: setg %cl 1234; SSE4-NEXT: subb %al, %cl 1235; SSE4-NEXT: movzbl %cl, %eax 1236; SSE4-NEXT: pinsrb $13, %eax, %xmm0 1237; SSE4-NEXT: movq %xmm7, %rax 1238; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1239; SSE4-NEXT: setl %al 1240; SSE4-NEXT: setg %cl 1241; SSE4-NEXT: subb %al, %cl 1242; SSE4-NEXT: movzbl %cl, %eax 1243; SSE4-NEXT: pinsrb $14, %eax, %xmm0 1244; SSE4-NEXT: pextrq $1, %xmm7, 
%rax 1245; SSE4-NEXT: cmpq {{[0-9]+}}(%rsp), %rax 1246; SSE4-NEXT: setl %al 1247; SSE4-NEXT: setg %cl 1248; SSE4-NEXT: subb %al, %cl 1249; SSE4-NEXT: movzbl %cl, %eax 1250; SSE4-NEXT: pinsrb $15, %eax, %xmm0 1251; SSE4-NEXT: retq 1252; 1253; AVX2-LABEL: scmp_wide_vec_op: 1254; AVX2: # %bb.0: 1255; AVX2-NEXT: vpextrq $1, %xmm4, %rax 1256; AVX2-NEXT: vpextrq $1, %xmm0, %rcx 1257; AVX2-NEXT: cmpq %rax, %rcx 1258; AVX2-NEXT: setl %al 1259; AVX2-NEXT: setg %cl 1260; AVX2-NEXT: subb %al, %cl 1261; AVX2-NEXT: vmovq %xmm4, %rax 1262; AVX2-NEXT: vmovq %xmm0, %rdx 1263; AVX2-NEXT: cmpq %rax, %rdx 1264; AVX2-NEXT: setl %al 1265; AVX2-NEXT: setg %dl 1266; AVX2-NEXT: subb %al, %dl 1267; AVX2-NEXT: vmovd %edx, %xmm8 1268; AVX2-NEXT: vpinsrb $1, %ecx, %xmm8, %xmm8 1269; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm4 1270; AVX2-NEXT: vmovq %xmm4, %rax 1271; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1272; AVX2-NEXT: vmovq %xmm0, %rcx 1273; AVX2-NEXT: cmpq %rax, %rcx 1274; AVX2-NEXT: setl %al 1275; AVX2-NEXT: setg %cl 1276; AVX2-NEXT: subb %al, %cl 1277; AVX2-NEXT: vpinsrb $2, %ecx, %xmm8, %xmm8 1278; AVX2-NEXT: vpextrq $1, %xmm4, %rax 1279; AVX2-NEXT: vpextrq $1, %xmm0, %rcx 1280; AVX2-NEXT: cmpq %rax, %rcx 1281; AVX2-NEXT: setl %al 1282; AVX2-NEXT: setg %cl 1283; AVX2-NEXT: subb %al, %cl 1284; AVX2-NEXT: vpinsrb $3, %ecx, %xmm8, %xmm0 1285; AVX2-NEXT: vmovq %xmm5, %rax 1286; AVX2-NEXT: vmovq %xmm1, %rcx 1287; AVX2-NEXT: cmpq %rax, %rcx 1288; AVX2-NEXT: setl %al 1289; AVX2-NEXT: setg %cl 1290; AVX2-NEXT: subb %al, %cl 1291; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 1292; AVX2-NEXT: vpextrq $1, %xmm5, %rax 1293; AVX2-NEXT: vpextrq $1, %xmm1, %rcx 1294; AVX2-NEXT: cmpq %rax, %rcx 1295; AVX2-NEXT: setl %al 1296; AVX2-NEXT: setg %cl 1297; AVX2-NEXT: subb %al, %cl 1298; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 1299; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm4 1300; AVX2-NEXT: vmovq %xmm4, %rax 1301; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 1302; AVX2-NEXT: vmovq %xmm1, %rcx 1303; AVX2-NEXT: cmpq %rax, %rcx 1304; AVX2-NEXT: setl %al 1305; AVX2-NEXT: setg %cl 1306; AVX2-NEXT: subb %al, %cl 1307; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 1308; AVX2-NEXT: vpextrq $1, %xmm4, %rax 1309; AVX2-NEXT: vpextrq $1, %xmm1, %rcx 1310; AVX2-NEXT: cmpq %rax, %rcx 1311; AVX2-NEXT: setl %al 1312; AVX2-NEXT: setg %cl 1313; AVX2-NEXT: subb %al, %cl 1314; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 1315; AVX2-NEXT: vmovq %xmm6, %rax 1316; AVX2-NEXT: vmovq %xmm2, %rcx 1317; AVX2-NEXT: cmpq %rax, %rcx 1318; AVX2-NEXT: setl %al 1319; AVX2-NEXT: setg %cl 1320; AVX2-NEXT: subb %al, %cl 1321; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 1322; AVX2-NEXT: vpextrq $1, %xmm6, %rax 1323; AVX2-NEXT: vpextrq $1, %xmm2, %rcx 1324; AVX2-NEXT: cmpq %rax, %rcx 1325; AVX2-NEXT: setl %al 1326; AVX2-NEXT: setg %cl 1327; AVX2-NEXT: subb %al, %cl 1328; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1329; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm1 1330; AVX2-NEXT: vmovq %xmm1, %rax 1331; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 1332; AVX2-NEXT: vmovq %xmm2, %rcx 1333; AVX2-NEXT: cmpq %rax, %rcx 1334; AVX2-NEXT: setl %al 1335; AVX2-NEXT: setg %cl 1336; AVX2-NEXT: subb %al, %cl 1337; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1338; AVX2-NEXT: vpextrq $1, %xmm1, %rax 1339; AVX2-NEXT: vpextrq $1, %xmm2, %rcx 1340; AVX2-NEXT: cmpq %rax, %rcx 1341; AVX2-NEXT: setl %al 1342; AVX2-NEXT: setg %cl 1343; AVX2-NEXT: subb %al, %cl 1344; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1345; AVX2-NEXT: vmovq %xmm7, %rax 1346; AVX2-NEXT: vmovq %xmm3, %rcx 1347; AVX2-NEXT: cmpq %rax, %rcx 1348; 
AVX2-NEXT: setl %al 1349; AVX2-NEXT: setg %cl 1350; AVX2-NEXT: subb %al, %cl 1351; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1352; AVX2-NEXT: vpextrq $1, %xmm7, %rax 1353; AVX2-NEXT: vpextrq $1, %xmm3, %rcx 1354; AVX2-NEXT: cmpq %rax, %rcx 1355; AVX2-NEXT: setl %al 1356; AVX2-NEXT: setg %cl 1357; AVX2-NEXT: subb %al, %cl 1358; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1359; AVX2-NEXT: vextracti128 $1, %ymm7, %xmm1 1360; AVX2-NEXT: vmovq %xmm1, %rax 1361; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm2 1362; AVX2-NEXT: vmovq %xmm2, %rcx 1363; AVX2-NEXT: cmpq %rax, %rcx 1364; AVX2-NEXT: setl %al 1365; AVX2-NEXT: setg %cl 1366; AVX2-NEXT: subb %al, %cl 1367; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1368; AVX2-NEXT: vpextrq $1, %xmm1, %rax 1369; AVX2-NEXT: vpextrq $1, %xmm2, %rcx 1370; AVX2-NEXT: cmpq %rax, %rcx 1371; AVX2-NEXT: setl %al 1372; AVX2-NEXT: setg %cl 1373; AVX2-NEXT: subb %al, %cl 1374; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 1375; AVX2-NEXT: vzeroupper 1376; AVX2-NEXT: retq 1377; 1378; AVX512-LABEL: scmp_wide_vec_op: 1379; AVX512: # %bb.0: 1380; AVX512-NEXT: vpextrq $1, %xmm2, %rax 1381; AVX512-NEXT: vpextrq $1, %xmm0, %rcx 1382; AVX512-NEXT: cmpq %rax, %rcx 1383; AVX512-NEXT: setl %al 1384; AVX512-NEXT: setg %cl 1385; AVX512-NEXT: subb %al, %cl 1386; AVX512-NEXT: vmovq %xmm2, %rax 1387; AVX512-NEXT: vmovq %xmm0, %rdx 1388; AVX512-NEXT: cmpq %rax, %rdx 1389; AVX512-NEXT: setl %al 1390; AVX512-NEXT: setg %dl 1391; AVX512-NEXT: subb %al, %dl 1392; AVX512-NEXT: vmovd %edx, %xmm4 1393; AVX512-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4 1394; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm5 1395; AVX512-NEXT: vmovq %xmm5, %rax 1396; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm6 1397; AVX512-NEXT: vmovq %xmm6, %rcx 1398; AVX512-NEXT: cmpq %rax, %rcx 1399; AVX512-NEXT: setl %al 1400; AVX512-NEXT: setg %cl 1401; AVX512-NEXT: subb %al, %cl 1402; AVX512-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4 1403; AVX512-NEXT: vpextrq $1, %xmm5, %rax 1404; AVX512-NEXT: vpextrq $1, %xmm6, %rcx 1405; AVX512-NEXT: cmpq %rax, %rcx 1406; AVX512-NEXT: setl %al 1407; AVX512-NEXT: setg %cl 1408; AVX512-NEXT: subb %al, %cl 1409; AVX512-NEXT: vpinsrb $3, %ecx, %xmm4, %xmm4 1410; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5 1411; AVX512-NEXT: vmovq %xmm5, %rax 1412; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm6 1413; AVX512-NEXT: vmovq %xmm6, %rcx 1414; AVX512-NEXT: cmpq %rax, %rcx 1415; AVX512-NEXT: setl %al 1416; AVX512-NEXT: setg %cl 1417; AVX512-NEXT: subb %al, %cl 1418; AVX512-NEXT: vpinsrb $4, %ecx, %xmm4, %xmm4 1419; AVX512-NEXT: vpextrq $1, %xmm5, %rax 1420; AVX512-NEXT: vpextrq $1, %xmm6, %rcx 1421; AVX512-NEXT: cmpq %rax, %rcx 1422; AVX512-NEXT: setl %al 1423; AVX512-NEXT: setg %cl 1424; AVX512-NEXT: subb %al, %cl 1425; AVX512-NEXT: vpinsrb $5, %ecx, %xmm4, %xmm4 1426; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm2 1427; AVX512-NEXT: vmovq %xmm2, %rax 1428; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0 1429; AVX512-NEXT: vmovq %xmm0, %rcx 1430; AVX512-NEXT: cmpq %rax, %rcx 1431; AVX512-NEXT: setl %al 1432; AVX512-NEXT: setg %cl 1433; AVX512-NEXT: subb %al, %cl 1434; AVX512-NEXT: vpinsrb $6, %ecx, %xmm4, %xmm4 1435; AVX512-NEXT: vpextrq $1, %xmm2, %rax 1436; AVX512-NEXT: vpextrq $1, %xmm0, %rcx 1437; AVX512-NEXT: cmpq %rax, %rcx 1438; AVX512-NEXT: setl %al 1439; AVX512-NEXT: setg %cl 1440; AVX512-NEXT: subb %al, %cl 1441; AVX512-NEXT: vpinsrb $7, %ecx, %xmm4, %xmm0 1442; AVX512-NEXT: vmovq %xmm3, %rax 1443; AVX512-NEXT: vmovq %xmm1, %rcx 1444; AVX512-NEXT: cmpq %rax, %rcx 1445; AVX512-NEXT: setl %al 1446; AVX512-NEXT: setg %cl 1447; 
AVX512-NEXT: subb %al, %cl 1448; AVX512-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 1449; AVX512-NEXT: vpextrq $1, %xmm3, %rax 1450; AVX512-NEXT: vpextrq $1, %xmm1, %rcx 1451; AVX512-NEXT: cmpq %rax, %rcx 1452; AVX512-NEXT: setl %al 1453; AVX512-NEXT: setg %cl 1454; AVX512-NEXT: subb %al, %cl 1455; AVX512-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1456; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm2 1457; AVX512-NEXT: vmovq %xmm2, %rax 1458; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm4 1459; AVX512-NEXT: vmovq %xmm4, %rcx 1460; AVX512-NEXT: cmpq %rax, %rcx 1461; AVX512-NEXT: setl %al 1462; AVX512-NEXT: setg %cl 1463; AVX512-NEXT: subb %al, %cl 1464; AVX512-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1465; AVX512-NEXT: vpextrq $1, %xmm2, %rax 1466; AVX512-NEXT: vpextrq $1, %xmm4, %rcx 1467; AVX512-NEXT: cmpq %rax, %rcx 1468; AVX512-NEXT: setl %al 1469; AVX512-NEXT: setg %cl 1470; AVX512-NEXT: subb %al, %cl 1471; AVX512-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1472; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm2 1473; AVX512-NEXT: vmovq %xmm2, %rax 1474; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm4 1475; AVX512-NEXT: vmovq %xmm4, %rcx 1476; AVX512-NEXT: cmpq %rax, %rcx 1477; AVX512-NEXT: setl %al 1478; AVX512-NEXT: setg %cl 1479; AVX512-NEXT: subb %al, %cl 1480; AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1481; AVX512-NEXT: vpextrq $1, %xmm2, %rax 1482; AVX512-NEXT: vpextrq $1, %xmm4, %rcx 1483; AVX512-NEXT: cmpq %rax, %rcx 1484; AVX512-NEXT: setl %al 1485; AVX512-NEXT: setg %cl 1486; AVX512-NEXT: subb %al, %cl 1487; AVX512-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1488; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2 1489; AVX512-NEXT: vmovq %xmm2, %rax 1490; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm1 1491; AVX512-NEXT: vmovq %xmm1, %rcx 1492; AVX512-NEXT: cmpq %rax, %rcx 1493; AVX512-NEXT: setl %al 1494; AVX512-NEXT: setg %cl 1495; AVX512-NEXT: subb %al, %cl 1496; AVX512-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1497; AVX512-NEXT: vpextrq $1, %xmm2, %rax 1498; AVX512-NEXT: vpextrq $1, %xmm1, %rcx 1499; AVX512-NEXT: cmpq %rax, %rcx 1500; AVX512-NEXT: setl %al 1501; AVX512-NEXT: setg %cl 1502; AVX512-NEXT: subb %al, %cl 1503; AVX512-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 1504; AVX512-NEXT: vzeroupper 1505; AVX512-NEXT: retq 1506; 1507; X86-LABEL: scmp_wide_vec_op: 1508; X86: # %bb.0: 1509; X86-NEXT: pushl %ebp 1510; X86-NEXT: pushl %ebx 1511; X86-NEXT: pushl %edi 1512; X86-NEXT: pushl %esi 1513; X86-NEXT: subl $12, %esp 1514; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1515; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1516; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1517; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 1518; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 1519; X86-NEXT: cmpl %edx, %edi 1520; X86-NEXT: movl %ebx, %ebp 1521; X86-NEXT: sbbl %esi, %ebp 1522; X86-NEXT: setl %al 1523; X86-NEXT: cmpl %edi, %edx 1524; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1525; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp 1526; X86-NEXT: sbbl %ebx, %esi 1527; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 1528; X86-NEXT: setl %ah 1529; X86-NEXT: subb %al, %ah 1530; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill 1531; X86-NEXT: cmpl %ecx, %ebp 1532; X86-NEXT: movl %ebx, %eax 1533; X86-NEXT: sbbl %edx, %eax 1534; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 1535; X86-NEXT: setl %al 1536; X86-NEXT: cmpl %ebp, %ecx 1537; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1538; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1539; X86-NEXT: sbbl %ebx, %edx 1540; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1541; X86-NEXT: setl %ah 1542; X86-NEXT: subb %al, %ah 1543; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte 
Spill
; X86-NEXT: cmpl %edi, %ecx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: setl %al
; X86-NEXT: cmpl %ecx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setl %dl
; X86-NEXT: subb %al, %dl
; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: cmpl %ebp, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edi, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setl %cl
; X86-NEXT: subb %bl, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl %edx, %edi
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setl %bh
; X86-NEXT: subb %bl, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: setl %bh
; X86-NEXT: subb %bl, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: setl %bh
; X86-NEXT: subb %bl, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setl %dl
; X86-NEXT: subb %bl, %dl
; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: setl %dl
; X86-NEXT: subb %bl, %dl
; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: setl %al
; X86-NEXT: cmpl %ecx, %ebp
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: setl %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: setl %al
; X86-NEXT: cmpl %ecx, %ebp
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: setl %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, (%esp) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: setl %dl
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: setl %ch
; X86-NEXT: subb %dl, %ch
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: setl %cl
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %dl
; X86-NEXT: subb %cl, %dl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %ebx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: setl %dh
; X86-NEXT: cmpl %esi, %ebx
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %cl
; X86-NEXT: subb %dh, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: setl %dh
; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: setl %bl
; X86-NEXT: subb %dh, %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %bl, 15(%eax)
; X86-NEXT: movb %cl, 14(%eax)
; X86-NEXT: movb %dl, 13(%eax)
; X86-NEXT: movb %ch, 12(%eax)
; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 11(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 10(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 9(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 8(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 7(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 6(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 5(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 4(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 3(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 2(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 1(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %1 = call <16 x i8> @llvm.scmp(<16 x i64> %x, <16 x i64> %y)
  ret <16 x i8> %1
}

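; Every lane in the vector tests lowers through the same scalar pattern,
; which the repeated setl/setg/subb triples above correspond to; roughly
; (an illustrative IR sketch with placeholder names, not checked by FileCheck):
;   %lt = icmp slt i64 %a, %b
;   %gt = icmp sgt i64 %a, %b
;   %lt.z = zext i1 %lt to i8
;   %gt.z = zext i1 %gt to i8
;   %res = sub i8 %gt.z, %lt.z
; The next test stresses uncommon types: <7 x i7> operands and a <7 x i117>
; result. Each i7 lane is first sign-extended in-register (the addb/sarb
; pairs), and the 117-bit result lanes are packed into memory with
; shld/shl/and/or sequences, so the stores check the bit packing as much as
; the comparison itself.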
define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; SSE2-LABEL: scmp_uncommon_vectors:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %r15
; SSE2-NEXT: pushq %r14
; SSE2-NEXT: pushq %r13
; SSE2-NEXT: pushq %r12
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; SSE2-NEXT: addb %r15b, %r15b
; SSE2-NEXT: sarb %r15b
; SSE2-NEXT: addb %sil, %sil
; SSE2-NEXT: sarb %sil
; SSE2-NEXT: cmpb %r15b, %sil
; SSE2-NEXT: setl %sil
; SSE2-NEXT: setg %r15b
; SSE2-NEXT: subb %sil, %r15b
; SSE2-NEXT: movsbq %r15b, %rsi
; SSE2-NEXT: movq %rsi, (%rax)
; SSE2-NEXT: movq %rsi, %xmm0
; SSE2-NEXT: sarq $63, %rsi
; SSE2-NEXT: addb %r14b, %r14b
; SSE2-NEXT: sarb %r14b
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; SSE2-NEXT: addb %r15b, %r15b
; SSE2-NEXT: sarb %r15b
; SSE2-NEXT: cmpb %r14b, %r15b
; SSE2-NEXT: setl %r14b
; SSE2-NEXT: setg %r15b
; SSE2-NEXT: subb %r14b, %r15b
; SSE2-NEXT: movsbq %r15b, %r14
; SSE2-NEXT: movq %r14, %r15
; SSE2-NEXT: sarq $63, %r15
; SSE2-NEXT: addb %bpl, %bpl
; SSE2-NEXT: sarb %bpl
; SSE2-NEXT: addb %dl, %dl
; SSE2-NEXT: sarb %dl
; SSE2-NEXT: cmpb %bpl, %dl
; SSE2-NEXT: setl %dl
; SSE2-NEXT: setg %bpl
; SSE2-NEXT: subb %dl, %bpl
; SSE2-NEXT: movsbq %bpl, %rdx
; SSE2-NEXT: movq %rdx, %r12
; SSE2-NEXT: sarq $63, %r12
; SSE2-NEXT: addb %bl, %bl
; SSE2-NEXT: sarb %bl
; SSE2-NEXT: addb %cl, %cl
; SSE2-NEXT: sarb %cl
; SSE2-NEXT: cmpb %bl, %cl
; SSE2-NEXT: setl %cl
; SSE2-NEXT: setg %bl
; SSE2-NEXT: subb %cl, %bl
; SSE2-NEXT: movsbq %bl, %rbx
; SSE2-NEXT: movq %rbx, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: addb %r11b, %r11b
; SSE2-NEXT: sarb %r11b
; SSE2-NEXT: addb %r8b, %r8b
; SSE2-NEXT: sarb %r8b
; SSE2-NEXT: cmpb %r11b, %r8b
; SSE2-NEXT: setl %r8b
; SSE2-NEXT: setg %r11b
; SSE2-NEXT: subb %r8b, %r11b
; SSE2-NEXT: movsbq %r11b, %r8
; SSE2-NEXT: movq %r8, %r11
; SSE2-NEXT: sarq $63, %r11
; SSE2-NEXT: addb %r10b, %r10b
; SSE2-NEXT: sarb %r10b
; SSE2-NEXT: addb %r9b, %r9b
; SSE2-NEXT: sarb %r9b
; SSE2-NEXT: cmpb %r10b, %r9b
; SSE2-NEXT: setl %r9b
; SSE2-NEXT: setg %r10b
; SSE2-NEXT: subb %r9b, %r10b
; SSE2-NEXT: movsbq %r10b, %r9
; SSE2-NEXT: movq %r9, %r10
; SSE2-NEXT: sarq $63, %r10
; SSE2-NEXT: addb %dil, %dil
; SSE2-NEXT: sarb %dil
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; SSE2-NEXT: addb %bpl, %bpl
; SSE2-NEXT: sarb %bpl
; SSE2-NEXT: cmpb %dil, %bpl
; SSE2-NEXT: setl %dil
; SSE2-NEXT: setg %bpl
; SSE2-NEXT: subb %dil, %bpl
; SSE2-NEXT: movsbq %bpl, %rdi
; SSE2-NEXT: movq %rdi, %r13
; SSE2-NEXT: sarq $63, %r13
; SSE2-NEXT: movl %r13d, 96(%rax)
; SSE2-NEXT: movabsq $2251799813685247, %rbp # imm = 0x7FFFFFFFFFFFF
; SSE2-NEXT: andq %r13, %rbp
; SSE2-NEXT: shldq $62, %rdi, %r13
; SSE2-NEXT: movq %r13, 88(%rax)
; SSE2-NEXT: movq %r10, %r13
; SSE2-NEXT: shldq $20, %r9, %r13
; SSE2-NEXT: movq %r13, 64(%rax)
; SSE2-NEXT: movq %r11, %r13
; SSE2-NEXT: shldq $31, %r8, %r13
; SSE2-NEXT: movq %r13, 48(%rax)
; SSE2-NEXT: movq %rcx, %r13
; SSE2-NEXT: shldq $42, %rbx, %r13
; SSE2-NEXT: movq %r13, 32(%rax)
; SSE2-NEXT: movabsq $9007199254738944, %r13 # imm = 0x1FFFFFFFFFF800
; SSE2-NEXT: andq %r12, %r13
; SSE2-NEXT: shldq $53, %rdx, %r12
; SSE2-NEXT: movq %r12, 16(%rax)
; SSE2-NEXT: movq %rbp, %r12
; SSE2-NEXT: shrq $48, %r12
; SSE2-NEXT: movb %r12b, 102(%rax)
; SSE2-NEXT: shrq $32, %rbp
; SSE2-NEXT: movw %bp, 100(%rax)
; SSE2-NEXT: movabsq $9007199254740991, %r12 # imm = 0x1FFFFFFFFFFFFF
; SSE2-NEXT: andq %r12, %r15
; SSE2-NEXT: shldq $9, %r14, %r15
; SSE2-NEXT: shlq $62, %rdi
; SSE2-NEXT: orq %r15, %rdi
; SSE2-NEXT: movq %rdi, 80(%rax)
; SSE2-NEXT: shlq $42, %rbx
; SSE2-NEXT: shrq $11, %r13
; SSE2-NEXT: orq %rbx, %r13
; SSE2-NEXT: movq %r13, 24(%rax)
; SSE2-NEXT: shlq $9, %r14
; SSE2-NEXT: andl $511, %r10d # imm = 0x1FF
; SSE2-NEXT: orq %r14, %r10
; SSE2-NEXT: movq %r10, 72(%rax)
; SSE2-NEXT: shlq $20, %r9
; SSE2-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
; SSE2-NEXT: orq %r9, %r11
; SSE2-NEXT: movq %r11, 56(%rax)
; SSE2-NEXT: shlq $31, %r8
; SSE2-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; SSE2-NEXT: orq %r8, %rcx
; SSE2-NEXT: movq %rcx, 40(%rax)
; SSE2-NEXT: movq %rsi, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: andq %r12, %rcx
; SSE2-NEXT: shlq $53, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, 8(%rax)
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %r12
; SSE2-NEXT: popq %r13
; SSE2-NEXT: popq %r14
; SSE2-NEXT: popq %r15
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; SSE4-LABEL: scmp_uncommon_vectors:
; SSE4: # %bb.0:
; SSE4-NEXT: pushq %rbp
; SSE4-NEXT: pushq %r15
; SSE4-NEXT: pushq %r14
; SSE4-NEXT: pushq %r13
; SSE4-NEXT: pushq %r12
; SSE4-NEXT: pushq %rbx
; SSE4-NEXT: movq %rdi, %rax
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; SSE4-NEXT: addb %r14b, %r14b
; SSE4-NEXT: sarb %r14b
; SSE4-NEXT: addb %sil, %sil
; SSE4-NEXT: sarb %sil
; SSE4-NEXT: cmpb %r14b, %sil
; SSE4-NEXT: setl %sil
; SSE4-NEXT: setg %r14b
; SSE4-NEXT: subb %sil, %r14b
; SSE4-NEXT: movsbq %r14b, %r14
; SSE4-NEXT: movq %r14, (%rax)
; SSE4-NEXT: sarq $63, %r14
; SSE4-NEXT: addb %r15b, %r15b
; SSE4-NEXT: sarb %r15b
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; SSE4-NEXT: addb %sil, %sil
; SSE4-NEXT: sarb %sil
; SSE4-NEXT: cmpb %r15b, %sil
; SSE4-NEXT: setl %sil
; SSE4-NEXT: setg %r15b
; SSE4-NEXT: subb %sil, %r15b
; SSE4-NEXT: movsbq %r15b, %rsi
; SSE4-NEXT: movq %rsi, %r15
; SSE4-NEXT: sarq $63, %r15
; SSE4-NEXT: addb %bpl, %bpl
; SSE4-NEXT: sarb %bpl
; SSE4-NEXT: addb %dl, %dl
; SSE4-NEXT: sarb %dl
; SSE4-NEXT: cmpb %bpl, %dl
; SSE4-NEXT: setl %dl
; SSE4-NEXT: setg %bpl
; SSE4-NEXT: subb %dl, %bpl
; SSE4-NEXT: movsbq %bpl, %r12
; SSE4-NEXT: movq %r12, %r13
; SSE4-NEXT: sarq $63, %r13
; SSE4-NEXT: addb %bl, %bl
; SSE4-NEXT: sarb %bl
; SSE4-NEXT: addb %cl, %cl
; SSE4-NEXT: sarb %cl
; SSE4-NEXT: cmpb %bl, %cl
; SSE4-NEXT: setl %cl
; SSE4-NEXT: setg %dl
; SSE4-NEXT: subb %cl, %dl
; SSE4-NEXT: movsbq %dl, %rbx
; SSE4-NEXT: movq %rbx, %rcx
; SSE4-NEXT: sarq $63, %rcx
; SSE4-NEXT: addb %r11b, %r11b
; SSE4-NEXT: sarb %r11b
; SSE4-NEXT: addb %r8b, %r8b
; SSE4-NEXT: sarb %r8b
; SSE4-NEXT: cmpb %r11b, %r8b
; SSE4-NEXT: setl %dl
; SSE4-NEXT: setg %r8b
; SSE4-NEXT: subb %dl, %r8b
; SSE4-NEXT: movsbq %r8b, %rdx
; SSE4-NEXT: movq %rdx, %r8
; SSE4-NEXT: sarq $63, %r8
; SSE4-NEXT: addb %r10b, %r10b
; SSE4-NEXT: sarb %r10b
; SSE4-NEXT: addb %r9b, %r9b
; SSE4-NEXT: sarb %r9b
; SSE4-NEXT: cmpb %r10b, %r9b
; SSE4-NEXT: setl %r9b
; SSE4-NEXT: setg %r10b
; SSE4-NEXT: subb %r9b, %r10b
; SSE4-NEXT: movsbq %r10b, %r9
; SSE4-NEXT: movq %r9, %r10
; SSE4-NEXT: sarq $63, %r10
; SSE4-NEXT: addb %dil, %dil
; SSE4-NEXT: sarb %dil
; SSE4-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; SSE4-NEXT: addb %r11b, %r11b
; SSE4-NEXT: sarb %r11b
; SSE4-NEXT: cmpb %dil, %r11b
; SSE4-NEXT: setl %dil
; SSE4-NEXT: setg %r11b
; SSE4-NEXT: subb %dil, %r11b
; SSE4-NEXT: movsbq %r11b, %rdi
; SSE4-NEXT: movq %rdi, %rbp
; SSE4-NEXT: sarq $63, %rbp
; SSE4-NEXT: movl %ebp, 96(%rax)
; SSE4-NEXT: movabsq $2251799813685247, %r11 # imm = 0x7FFFFFFFFFFFF
; SSE4-NEXT: andq %rbp, %r11
; SSE4-NEXT: shldq $62, %rdi, %rbp
; SSE4-NEXT: movq %rbp, 88(%rax)
; SSE4-NEXT: movq %r10, %rbp
; SSE4-NEXT: shldq $20, %r9, %rbp
; SSE4-NEXT: movq %rbp, 64(%rax)
; SSE4-NEXT: movq %r8, %rbp
; SSE4-NEXT: shldq $31, %rdx, %rbp
; SSE4-NEXT: movq %rbp, 48(%rax)
; SSE4-NEXT: movq %rcx, %rbp
; SSE4-NEXT: shldq $42, %rbx, %rbp
; SSE4-NEXT: movq %rbp, 32(%rax)
; SSE4-NEXT: movabsq $9007199254738944, %rbp # imm = 0x1FFFFFFFFFF800
; SSE4-NEXT: andq %r13, %rbp
; SSE4-NEXT: shldq $53, %r12, %r13
; SSE4-NEXT: movq %r13, 16(%rax)
; SSE4-NEXT: movq %r11, %r13
; SSE4-NEXT: shrq $48, %r13
; SSE4-NEXT: movb %r13b, 102(%rax)
; SSE4-NEXT: shrq $32, %r11
; SSE4-NEXT: movw %r11w, 100(%rax)
; SSE4-NEXT: movabsq $9007199254740991, %r11 # imm = 0x1FFFFFFFFFFFFF
; SSE4-NEXT: andq %r11, %r15
; SSE4-NEXT: shldq $9, %rsi, %r15
; SSE4-NEXT: shlq $62, %rdi
; SSE4-NEXT: orq %r15, %rdi
; SSE4-NEXT: movq %rdi, 80(%rax)
; SSE4-NEXT: andq %r11, %r14
; SSE4-NEXT: shlq $53, %r12
; SSE4-NEXT: orq %r14, %r12
; SSE4-NEXT: movq %r12, 8(%rax)
; SSE4-NEXT: shlq $42, %rbx
; SSE4-NEXT: shrq $11, %rbp
; SSE4-NEXT: orq %rbx, %rbp
; SSE4-NEXT: movq %rbp, 24(%rax)
; SSE4-NEXT: shlq $9, %rsi
; SSE4-NEXT: andl $511, %r10d # imm = 0x1FF
; SSE4-NEXT: orq %rsi, %r10
; SSE4-NEXT: movq %r10, 72(%rax)
; SSE4-NEXT: shlq $20, %r9
; SSE4-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
; SSE4-NEXT: orq %r9, %r8
; SSE4-NEXT: movq %r8, 56(%rax)
; SSE4-NEXT: shlq $31, %rdx
; SSE4-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; SSE4-NEXT: orq %rdx, %rcx
; SSE4-NEXT: movq %rcx, 40(%rax)
; SSE4-NEXT: popq %rbx
; SSE4-NEXT: popq %r12
; SSE4-NEXT: popq %r13
; SSE4-NEXT: popq %r14
; SSE4-NEXT: popq %r15
; SSE4-NEXT: popq %rbp
; SSE4-NEXT: retq
;
; AVX-LABEL: scmp_uncommon_vectors:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbp
; AVX-NEXT: pushq %r15
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; AVX-NEXT: addb %r14b, %r14b
; AVX-NEXT: sarb %r14b
; AVX-NEXT: addb %sil, %sil
; AVX-NEXT: sarb %sil
; AVX-NEXT: cmpb %r14b, %sil
; AVX-NEXT: setl %sil
; AVX-NEXT: setg %r14b
; AVX-NEXT: subb %sil, %r14b
; AVX-NEXT: movsbq %r14b, %r14
; AVX-NEXT: movq %r14, (%rax)
; AVX-NEXT: sarq $63, %r14
; AVX-NEXT: addb %r15b, %r15b
; AVX-NEXT: sarb %r15b
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; AVX-NEXT: addb %sil, %sil
; AVX-NEXT: sarb %sil
; AVX-NEXT: cmpb %r15b, %sil
; AVX-NEXT: setl %sil
; AVX-NEXT: setg %r15b
; AVX-NEXT: subb %sil, %r15b
; AVX-NEXT: movsbq %r15b, %rsi
; AVX-NEXT: movq %rsi, %r12
; AVX-NEXT: sarq $63, %r12
; AVX-NEXT: addb %bpl, %bpl
; AVX-NEXT: sarb %bpl
; AVX-NEXT: addb %dl, %dl
; AVX-NEXT: sarb %dl
; AVX-NEXT: cmpb %bpl, %dl
; AVX-NEXT: setl %dl
; AVX-NEXT: setg %bpl
; AVX-NEXT: subb %dl, %bpl
; AVX-NEXT: movsbq %bpl, %r15
; AVX-NEXT: movq %r15, %r13
; AVX-NEXT: sarq $63, %r13
; AVX-NEXT: addb %bl, %bl
; AVX-NEXT: sarb %bl
; AVX-NEXT: addb %cl, %cl
; AVX-NEXT: sarb %cl
; AVX-NEXT: cmpb %bl, %cl
; AVX-NEXT: setl %cl
; AVX-NEXT: setg %dl
; AVX-NEXT: subb %cl, %dl
; AVX-NEXT: movsbq %dl, %rbx
; AVX-NEXT: movq %rbx, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: addb %r11b, %r11b
; AVX-NEXT: sarb %r11b
; AVX-NEXT: addb %r8b, %r8b
; AVX-NEXT: sarb %r8b
; AVX-NEXT: cmpb %r11b, %r8b
; AVX-NEXT: setl %dl
; AVX-NEXT: setg %r8b
; AVX-NEXT: subb %dl, %r8b
; AVX-NEXT: movsbq %r8b, %rdx
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: addb %r10b, %r10b
; AVX-NEXT: sarb %r10b
; AVX-NEXT: addb %r9b, %r9b
; AVX-NEXT: sarb %r9b
; AVX-NEXT: cmpb %r10b, %r9b
; AVX-NEXT: setl %r9b
; AVX-NEXT: setg %r10b
; AVX-NEXT: subb %r9b, %r10b
; AVX-NEXT: movsbq %r10b, %r9
; AVX-NEXT: movq %r9, %r10
; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: addb %dil, %dil
; AVX-NEXT: sarb %dil
; AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; AVX-NEXT: addb %r11b, %r11b
; AVX-NEXT: sarb %r11b
; AVX-NEXT: cmpb %dil, %r11b
; AVX-NEXT: setl %dil
; AVX-NEXT: setg %r11b
; AVX-NEXT: subb %dil, %r11b
; AVX-NEXT: movsbq %r11b, %rdi
; AVX-NEXT: movq %rdi, %rbp
; AVX-NEXT: sarq $63, %rbp
; AVX-NEXT: movl %ebp, 96(%rax)
; AVX-NEXT: movb $51, %r11b
; AVX-NEXT: bzhiq %r11, %rbp, %r11
; AVX-NEXT: shldq $62, %rdi, %rbp
; AVX-NEXT: movq %rbp, 88(%rax)
; AVX-NEXT: movq %r10, %rbp
; AVX-NEXT: shldq $20, %r9, %rbp
; AVX-NEXT: movq %rbp, 64(%rax)
; AVX-NEXT: movq %r8, %rbp
; AVX-NEXT: shldq $31, %rdx, %rbp
; AVX-NEXT: movq %rbp, 48(%rax)
; AVX-NEXT: movq %rcx, %rbp
; AVX-NEXT: shldq $42, %rbx, %rbp
; AVX-NEXT: movq %rbp, 32(%rax)
; AVX-NEXT: movb $42, %bpl
; AVX-NEXT: bzhiq %rbp, %r13, %rbp
; AVX-NEXT: shldq $53, %r15, %r13
; AVX-NEXT: movq %r13, 16(%rax)
; AVX-NEXT: movq %r11, %r13
; AVX-NEXT: shrq $48, %r13
; AVX-NEXT: movb %r13b, 102(%rax)
; AVX-NEXT: shrq $32, %r11
; AVX-NEXT: movw %r11w, 100(%rax)
; AVX-NEXT: movb $53, %r11b
; AVX-NEXT: bzhiq %r11, %r12, %r12
; AVX-NEXT: shldq $9, %rsi, %r12
; AVX-NEXT: shlq $62, %rdi
; AVX-NEXT: orq %r12, %rdi
; AVX-NEXT: movq %rdi, 80(%rax)
; AVX-NEXT: shlq $42, %rbx
; AVX-NEXT: orq %rbp, %rbx
; AVX-NEXT: movq %rbx, 24(%rax)
; AVX-NEXT: bzhiq %r11, %r14, %rdi
; AVX-NEXT: shlq $53, %r15
; AVX-NEXT: orq %rdi, %r15
; AVX-NEXT: movq %r15, 8(%rax)
; AVX-NEXT: shlq $9, %rsi
; AVX-NEXT: andl $511, %r10d # imm = 0x1FF
; AVX-NEXT: orq %rsi, %r10
; AVX-NEXT: movq %r10, 72(%rax)
; AVX-NEXT: shlq $20, %r9
; AVX-NEXT: andl $1048575, %r8d # imm = 0xFFFFF
; AVX-NEXT: orq %r9, %r8
; AVX-NEXT: movq %r8, 56(%rax)
; AVX-NEXT: shlq $31, %rdx
; AVX-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; AVX-NEXT: orq %rdx, %rcx
; AVX-NEXT: movq %rcx, 40(%rax)
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r12
; AVX-NEXT: popq %r13
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %r15
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
;
; X86-LABEL: scmp_uncommon_vectors:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $52, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
; X86-NEXT: addb %dh, %dh
; X86-NEXT: sarb %dh
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: addb %dl, %dl
; X86-NEXT: sarb %dl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: addb %ah, %ah
; X86-NEXT: sarb %ah
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addb %cl, %cl
; X86-NEXT: sarb %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: addb %ch, %ch
; X86-NEXT: sarb %ch
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: addb %bl, %bl
; X86-NEXT: sarb %bl
; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
; X86-NEXT: addb %bh, %bh
; X86-NEXT: sarb %bh
; X86-NEXT: cmpb %bl, %bh
; X86-NEXT: setl %bl
; X86-NEXT: setg %bh
; X86-NEXT: subb %bl, %bh
; X86-NEXT: movsbl %bh, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %cl, %ch
; X86-NEXT: setl %cl
; X86-NEXT: setg %ch
; X86-NEXT: subb %cl, %ch
; X86-NEXT: movsbl %ch, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $2097151, %ecx # imm = 0x1FFFFF
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setl %al
; X86-NEXT: setg %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movsbl %cl, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ecx, (%edi)
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl $2097151, %eax # imm = 0x1FFFFF
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %dh, %dl
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
; X86-NEXT: movsbl %dl, %ebp
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
; X86-NEXT: movsbl %dl, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
; X86-NEXT: movsbl %dl, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
; X86-NEXT: setl %dl
; X86-NEXT: setg %dh
; X86-NEXT: subb %dl, %dh
; X86-NEXT: movsbl %dh, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: movl %ebx, 96(%edi)
; X86-NEXT: movl %ebx, 92(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 80(%edi)
; X86-NEXT: movl %eax, 68(%edi)
; X86-NEXT: movl %eax, 64(%edi)
; X86-NEXT: movl %esi, 52(%edi)
; X86-NEXT: movl %esi, 48(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 36(%edi)
; X86-NEXT: movl %ebp, 24(%edi)
; X86-NEXT: movl %ebp, 20(%edi)
; X86-NEXT: movl %ecx, 8(%edi)
; X86-NEXT: movl %ecx, 4(%edi)
; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movw %cx, 100(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $30, %edx, %ecx
; X86-NEXT: movl %ecx, 88(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $9, %edx, %ecx
; X86-NEXT: movl %ecx, 76(%edi)
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $20, %edx, %ecx
; X86-NEXT: movl %ecx, 60(%edi)
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $31, %edx, %ecx
; X86-NEXT: movl %ecx, 44(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $10, %edx, %ecx
; X86-NEXT: movl %ecx, 32(%edi)
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: shldl $21, %ebx, %ecx
; X86-NEXT: movl %ecx, 16(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shll $9, %ecx
; X86-NEXT: andl $511, %eax # imm = 0x1FF
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, 72(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $20, %eax
; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
; X86-NEXT: orl %eax, %esi
; X86-NEXT: movl %esi, 56(%edi)
; X86-NEXT: shll $10, %edx
; X86-NEXT: andl $1023, %ebp # imm = 0x3FF
; X86-NEXT: orl %edx, %ebp
; X86-NEXT: movl %ebp, 28(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $21, %eax
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, 12(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: andl $7, %eax
; X86-NEXT: movb %al, 102(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $30, %eax
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, 84(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $31, %eax
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, 40(%edi)
; X86-NEXT: movl %edi, %eax
; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %1 = call <7 x i117> @llvm.scmp(<7 x i7> %x, <7 x i7> %y)
  ret <7 x i117> %1
}

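; A single-element vector is scalarized. The i33 operands are sign-extended
; from bit 32 first: shlq/sarq by 31 on x86-64, and on i686 the high dword
; (which holds only the 33rd bit) is normalized with the andl $1 / negl pair
; before the 64-bit cmpl/sbbl compare.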
define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind {
; X64-LABEL: scmp_scalarize:
; X64: # %bb.0:
; X64-NEXT: shlq $31, %rsi
; X64-NEXT: sarq $31, %rsi
; X64-NEXT: shlq $31, %rdi
; X64-NEXT: sarq $31, %rdi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: setl %cl
; X64-NEXT: setg %al
; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_scalarize:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: andl $1, %esi
; X86-NEXT: negl %esi
; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: setl %al
; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
  ret <1 x i3> %1
}

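; Boolean (i1) operands are sign-extended to 0 or -1 first (the andb $1 /
; negb pairs, or the k-register extracts on AVX512), so the generated signed
; compare treats true (-1) as less than false (0).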
define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
; SSE2-LABEL: scmp_bool_operands:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: andb $1, %al
; SSE2-NEXT: negb %al
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE2-NEXT: andb $1, %dl
; SSE2-NEXT: negb %dl
; SSE2-NEXT: cmpb %al, %dl
; SSE2-NEXT: setl %al
; SSE2-NEXT: setg %dl
; SSE2-NEXT: subb %al, %dl
; SSE2-NEXT: movzbl %dl, %eax
; SSE2-NEXT: andb $1, %cl
; SSE2-NEXT: negb %cl
; SSE2-NEXT: andb $1, %sil
; SSE2-NEXT: negb %sil
; SSE2-NEXT: cmpb %cl, %sil
; SSE2-NEXT: setl %cl
; SSE2-NEXT: setg %dl
; SSE2-NEXT: subb %cl, %dl
; SSE2-NEXT: movzbl %dl, %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: scmp_bool_operands:
; SSE4: # %bb.0:
; SSE4-NEXT: pextrb $8, %xmm1, %eax
; SSE4-NEXT: andb $1, %al
; SSE4-NEXT: negb %al
; SSE4-NEXT: pextrb $8, %xmm0, %ecx
; SSE4-NEXT: andb $1, %cl
; SSE4-NEXT: negb %cl
; SSE4-NEXT: cmpb %al, %cl
; SSE4-NEXT: setl %al
; SSE4-NEXT: setg %cl
; SSE4-NEXT: subb %al, %cl
; SSE4-NEXT: movzbl %cl, %eax
; SSE4-NEXT: movd %xmm1, %ecx
; SSE4-NEXT: andb $1, %cl
; SSE4-NEXT: negb %cl
; SSE4-NEXT: movd %xmm0, %edx
; SSE4-NEXT: andb $1, %dl
; SSE4-NEXT: negb %dl
; SSE4-NEXT: cmpb %cl, %dl
; SSE4-NEXT: setl %cl
; SSE4-NEXT: setg %dl
; SSE4-NEXT: subb %cl, %dl
; SSE4-NEXT: movzbl %dl, %ecx
; SSE4-NEXT: movd %ecx, %xmm0
; SSE4-NEXT: pinsrb $1, %eax, %xmm0
; SSE4-NEXT: retq
;
; AVX2-LABEL: scmp_bool_operands:
; AVX2: # %bb.0:
; AVX2-NEXT: vpextrb $8, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
; AVX2-NEXT: negb %al
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: negb %cl
; AVX2-NEXT: cmpb %al, %cl
; AVX2-NEXT: setl %al
; AVX2-NEXT: setg %cl
; AVX2-NEXT: subb %al, %cl
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: negb %al
; AVX2-NEXT: vmovd %xmm0, %edx
; AVX2-NEXT: andb $1, %dl
; AVX2-NEXT: negb %dl
; AVX2-NEXT: cmpb %al, %dl
; AVX2-NEXT: setl %al
; AVX2-NEXT: setg %dl
; AVX2-NEXT: subb %al, %dl
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: scmp_bool_operands:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k0
; AVX512-NEXT: kshiftrb $1, %k0, %k1
; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: vpsllq $63, %xmm1, %xmm0
; AVX512-NEXT: vpmovq2m %xmm0, %k1
; AVX512-NEXT: kshiftrb $1, %k1, %k2
; AVX512-NEXT: kmovd %k2, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: negb %cl
; AVX512-NEXT: andb $1, %al
; AVX512-NEXT: negb %al
; AVX512-NEXT: cmpb %cl, %al
; AVX512-NEXT: setl %al
; AVX512-NEXT: setg %cl
; AVX512-NEXT: subb %al, %cl
; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: andb $1, %al
; AVX512-NEXT: negb %al
; AVX512-NEXT: kmovd %k0, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: negb %dl
; AVX512-NEXT: cmpb %al, %dl
; AVX512-NEXT: setl %al
; AVX512-NEXT: setg %dl
; AVX512-NEXT: subb %al, %dl
; AVX512-NEXT: vmovd %edx, %xmm0
; AVX512-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; X86-LABEL: scmp_bool_operands:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $1, %cl
; X86-NEXT: negb %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andb $1, %dl
; X86-NEXT: negb %dl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: andb $1, %ah
; X86-NEXT: negb %ah
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setl %ah
; X86-NEXT: setg %al
; X86-NEXT: subb %ah, %al
; X86-NEXT: cmpb %cl, %dl
; X86-NEXT: setl %cl
; X86-NEXT: setg %dl
; X86-NEXT: subb %cl, %dl
; X86-NEXT: retl
  %1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
  ret <2 x i8> %1
}

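; Here the i16 result lanes are wider than the i8 operands, so each byte
; compare result is sign-extended with movsbl before being inserted into the
; result vector (pinsrw / vpinsrw), or returned in ax/dx on i686.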
define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwind {
; SSE2-LABEL: scmp_ret_wider_than_operands:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $8, %ecx
; SSE2-NEXT: movd %xmm0, %edx
; SSE2-NEXT: movl %edx, %esi
; SSE2-NEXT: shrl $8, %esi
; SSE2-NEXT: cmpb %cl, %sil
; SSE2-NEXT: setl %cl
; SSE2-NEXT: setg %sil
; SSE2-NEXT: subb %cl, %sil
; SSE2-NEXT: movsbl %sil, %ecx
; SSE2-NEXT: cmpb %al, %dl
; SSE2-NEXT: setl %al
; SSE2-NEXT: setg %dl
; SSE2-NEXT: subb %al, %dl
; SSE2-NEXT: movsbl %dl, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: scmp_ret_wider_than_operands:
; SSE4: # %bb.0:
; SSE4-NEXT: pextrb $1, %xmm1, %eax
; SSE4-NEXT: pextrb $1, %xmm0, %ecx
; SSE4-NEXT: cmpb %al, %cl
; SSE4-NEXT: setl %al
; SSE4-NEXT: setg %cl
; SSE4-NEXT: subb %al, %cl
; SSE4-NEXT: movsbl %cl, %eax
; SSE4-NEXT: movd %xmm1, %ecx
; SSE4-NEXT: movd %xmm0, %edx
; SSE4-NEXT: cmpb %cl, %dl
; SSE4-NEXT: setl %cl
; SSE4-NEXT: setg %dl
; SSE4-NEXT: subb %cl, %dl
; SSE4-NEXT: movsbl %dl, %ecx
; SSE4-NEXT: movd %ecx, %xmm0
; SSE4-NEXT: pinsrw $1, %eax, %xmm0
; SSE4-NEXT: retq
;
; AVX-LABEL: scmp_ret_wider_than_operands:
; AVX: # %bb.0:
; AVX-NEXT: vpextrb $1, %xmm1, %eax
; AVX-NEXT: vpextrb $1, %xmm0, %ecx
; AVX-NEXT: cmpb %al, %cl
; AVX-NEXT: setl %al
; AVX-NEXT: setg %cl
; AVX-NEXT: subb %al, %cl
; AVX-NEXT: movsbl %cl, %eax
; AVX-NEXT: vmovd %xmm1, %ecx
; AVX-NEXT: vmovd %xmm0, %edx
; AVX-NEXT: cmpb %cl, %dl
; AVX-NEXT: setl %cl
; AVX-NEXT: setg %dl
; AVX-NEXT: subb %cl, %dl
; AVX-NEXT: movsbl %dl, %ecx
; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: scmp_ret_wider_than_operands:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
; X86-NEXT: movsbl %dl, %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
; X86-NEXT: setl %cl
; X86-NEXT: setg %dl
; X86-NEXT: subb %cl, %dl
; X86-NEXT: movsbl %dl, %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: retl
  %1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)
  ret <2 x i16> %1
}
