; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx | FileCheck %s --check-prefixes=X64,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx2 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

declare i8 @llvm.smax.i8(i8, i8)
declare i16 @llvm.smax.i16(i16, i16)
declare i24 @llvm.smax.i24(i24, i24)
declare i32 @llvm.smax.i32(i32, i32)
declare i64 @llvm.smax.i64(i64, i64)
declare i128 @llvm.smax.i128(i128, i128)

declare <1 x i32> @llvm.smax.v1i32(<1 x i32>, <1 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <3 x i32> @llvm.smax.v3i32(<3 x i32>, <3 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)

declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

define i8 @test_i8(i8 %a, i8 %b) nounwind {
; X64-LABEL: test_i8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: cmpb %al, %dil
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-LABEL: test_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
  %r = call i8 @llvm.smax.i8(i8 %a, i8 %b)
  ret i8 %r
}

define i16 @test_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_i16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: cmpw %ax, %di
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-LABEL: test_i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpw %ax, %cx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
  %r = call i16 @llvm.smax.i16(i16 %a, i16 %b)
  ret i16 %r
}

define i24 @test_i24(i24 %a, i24 %b) nounwind {
; X64-LABEL: test_i24:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $8, %esi
; X64-NEXT: sarl $8, %esi
; X64-NEXT: shll $8, %eax
; X64-NEXT: sarl $8, %eax
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovlel %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: test_i24:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $8, %ecx
; X86-NEXT: sarl $8, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: cmovlel %ecx, %eax
; X86-NEXT: retl
  %r = call i24 @llvm.smax.i24(i24 %a, i24 %b)
  ret i24 %r
}

define i32 @test_i32(i32 %a, i32 %b) nounwind {
; X64-LABEL: test_i32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: test_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: retl
  %r = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  ret i32 %r
}

define i64 @test_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: test_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: retq
;
; X86-LABEL: test_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: movl %edx, %edi
; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: cmovll %ecx, %eax
; X86-NEXT: cmovll %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
  %r = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %r
}

define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmpq %rdi, %rdx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: sbbq %rsi, %rdx
; X64-NEXT: cmovlq %rdi, %rax
; X64-NEXT: cmovlq %rsi, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: retq
;
; X86-LABEL: test_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %ebx, %edx
; X86-NEXT: movl %esi, %ebp
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl %ebp, %eax
; X86-NEXT: cmovll %ebx, %edx
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmovll %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = call i128 @llvm.smax.i128(i128 %a, i128 %b)
  ret i128 %r
}

define <1 x i32> @test_v1i32(<1 x i32> %a, <1 x i32> %b) nounwind {
; X64-LABEL: test_v1i32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: test_v1i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: retl
  %r = call <1 x i32> @llvm.smax.v1i32(<1 x i32> %a, <1 x i32> %b)
  ret <1 x i32> %r
}

define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE-LABEL: test_v2i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: test_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl %eax, %esi
; X86-NEXT: cmovgl %esi, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: cmovgl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
  %r = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %r
}

define <3 x i32> @test_v3i32(<3 x i32> %a, <3 x i32> %b) nounwind {
; SSE-LABEL: test_v3i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v3i32:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: test_v3i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmpl %eax, %ebx
; X86-NEXT: cmovgl %ebx, %eax
; X86-NEXT: cmpl %edx, %edi
; X86-NEXT: cmovgl %edi, %edx
; X86-NEXT: cmpl %ecx, %esi
; X86-NEXT: cmovgl %esi, %ecx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
  %r = call <3 x i32> @llvm.smax.v3i32(<3 x i32> %a, <3 x i32> %b)
  ret <3 x i32> %r
}

define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE-LABEL: test_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: test_v4i32:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edi, %eax
; X86-NEXT: cmovgl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: cmovgl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
  %r = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %r
}

define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: pcmpgtd %xmm2, %xmm4
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm4
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm3, %xmm2
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm2
; SSE-NEXT: por %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; X86-LABEL: test_v8i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %ebp, %eax
; X86-NEXT: cmovgl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %ebx, %eax
; X86-NEXT: cmovgl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edi, %eax
; X86-NEXT: cmovgl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: cmovgl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, 28(%ecx)
; X86-NEXT: movl %edx, 24(%ecx)
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl %eax, 20(%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, 16(%ecx)
; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: movl %edi, 8(%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
; X86-NEXT: movl %ebp, (%ecx)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
  ret <8 x i32> %r
}

define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: test_v8i16:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %bp, %ax
; X86-NEXT: cmovgl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %bx, %ax
; X86-NEXT: cmovgl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %di, %ax
; X86-NEXT: cmovgl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %si, %ax
; X86-NEXT: cmovgl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %dx, %ax
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %cx, %ax
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw %dx, %ax
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpw %ax, %cx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movw %ax, 14(%ecx)
; X86-NEXT: movw %dx, 12(%ecx)
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movw %ax, 10(%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movw %ax, 8(%ecx)
; X86-NEXT: movw %si, 6(%ecx)
; X86-NEXT: movw %di, 4(%ecx)
; X86-NEXT: movw %bx, 2(%ecx)
; X86-NEXT: movw %bp, (%ecx)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %r
}

define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtb %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: test_v16i8:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $40, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %bl, %al
; X86-NEXT: cmovgl %eax, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %bl, %al
; X86-NEXT: cmovgl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: cmovgl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb %cl, %al
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %cl, 15(%eax)
; X86-NEXT: movb %dl, 14(%eax)
; X86-NEXT: movb %bl, 13(%eax)
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: movb %cl, 12(%eax)
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: movb %cl, 11(%eax)
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: movb %cl, 10(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 9(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 7(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 6(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 5(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 3(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 2(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, 1(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: addl $40, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %r
}

define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_signbits_i16:
; X64: # %bb.0:
; X64-NEXT: movswl %si, %eax
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: shrl $15, %ecx
; X64-NEXT: shrl $8, %eax
; X64-NEXT: cmpw %ax, %cx
; X64-NEXT: cmovgl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-LABEL: test_signbits_i16:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $15, %eax
; X86-NEXT: cmpw %cx, %ax
; X86-NEXT: cmovlel %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
  %ax = ashr i16 %a, 15
  %bx = ashr i16 %b, 8
  %r = call i16 @llvm.smax.i16(i16 %ax, i16 %bx)
  ret i16 %r
}

define i32 @test_signbits_i32(i32 %a, i32 %b) nounwind {
; X64-LABEL: test_signbits_i32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: sarl $16, %edi
; X64-NEXT: sarl $17, %eax
; X64-NEXT: cmpl %eax, %edi
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: test_signbits_i32:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $17, %eax
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: retl
  %ax = ashr i32 %a, 16
  %bx = ashr i32 %b, 17
  %r = call i32 @llvm.smax.i32(i32 %ax, i32 %bx)
  ret i32 %r
}

define i64 @test_signbits_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: test_signbits_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: sarq $32, %rdi
; X64-NEXT: sarq $32, %rax
; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: retq
;
; X86-LABEL: test_signbits_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmovgl %ecx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: retl
  %ax = ashr i64 %a, 32
  %bx = ashr i64 %b, 32
  %r = call i64 @llvm.smax.i64(i64 %ax, i64 %bx)
  ret i64 %r
}

define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_signbits_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: sarq $28, %rax
; X64-NEXT: cmpq %rax, %rsi
; X64-NEXT: cmovgq %rsi, %rax
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: sarq $63, %rdx
; X64-NEXT: retq
;
; X86-LABEL: test_signbits_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: shrdl $28, %edi, %ecx
; X86-NEXT: sarl $28, %edi
; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: cmovll %esi, %ecx
; X86-NEXT: cmovll %edx, %edi
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
  %ax = ashr i128 %a, 64
  %bx = ashr i128 %b, 92
  %r = call i128 @llvm.smax.i128(i128 %ax, i128 %bx)
  ret i128 %r
}