1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX 5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86-FP16 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64-FP16 7 8define i32 @test_f16_oeq_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 9; SSE2-LABEL: test_f16_oeq_q: 10; SSE2: # %bb.0: 11; SSE2-NEXT: pushq %rbp 12; SSE2-NEXT: pushq %rbx 13; SSE2-NEXT: pushq %rax 14; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 15; SSE2-NEXT: movl %esi, %ebx 16; SSE2-NEXT: movl %edi, %ebp 17; SSE2-NEXT: movaps %xmm1, %xmm0 18; SSE2-NEXT: callq __extendhfsf2@PLT 19; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 20; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 21; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 22; SSE2-NEXT: callq __extendhfsf2@PLT 23; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 24; SSE2-NEXT: cmovnel %ebx, %ebp 25; SSE2-NEXT: cmovpl %ebx, %ebp 26; SSE2-NEXT: movl %ebp, %eax 27; SSE2-NEXT: addq $8, %rsp 28; SSE2-NEXT: popq %rbx 29; SSE2-NEXT: popq %rbp 30; SSE2-NEXT: retq 31; 32; AVX-LABEL: test_f16_oeq_q: 33; AVX: # %bb.0: 34; AVX-NEXT: movl %edi, %eax 35; AVX-NEXT: vpextrw $0, %xmm0, %ecx 36; AVX-NEXT: vpextrw $0, %xmm1, %edx 37; AVX-NEXT: movzwl %dx, %edx 38; AVX-NEXT: vmovd %edx, %xmm0 39; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 40; AVX-NEXT: movzwl %cx, %ecx 41; AVX-NEXT: vmovd %ecx, %xmm1 42; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 43; AVX-NEXT: vucomiss %xmm0, %xmm1 44; AVX-NEXT: cmovnel %esi, %eax 45; AVX-NEXT: cmovpl %esi, %eax 46; AVX-NEXT: retq 47; 48; X86-FP16-LABEL: test_f16_oeq_q: 49; X86-FP16: # %bb.0: 50; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 51; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 52; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 53; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 54; X86-FP16-NEXT: cmovnel %eax, %ecx 55; X86-FP16-NEXT: cmovpl %eax, %ecx 56; X86-FP16-NEXT: movl (%ecx), %eax 57; X86-FP16-NEXT: retl 58; 59; X64-FP16-LABEL: test_f16_oeq_q: 60; X64-FP16: # %bb.0: 61; X64-FP16-NEXT: movl %edi, %eax 62; X64-FP16-NEXT: vucomish %xmm1, %xmm0 63; X64-FP16-NEXT: cmovnel %esi, %eax 64; X64-FP16-NEXT: cmovpl %esi, %eax 65; X64-FP16-NEXT: retq 66 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 67 half %f1, half %f2, metadata !"oeq", 68 metadata !"fpexcept.strict") #0 69 %res = select i1 %cond, i32 %a, i32 %b 70 ret i32 %res 71} 72 73define i32 @test_f16_ogt_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 74; SSE2-LABEL: test_f16_ogt_q: 75; SSE2: # %bb.0: 76; SSE2-NEXT: pushq %rbp 77; SSE2-NEXT: pushq %rbx 78; SSE2-NEXT: pushq %rax 79; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 80; SSE2-NEXT: movl %esi, %ebx 81; SSE2-NEXT: movl %edi, %ebp 82; SSE2-NEXT: movaps %xmm1, %xmm0 83; SSE2-NEXT: callq __extendhfsf2@PLT 84; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 85; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 86; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 87; SSE2-NEXT: callq __extendhfsf2@PLT 88; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 89; SSE2-NEXT: cmovbel %ebx, 
%ebp 90; SSE2-NEXT: movl %ebp, %eax 91; SSE2-NEXT: addq $8, %rsp 92; SSE2-NEXT: popq %rbx 93; SSE2-NEXT: popq %rbp 94; SSE2-NEXT: retq 95; 96; AVX-LABEL: test_f16_ogt_q: 97; AVX: # %bb.0: 98; AVX-NEXT: movl %edi, %eax 99; AVX-NEXT: vpextrw $0, %xmm0, %ecx 100; AVX-NEXT: vpextrw $0, %xmm1, %edx 101; AVX-NEXT: movzwl %dx, %edx 102; AVX-NEXT: vmovd %edx, %xmm0 103; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 104; AVX-NEXT: movzwl %cx, %ecx 105; AVX-NEXT: vmovd %ecx, %xmm1 106; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 107; AVX-NEXT: vucomiss %xmm0, %xmm1 108; AVX-NEXT: cmovbel %esi, %eax 109; AVX-NEXT: retq 110; 111; X86-FP16-LABEL: test_f16_ogt_q: 112; X86-FP16: # %bb.0: 113; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 114; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 115; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 116; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 117; X86-FP16-NEXT: cmoval %eax, %ecx 118; X86-FP16-NEXT: movl (%ecx), %eax 119; X86-FP16-NEXT: retl 120; 121; X64-FP16-LABEL: test_f16_ogt_q: 122; X64-FP16: # %bb.0: 123; X64-FP16-NEXT: movl %edi, %eax 124; X64-FP16-NEXT: vucomish %xmm1, %xmm0 125; X64-FP16-NEXT: cmovbel %esi, %eax 126; X64-FP16-NEXT: retq 127 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 128 half %f1, half %f2, metadata !"ogt", 129 metadata !"fpexcept.strict") #0 130 %res = select i1 %cond, i32 %a, i32 %b 131 ret i32 %res 132} 133 134define i32 @test_f16_oge_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 135; SSE2-LABEL: test_f16_oge_q: 136; SSE2: # %bb.0: 137; SSE2-NEXT: pushq %rbp 138; SSE2-NEXT: pushq %rbx 139; SSE2-NEXT: pushq %rax 140; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 141; SSE2-NEXT: movl %esi, %ebx 142; SSE2-NEXT: movl %edi, %ebp 143; SSE2-NEXT: movaps %xmm1, %xmm0 144; SSE2-NEXT: callq __extendhfsf2@PLT 145; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 146; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 147; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 148; SSE2-NEXT: callq __extendhfsf2@PLT 149; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 150; SSE2-NEXT: cmovbl %ebx, %ebp 151; SSE2-NEXT: movl %ebp, %eax 152; SSE2-NEXT: addq $8, %rsp 153; SSE2-NEXT: popq %rbx 154; SSE2-NEXT: popq %rbp 155; SSE2-NEXT: retq 156; 157; AVX-LABEL: test_f16_oge_q: 158; AVX: # %bb.0: 159; AVX-NEXT: movl %edi, %eax 160; AVX-NEXT: vpextrw $0, %xmm0, %ecx 161; AVX-NEXT: vpextrw $0, %xmm1, %edx 162; AVX-NEXT: movzwl %dx, %edx 163; AVX-NEXT: vmovd %edx, %xmm0 164; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 165; AVX-NEXT: movzwl %cx, %ecx 166; AVX-NEXT: vmovd %ecx, %xmm1 167; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 168; AVX-NEXT: vucomiss %xmm0, %xmm1 169; AVX-NEXT: cmovbl %esi, %eax 170; AVX-NEXT: retq 171; 172; X86-FP16-LABEL: test_f16_oge_q: 173; X86-FP16: # %bb.0: 174; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 175; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 176; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 177; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 178; X86-FP16-NEXT: cmovael %eax, %ecx 179; X86-FP16-NEXT: movl (%ecx), %eax 180; X86-FP16-NEXT: retl 181; 182; X64-FP16-LABEL: test_f16_oge_q: 183; X64-FP16: # %bb.0: 184; X64-FP16-NEXT: movl %edi, %eax 185; X64-FP16-NEXT: vucomish %xmm1, %xmm0 186; X64-FP16-NEXT: cmovbl %esi, %eax 187; X64-FP16-NEXT: retq 188 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 189 half %f1, half %f2, metadata !"oge", 190 metadata !"fpexcept.strict") #0 191 %res = select i1 %cond, i32 %a, i32 %b 192 ret i32 %res 193} 194 195define i32 
@test_f16_olt_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 196; SSE2-LABEL: test_f16_olt_q: 197; SSE2: # %bb.0: 198; SSE2-NEXT: pushq %rbp 199; SSE2-NEXT: pushq %rbx 200; SSE2-NEXT: pushq %rax 201; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 202; SSE2-NEXT: movl %esi, %ebx 203; SSE2-NEXT: movl %edi, %ebp 204; SSE2-NEXT: movaps %xmm1, %xmm0 205; SSE2-NEXT: callq __extendhfsf2@PLT 206; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 207; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 208; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 209; SSE2-NEXT: callq __extendhfsf2@PLT 210; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 211; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 212; SSE2-NEXT: ucomiss %xmm0, %xmm1 213; SSE2-NEXT: cmovbel %ebx, %ebp 214; SSE2-NEXT: movl %ebp, %eax 215; SSE2-NEXT: addq $8, %rsp 216; SSE2-NEXT: popq %rbx 217; SSE2-NEXT: popq %rbp 218; SSE2-NEXT: retq 219; 220; AVX-LABEL: test_f16_olt_q: 221; AVX: # %bb.0: 222; AVX-NEXT: movl %edi, %eax 223; AVX-NEXT: vpextrw $0, %xmm1, %ecx 224; AVX-NEXT: vpextrw $0, %xmm0, %edx 225; AVX-NEXT: movzwl %dx, %edx 226; AVX-NEXT: vmovd %edx, %xmm0 227; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 228; AVX-NEXT: movzwl %cx, %ecx 229; AVX-NEXT: vmovd %ecx, %xmm1 230; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 231; AVX-NEXT: vucomiss %xmm0, %xmm1 232; AVX-NEXT: cmovbel %esi, %eax 233; AVX-NEXT: retq 234; 235; X86-FP16-LABEL: test_f16_olt_q: 236; X86-FP16: # %bb.0: 237; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 238; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 239; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 240; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 241; X86-FP16-NEXT: cmoval %eax, %ecx 242; X86-FP16-NEXT: movl (%ecx), %eax 243; X86-FP16-NEXT: retl 244; 245; X64-FP16-LABEL: test_f16_olt_q: 246; X64-FP16: # %bb.0: 247; X64-FP16-NEXT: movl %edi, %eax 248; X64-FP16-NEXT: vucomish %xmm0, %xmm1 249; X64-FP16-NEXT: cmovbel %esi, %eax 250; X64-FP16-NEXT: retq 251 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 252 half %f1, half %f2, metadata !"olt", 253 metadata !"fpexcept.strict") #0 254 %res = select i1 %cond, i32 %a, i32 %b 255 ret i32 %res 256} 257 258define i32 @test_f16_ole_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 259; SSE2-LABEL: test_f16_ole_q: 260; SSE2: # %bb.0: 261; SSE2-NEXT: pushq %rbp 262; SSE2-NEXT: pushq %rbx 263; SSE2-NEXT: pushq %rax 264; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 265; SSE2-NEXT: movl %esi, %ebx 266; SSE2-NEXT: movl %edi, %ebp 267; SSE2-NEXT: movaps %xmm1, %xmm0 268; SSE2-NEXT: callq __extendhfsf2@PLT 269; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 270; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 271; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 272; SSE2-NEXT: callq __extendhfsf2@PLT 273; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 274; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 275; SSE2-NEXT: ucomiss %xmm0, %xmm1 276; SSE2-NEXT: cmovbl %ebx, %ebp 277; SSE2-NEXT: movl %ebp, %eax 278; SSE2-NEXT: addq $8, %rsp 279; SSE2-NEXT: popq %rbx 280; SSE2-NEXT: popq %rbp 281; SSE2-NEXT: retq 282; 283; AVX-LABEL: test_f16_ole_q: 284; AVX: # %bb.0: 285; AVX-NEXT: movl %edi, %eax 286; AVX-NEXT: vpextrw $0, %xmm1, %ecx 287; AVX-NEXT: vpextrw $0, %xmm0, %edx 288; AVX-NEXT: movzwl %dx, %edx 289; AVX-NEXT: vmovd %edx, %xmm0 290; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 291; AVX-NEXT: movzwl %cx, %ecx 292; AVX-NEXT: vmovd %ecx, %xmm1 293; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 294; AVX-NEXT: vucomiss %xmm0, %xmm1 295; AVX-NEXT: cmovbl %esi, 
%eax 296; AVX-NEXT: retq 297; 298; X86-FP16-LABEL: test_f16_ole_q: 299; X86-FP16: # %bb.0: 300; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 301; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 302; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 303; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 304; X86-FP16-NEXT: cmovael %eax, %ecx 305; X86-FP16-NEXT: movl (%ecx), %eax 306; X86-FP16-NEXT: retl 307; 308; X64-FP16-LABEL: test_f16_ole_q: 309; X64-FP16: # %bb.0: 310; X64-FP16-NEXT: movl %edi, %eax 311; X64-FP16-NEXT: vucomish %xmm0, %xmm1 312; X64-FP16-NEXT: cmovbl %esi, %eax 313; X64-FP16-NEXT: retq 314 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 315 half %f1, half %f2, metadata !"ole", 316 metadata !"fpexcept.strict") #0 317 %res = select i1 %cond, i32 %a, i32 %b 318 ret i32 %res 319} 320 321define i32 @test_f16_one_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 322; SSE2-LABEL: test_f16_one_q: 323; SSE2: # %bb.0: 324; SSE2-NEXT: pushq %rbp 325; SSE2-NEXT: pushq %rbx 326; SSE2-NEXT: pushq %rax 327; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 328; SSE2-NEXT: movl %esi, %ebx 329; SSE2-NEXT: movl %edi, %ebp 330; SSE2-NEXT: movaps %xmm1, %xmm0 331; SSE2-NEXT: callq __extendhfsf2@PLT 332; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 333; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 334; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 335; SSE2-NEXT: callq __extendhfsf2@PLT 336; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 337; SSE2-NEXT: cmovel %ebx, %ebp 338; SSE2-NEXT: movl %ebp, %eax 339; SSE2-NEXT: addq $8, %rsp 340; SSE2-NEXT: popq %rbx 341; SSE2-NEXT: popq %rbp 342; SSE2-NEXT: retq 343; 344; AVX-LABEL: test_f16_one_q: 345; AVX: # %bb.0: 346; AVX-NEXT: movl %edi, %eax 347; AVX-NEXT: vpextrw $0, %xmm0, %ecx 348; AVX-NEXT: vpextrw $0, %xmm1, %edx 349; AVX-NEXT: movzwl %dx, %edx 350; AVX-NEXT: vmovd %edx, %xmm0 351; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 352; AVX-NEXT: movzwl %cx, %ecx 353; AVX-NEXT: vmovd %ecx, %xmm1 354; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 355; AVX-NEXT: vucomiss %xmm0, %xmm1 356; AVX-NEXT: cmovel %esi, %eax 357; AVX-NEXT: retq 358; 359; X86-FP16-LABEL: test_f16_one_q: 360; X86-FP16: # %bb.0: 361; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 362; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 363; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 364; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 365; X86-FP16-NEXT: cmovnel %eax, %ecx 366; X86-FP16-NEXT: movl (%ecx), %eax 367; X86-FP16-NEXT: retl 368; 369; X64-FP16-LABEL: test_f16_one_q: 370; X64-FP16: # %bb.0: 371; X64-FP16-NEXT: movl %edi, %eax 372; X64-FP16-NEXT: vucomish %xmm1, %xmm0 373; X64-FP16-NEXT: cmovel %esi, %eax 374; X64-FP16-NEXT: retq 375 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 376 half %f1, half %f2, metadata !"one", 377 metadata !"fpexcept.strict") #0 378 %res = select i1 %cond, i32 %a, i32 %b 379 ret i32 %res 380} 381 382define i32 @test_f16_ord_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 383; SSE2-LABEL: test_f16_ord_q: 384; SSE2: # %bb.0: 385; SSE2-NEXT: pushq %rbp 386; SSE2-NEXT: pushq %rbx 387; SSE2-NEXT: pushq %rax 388; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 389; SSE2-NEXT: movl %esi, %ebx 390; SSE2-NEXT: movl %edi, %ebp 391; SSE2-NEXT: movaps %xmm1, %xmm0 392; SSE2-NEXT: callq __extendhfsf2@PLT 393; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 394; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 395; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 
396; SSE2-NEXT: callq __extendhfsf2@PLT 397; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 398; SSE2-NEXT: cmovpl %ebx, %ebp 399; SSE2-NEXT: movl %ebp, %eax 400; SSE2-NEXT: addq $8, %rsp 401; SSE2-NEXT: popq %rbx 402; SSE2-NEXT: popq %rbp 403; SSE2-NEXT: retq 404; 405; AVX-LABEL: test_f16_ord_q: 406; AVX: # %bb.0: 407; AVX-NEXT: movl %edi, %eax 408; AVX-NEXT: vpextrw $0, %xmm0, %ecx 409; AVX-NEXT: vpextrw $0, %xmm1, %edx 410; AVX-NEXT: movzwl %dx, %edx 411; AVX-NEXT: vmovd %edx, %xmm0 412; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 413; AVX-NEXT: movzwl %cx, %ecx 414; AVX-NEXT: vmovd %ecx, %xmm1 415; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 416; AVX-NEXT: vucomiss %xmm0, %xmm1 417; AVX-NEXT: cmovpl %esi, %eax 418; AVX-NEXT: retq 419; 420; X86-FP16-LABEL: test_f16_ord_q: 421; X86-FP16: # %bb.0: 422; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 423; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 424; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 425; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 426; X86-FP16-NEXT: cmovnpl %eax, %ecx 427; X86-FP16-NEXT: movl (%ecx), %eax 428; X86-FP16-NEXT: retl 429; 430; X64-FP16-LABEL: test_f16_ord_q: 431; X64-FP16: # %bb.0: 432; X64-FP16-NEXT: movl %edi, %eax 433; X64-FP16-NEXT: vucomish %xmm1, %xmm0 434; X64-FP16-NEXT: cmovpl %esi, %eax 435; X64-FP16-NEXT: retq 436 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 437 half %f1, half %f2, metadata !"ord", 438 metadata !"fpexcept.strict") #0 439 %res = select i1 %cond, i32 %a, i32 %b 440 ret i32 %res 441} 442 443define i32 @test_f16_ueq_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 444; SSE2-LABEL: test_f16_ueq_q: 445; SSE2: # %bb.0: 446; SSE2-NEXT: pushq %rbp 447; SSE2-NEXT: pushq %rbx 448; SSE2-NEXT: pushq %rax 449; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 450; SSE2-NEXT: movl %esi, %ebx 451; SSE2-NEXT: movl %edi, %ebp 452; SSE2-NEXT: movaps %xmm1, %xmm0 453; SSE2-NEXT: callq __extendhfsf2@PLT 454; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 455; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 456; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 457; SSE2-NEXT: callq __extendhfsf2@PLT 458; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 459; SSE2-NEXT: cmovnel %ebx, %ebp 460; SSE2-NEXT: movl %ebp, %eax 461; SSE2-NEXT: addq $8, %rsp 462; SSE2-NEXT: popq %rbx 463; SSE2-NEXT: popq %rbp 464; SSE2-NEXT: retq 465; 466; AVX-LABEL: test_f16_ueq_q: 467; AVX: # %bb.0: 468; AVX-NEXT: movl %edi, %eax 469; AVX-NEXT: vpextrw $0, %xmm0, %ecx 470; AVX-NEXT: vpextrw $0, %xmm1, %edx 471; AVX-NEXT: movzwl %dx, %edx 472; AVX-NEXT: vmovd %edx, %xmm0 473; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 474; AVX-NEXT: movzwl %cx, %ecx 475; AVX-NEXT: vmovd %ecx, %xmm1 476; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 477; AVX-NEXT: vucomiss %xmm0, %xmm1 478; AVX-NEXT: cmovnel %esi, %eax 479; AVX-NEXT: retq 480; 481; X86-FP16-LABEL: test_f16_ueq_q: 482; X86-FP16: # %bb.0: 483; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 484; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 485; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 486; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 487; X86-FP16-NEXT: cmovel %eax, %ecx 488; X86-FP16-NEXT: movl (%ecx), %eax 489; X86-FP16-NEXT: retl 490; 491; X64-FP16-LABEL: test_f16_ueq_q: 492; X64-FP16: # %bb.0: 493; X64-FP16-NEXT: movl %edi, %eax 494; X64-FP16-NEXT: vucomish %xmm1, %xmm0 495; X64-FP16-NEXT: cmovnel %esi, %eax 496; X64-FP16-NEXT: retq 497 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 498 half %f1, half 
%f2, metadata !"ueq", 499 metadata !"fpexcept.strict") #0 500 %res = select i1 %cond, i32 %a, i32 %b 501 ret i32 %res 502} 503 504define i32 @test_f16_ugt_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 505; SSE2-LABEL: test_f16_ugt_q: 506; SSE2: # %bb.0: 507; SSE2-NEXT: pushq %rbp 508; SSE2-NEXT: pushq %rbx 509; SSE2-NEXT: pushq %rax 510; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 511; SSE2-NEXT: movl %esi, %ebx 512; SSE2-NEXT: movl %edi, %ebp 513; SSE2-NEXT: movaps %xmm1, %xmm0 514; SSE2-NEXT: callq __extendhfsf2@PLT 515; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 516; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 517; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 518; SSE2-NEXT: callq __extendhfsf2@PLT 519; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 520; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 521; SSE2-NEXT: ucomiss %xmm0, %xmm1 522; SSE2-NEXT: cmovael %ebx, %ebp 523; SSE2-NEXT: movl %ebp, %eax 524; SSE2-NEXT: addq $8, %rsp 525; SSE2-NEXT: popq %rbx 526; SSE2-NEXT: popq %rbp 527; SSE2-NEXT: retq 528; 529; AVX-LABEL: test_f16_ugt_q: 530; AVX: # %bb.0: 531; AVX-NEXT: movl %edi, %eax 532; AVX-NEXT: vpextrw $0, %xmm1, %ecx 533; AVX-NEXT: vpextrw $0, %xmm0, %edx 534; AVX-NEXT: movzwl %dx, %edx 535; AVX-NEXT: vmovd %edx, %xmm0 536; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 537; AVX-NEXT: movzwl %cx, %ecx 538; AVX-NEXT: vmovd %ecx, %xmm1 539; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 540; AVX-NEXT: vucomiss %xmm0, %xmm1 541; AVX-NEXT: cmovael %esi, %eax 542; AVX-NEXT: retq 543; 544; X86-FP16-LABEL: test_f16_ugt_q: 545; X86-FP16: # %bb.0: 546; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 547; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 548; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 549; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 550; X86-FP16-NEXT: cmovbl %eax, %ecx 551; X86-FP16-NEXT: movl (%ecx), %eax 552; X86-FP16-NEXT: retl 553; 554; X64-FP16-LABEL: test_f16_ugt_q: 555; X64-FP16: # %bb.0: 556; X64-FP16-NEXT: movl %edi, %eax 557; X64-FP16-NEXT: vucomish %xmm0, %xmm1 558; X64-FP16-NEXT: cmovael %esi, %eax 559; X64-FP16-NEXT: retq 560 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 561 half %f1, half %f2, metadata !"ugt", 562 metadata !"fpexcept.strict") #0 563 %res = select i1 %cond, i32 %a, i32 %b 564 ret i32 %res 565} 566 567define i32 @test_f16_uge_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 568; SSE2-LABEL: test_f16_uge_q: 569; SSE2: # %bb.0: 570; SSE2-NEXT: pushq %rbp 571; SSE2-NEXT: pushq %rbx 572; SSE2-NEXT: pushq %rax 573; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 574; SSE2-NEXT: movl %esi, %ebx 575; SSE2-NEXT: movl %edi, %ebp 576; SSE2-NEXT: movaps %xmm1, %xmm0 577; SSE2-NEXT: callq __extendhfsf2@PLT 578; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 579; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 580; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 581; SSE2-NEXT: callq __extendhfsf2@PLT 582; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 583; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 584; SSE2-NEXT: ucomiss %xmm0, %xmm1 585; SSE2-NEXT: cmoval %ebx, %ebp 586; SSE2-NEXT: movl %ebp, %eax 587; SSE2-NEXT: addq $8, %rsp 588; SSE2-NEXT: popq %rbx 589; SSE2-NEXT: popq %rbp 590; SSE2-NEXT: retq 591; 592; AVX-LABEL: test_f16_uge_q: 593; AVX: # %bb.0: 594; AVX-NEXT: movl %edi, %eax 595; AVX-NEXT: vpextrw $0, %xmm1, %ecx 596; AVX-NEXT: vpextrw $0, %xmm0, %edx 597; AVX-NEXT: movzwl %dx, %edx 598; AVX-NEXT: vmovd %edx, %xmm0 599; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 600; AVX-NEXT: movzwl %cx, 
%ecx 601; AVX-NEXT: vmovd %ecx, %xmm1 602; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 603; AVX-NEXT: vucomiss %xmm0, %xmm1 604; AVX-NEXT: cmoval %esi, %eax 605; AVX-NEXT: retq 606; 607; X86-FP16-LABEL: test_f16_uge_q: 608; X86-FP16: # %bb.0: 609; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 610; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 611; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 612; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 613; X86-FP16-NEXT: cmovbel %eax, %ecx 614; X86-FP16-NEXT: movl (%ecx), %eax 615; X86-FP16-NEXT: retl 616; 617; X64-FP16-LABEL: test_f16_uge_q: 618; X64-FP16: # %bb.0: 619; X64-FP16-NEXT: movl %edi, %eax 620; X64-FP16-NEXT: vucomish %xmm0, %xmm1 621; X64-FP16-NEXT: cmoval %esi, %eax 622; X64-FP16-NEXT: retq 623 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 624 half %f1, half %f2, metadata !"uge", 625 metadata !"fpexcept.strict") #0 626 %res = select i1 %cond, i32 %a, i32 %b 627 ret i32 %res 628} 629 630define i32 @test_f16_ult_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 631; SSE2-LABEL: test_f16_ult_q: 632; SSE2: # %bb.0: 633; SSE2-NEXT: pushq %rbp 634; SSE2-NEXT: pushq %rbx 635; SSE2-NEXT: pushq %rax 636; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 637; SSE2-NEXT: movl %esi, %ebx 638; SSE2-NEXT: movl %edi, %ebp 639; SSE2-NEXT: movaps %xmm1, %xmm0 640; SSE2-NEXT: callq __extendhfsf2@PLT 641; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 642; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 643; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 644; SSE2-NEXT: callq __extendhfsf2@PLT 645; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 646; SSE2-NEXT: cmovael %ebx, %ebp 647; SSE2-NEXT: movl %ebp, %eax 648; SSE2-NEXT: addq $8, %rsp 649; SSE2-NEXT: popq %rbx 650; SSE2-NEXT: popq %rbp 651; SSE2-NEXT: retq 652; 653; AVX-LABEL: test_f16_ult_q: 654; AVX: # %bb.0: 655; AVX-NEXT: movl %edi, %eax 656; AVX-NEXT: vpextrw $0, %xmm0, %ecx 657; AVX-NEXT: vpextrw $0, %xmm1, %edx 658; AVX-NEXT: movzwl %dx, %edx 659; AVX-NEXT: vmovd %edx, %xmm0 660; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 661; AVX-NEXT: movzwl %cx, %ecx 662; AVX-NEXT: vmovd %ecx, %xmm1 663; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 664; AVX-NEXT: vucomiss %xmm0, %xmm1 665; AVX-NEXT: cmovael %esi, %eax 666; AVX-NEXT: retq 667; 668; X86-FP16-LABEL: test_f16_ult_q: 669; X86-FP16: # %bb.0: 670; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 671; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 672; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 673; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 674; X86-FP16-NEXT: cmovbl %eax, %ecx 675; X86-FP16-NEXT: movl (%ecx), %eax 676; X86-FP16-NEXT: retl 677; 678; X64-FP16-LABEL: test_f16_ult_q: 679; X64-FP16: # %bb.0: 680; X64-FP16-NEXT: movl %edi, %eax 681; X64-FP16-NEXT: vucomish %xmm1, %xmm0 682; X64-FP16-NEXT: cmovael %esi, %eax 683; X64-FP16-NEXT: retq 684 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 685 half %f1, half %f2, metadata !"ult", 686 metadata !"fpexcept.strict") #0 687 %res = select i1 %cond, i32 %a, i32 %b 688 ret i32 %res 689} 690 691define i32 @test_f16_ule_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 692; SSE2-LABEL: test_f16_ule_q: 693; SSE2: # %bb.0: 694; SSE2-NEXT: pushq %rbp 695; SSE2-NEXT: pushq %rbx 696; SSE2-NEXT: pushq %rax 697; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 698; SSE2-NEXT: movl %esi, %ebx 699; SSE2-NEXT: movl %edi, %ebp 700; SSE2-NEXT: movaps %xmm1, %xmm0 701; SSE2-NEXT: callq __extendhfsf2@PLT 702; SSE2-NEXT: movss 
%xmm0, (%rsp) # 4-byte Spill 703; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 704; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 705; SSE2-NEXT: callq __extendhfsf2@PLT 706; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 707; SSE2-NEXT: cmoval %ebx, %ebp 708; SSE2-NEXT: movl %ebp, %eax 709; SSE2-NEXT: addq $8, %rsp 710; SSE2-NEXT: popq %rbx 711; SSE2-NEXT: popq %rbp 712; SSE2-NEXT: retq 713; 714; AVX-LABEL: test_f16_ule_q: 715; AVX: # %bb.0: 716; AVX-NEXT: movl %edi, %eax 717; AVX-NEXT: vpextrw $0, %xmm0, %ecx 718; AVX-NEXT: vpextrw $0, %xmm1, %edx 719; AVX-NEXT: movzwl %dx, %edx 720; AVX-NEXT: vmovd %edx, %xmm0 721; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 722; AVX-NEXT: movzwl %cx, %ecx 723; AVX-NEXT: vmovd %ecx, %xmm1 724; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 725; AVX-NEXT: vucomiss %xmm0, %xmm1 726; AVX-NEXT: cmoval %esi, %eax 727; AVX-NEXT: retq 728; 729; X86-FP16-LABEL: test_f16_ule_q: 730; X86-FP16: # %bb.0: 731; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 732; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 733; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 734; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 735; X86-FP16-NEXT: cmovbel %eax, %ecx 736; X86-FP16-NEXT: movl (%ecx), %eax 737; X86-FP16-NEXT: retl 738; 739; X64-FP16-LABEL: test_f16_ule_q: 740; X64-FP16: # %bb.0: 741; X64-FP16-NEXT: movl %edi, %eax 742; X64-FP16-NEXT: vucomish %xmm1, %xmm0 743; X64-FP16-NEXT: cmoval %esi, %eax 744; X64-FP16-NEXT: retq 745 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 746 half %f1, half %f2, metadata !"ule", 747 metadata !"fpexcept.strict") #0 748 %res = select i1 %cond, i32 %a, i32 %b 749 ret i32 %res 750} 751 752define i32 @test_f16_une_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 753; SSE2-LABEL: test_f16_une_q: 754; SSE2: # %bb.0: 755; SSE2-NEXT: pushq %rbp 756; SSE2-NEXT: pushq %rbx 757; SSE2-NEXT: pushq %rax 758; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 759; SSE2-NEXT: movl %esi, %ebx 760; SSE2-NEXT: movl %edi, %ebp 761; SSE2-NEXT: movaps %xmm1, %xmm0 762; SSE2-NEXT: callq __extendhfsf2@PLT 763; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 764; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 765; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 766; SSE2-NEXT: callq __extendhfsf2@PLT 767; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 768; SSE2-NEXT: cmovnel %ebp, %ebx 769; SSE2-NEXT: cmovpl %ebp, %ebx 770; SSE2-NEXT: movl %ebx, %eax 771; SSE2-NEXT: addq $8, %rsp 772; SSE2-NEXT: popq %rbx 773; SSE2-NEXT: popq %rbp 774; SSE2-NEXT: retq 775; 776; AVX-LABEL: test_f16_une_q: 777; AVX: # %bb.0: 778; AVX-NEXT: movl %esi, %eax 779; AVX-NEXT: vpextrw $0, %xmm0, %ecx 780; AVX-NEXT: vpextrw $0, %xmm1, %edx 781; AVX-NEXT: movzwl %dx, %edx 782; AVX-NEXT: vmovd %edx, %xmm0 783; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 784; AVX-NEXT: movzwl %cx, %ecx 785; AVX-NEXT: vmovd %ecx, %xmm1 786; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 787; AVX-NEXT: vucomiss %xmm0, %xmm1 788; AVX-NEXT: cmovnel %edi, %eax 789; AVX-NEXT: cmovpl %edi, %eax 790; AVX-NEXT: retq 791; 792; X86-FP16-LABEL: test_f16_une_q: 793; X86-FP16: # %bb.0: 794; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 795; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 796; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 797; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 798; X86-FP16-NEXT: cmovnel %eax, %ecx 799; X86-FP16-NEXT: cmovpl %eax, %ecx 800; X86-FP16-NEXT: movl (%ecx), %eax 801; X86-FP16-NEXT: retl 802; 803; X64-FP16-LABEL: 
test_f16_une_q: 804; X64-FP16: # %bb.0: 805; X64-FP16-NEXT: movl %esi, %eax 806; X64-FP16-NEXT: vucomish %xmm1, %xmm0 807; X64-FP16-NEXT: cmovnel %edi, %eax 808; X64-FP16-NEXT: cmovpl %edi, %eax 809; X64-FP16-NEXT: retq 810 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 811 half %f1, half %f2, metadata !"une", 812 metadata !"fpexcept.strict") #0 813 %res = select i1 %cond, i32 %a, i32 %b 814 ret i32 %res 815} 816 817define i32 @test_f16_uno_q(i32 %a, i32 %b, half %f1, half %f2) #0 { 818; SSE2-LABEL: test_f16_uno_q: 819; SSE2: # %bb.0: 820; SSE2-NEXT: pushq %rbp 821; SSE2-NEXT: pushq %rbx 822; SSE2-NEXT: pushq %rax 823; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 824; SSE2-NEXT: movl %esi, %ebx 825; SSE2-NEXT: movl %edi, %ebp 826; SSE2-NEXT: movaps %xmm1, %xmm0 827; SSE2-NEXT: callq __extendhfsf2@PLT 828; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 829; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 830; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 831; SSE2-NEXT: callq __extendhfsf2@PLT 832; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 833; SSE2-NEXT: cmovnpl %ebx, %ebp 834; SSE2-NEXT: movl %ebp, %eax 835; SSE2-NEXT: addq $8, %rsp 836; SSE2-NEXT: popq %rbx 837; SSE2-NEXT: popq %rbp 838; SSE2-NEXT: retq 839; 840; AVX-LABEL: test_f16_uno_q: 841; AVX: # %bb.0: 842; AVX-NEXT: movl %edi, %eax 843; AVX-NEXT: vpextrw $0, %xmm0, %ecx 844; AVX-NEXT: vpextrw $0, %xmm1, %edx 845; AVX-NEXT: movzwl %dx, %edx 846; AVX-NEXT: vmovd %edx, %xmm0 847; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 848; AVX-NEXT: movzwl %cx, %ecx 849; AVX-NEXT: vmovd %ecx, %xmm1 850; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 851; AVX-NEXT: vucomiss %xmm0, %xmm1 852; AVX-NEXT: cmovnpl %esi, %eax 853; AVX-NEXT: retq 854; 855; X86-FP16-LABEL: test_f16_uno_q: 856; X86-FP16: # %bb.0: 857; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 858; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 859; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 860; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 861; X86-FP16-NEXT: cmovpl %eax, %ecx 862; X86-FP16-NEXT: movl (%ecx), %eax 863; X86-FP16-NEXT: retl 864; 865; X64-FP16-LABEL: test_f16_uno_q: 866; X64-FP16: # %bb.0: 867; X64-FP16-NEXT: movl %edi, %eax 868; X64-FP16-NEXT: vucomish %xmm1, %xmm0 869; X64-FP16-NEXT: cmovnpl %esi, %eax 870; X64-FP16-NEXT: retq 871 %cond = call i1 @llvm.experimental.constrained.fcmp.f16( 872 half %f1, half %f2, metadata !"uno", 873 metadata !"fpexcept.strict") #0 874 %res = select i1 %cond, i32 %a, i32 %b 875 ret i32 %res 876} 877 878define i32 @test_f16_oeq_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 879; SSE2-LABEL: test_f16_oeq_s: 880; SSE2: # %bb.0: 881; SSE2-NEXT: pushq %rbp 882; SSE2-NEXT: pushq %rbx 883; SSE2-NEXT: pushq %rax 884; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 885; SSE2-NEXT: movl %esi, %ebx 886; SSE2-NEXT: movl %edi, %ebp 887; SSE2-NEXT: movaps %xmm1, %xmm0 888; SSE2-NEXT: callq __extendhfsf2@PLT 889; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 890; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 891; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 892; SSE2-NEXT: callq __extendhfsf2@PLT 893; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 894; SSE2-NEXT: cmovnel %ebx, %ebp 895; SSE2-NEXT: cmovpl %ebx, %ebp 896; SSE2-NEXT: movl %ebp, %eax 897; SSE2-NEXT: addq $8, %rsp 898; SSE2-NEXT: popq %rbx 899; SSE2-NEXT: popq %rbp 900; SSE2-NEXT: retq 901; 902; AVX-LABEL: test_f16_oeq_s: 903; AVX: # %bb.0: 904; AVX-NEXT: movl %edi, %eax 905; AVX-NEXT: vpextrw 
$0, %xmm0, %ecx 906; AVX-NEXT: vpextrw $0, %xmm1, %edx 907; AVX-NEXT: movzwl %dx, %edx 908; AVX-NEXT: vmovd %edx, %xmm0 909; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 910; AVX-NEXT: movzwl %cx, %ecx 911; AVX-NEXT: vmovd %ecx, %xmm1 912; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 913; AVX-NEXT: vcomiss %xmm0, %xmm1 914; AVX-NEXT: cmovnel %esi, %eax 915; AVX-NEXT: cmovpl %esi, %eax 916; AVX-NEXT: retq 917; 918; X86-FP16-LABEL: test_f16_oeq_s: 919; X86-FP16: # %bb.0: 920; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 921; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 922; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 923; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 924; X86-FP16-NEXT: cmovnel %eax, %ecx 925; X86-FP16-NEXT: cmovpl %eax, %ecx 926; X86-FP16-NEXT: movl (%ecx), %eax 927; X86-FP16-NEXT: retl 928; 929; X64-FP16-LABEL: test_f16_oeq_s: 930; X64-FP16: # %bb.0: 931; X64-FP16-NEXT: movl %edi, %eax 932; X64-FP16-NEXT: vcomish %xmm1, %xmm0 933; X64-FP16-NEXT: cmovnel %esi, %eax 934; X64-FP16-NEXT: cmovpl %esi, %eax 935; X64-FP16-NEXT: retq 936 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 937 half %f1, half %f2, metadata !"oeq", 938 metadata !"fpexcept.strict") #0 939 %res = select i1 %cond, i32 %a, i32 %b 940 ret i32 %res 941} 942 943define i32 @test_f16_ogt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 944; SSE2-LABEL: test_f16_ogt_s: 945; SSE2: # %bb.0: 946; SSE2-NEXT: pushq %rbp 947; SSE2-NEXT: pushq %rbx 948; SSE2-NEXT: pushq %rax 949; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 950; SSE2-NEXT: movl %esi, %ebx 951; SSE2-NEXT: movl %edi, %ebp 952; SSE2-NEXT: movaps %xmm1, %xmm0 953; SSE2-NEXT: callq __extendhfsf2@PLT 954; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 955; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 956; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 957; SSE2-NEXT: callq __extendhfsf2@PLT 958; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 959; SSE2-NEXT: cmovbel %ebx, %ebp 960; SSE2-NEXT: movl %ebp, %eax 961; SSE2-NEXT: addq $8, %rsp 962; SSE2-NEXT: popq %rbx 963; SSE2-NEXT: popq %rbp 964; SSE2-NEXT: retq 965; 966; AVX-LABEL: test_f16_ogt_s: 967; AVX: # %bb.0: 968; AVX-NEXT: movl %edi, %eax 969; AVX-NEXT: vpextrw $0, %xmm0, %ecx 970; AVX-NEXT: vpextrw $0, %xmm1, %edx 971; AVX-NEXT: movzwl %dx, %edx 972; AVX-NEXT: vmovd %edx, %xmm0 973; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 974; AVX-NEXT: movzwl %cx, %ecx 975; AVX-NEXT: vmovd %ecx, %xmm1 976; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 977; AVX-NEXT: vcomiss %xmm0, %xmm1 978; AVX-NEXT: cmovbel %esi, %eax 979; AVX-NEXT: retq 980; 981; X86-FP16-LABEL: test_f16_ogt_s: 982; X86-FP16: # %bb.0: 983; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 984; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 985; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 986; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 987; X86-FP16-NEXT: cmoval %eax, %ecx 988; X86-FP16-NEXT: movl (%ecx), %eax 989; X86-FP16-NEXT: retl 990; 991; X64-FP16-LABEL: test_f16_ogt_s: 992; X64-FP16: # %bb.0: 993; X64-FP16-NEXT: movl %edi, %eax 994; X64-FP16-NEXT: vcomish %xmm1, %xmm0 995; X64-FP16-NEXT: cmovbel %esi, %eax 996; X64-FP16-NEXT: retq 997 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 998 half %f1, half %f2, metadata !"ogt", 999 metadata !"fpexcept.strict") #0 1000 %res = select i1 %cond, i32 %a, i32 %b 1001 ret i32 %res 1002} 1003 1004define i32 @test_f16_oge_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1005; SSE2-LABEL: test_f16_oge_s: 1006; SSE2: # %bb.0: 1007; SSE2-NEXT: 
pushq %rbp 1008; SSE2-NEXT: pushq %rbx 1009; SSE2-NEXT: pushq %rax 1010; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1011; SSE2-NEXT: movl %esi, %ebx 1012; SSE2-NEXT: movl %edi, %ebp 1013; SSE2-NEXT: movaps %xmm1, %xmm0 1014; SSE2-NEXT: callq __extendhfsf2@PLT 1015; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1016; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1017; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1018; SSE2-NEXT: callq __extendhfsf2@PLT 1019; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 1020; SSE2-NEXT: cmovbl %ebx, %ebp 1021; SSE2-NEXT: movl %ebp, %eax 1022; SSE2-NEXT: addq $8, %rsp 1023; SSE2-NEXT: popq %rbx 1024; SSE2-NEXT: popq %rbp 1025; SSE2-NEXT: retq 1026; 1027; AVX-LABEL: test_f16_oge_s: 1028; AVX: # %bb.0: 1029; AVX-NEXT: movl %edi, %eax 1030; AVX-NEXT: vpextrw $0, %xmm0, %ecx 1031; AVX-NEXT: vpextrw $0, %xmm1, %edx 1032; AVX-NEXT: movzwl %dx, %edx 1033; AVX-NEXT: vmovd %edx, %xmm0 1034; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1035; AVX-NEXT: movzwl %cx, %ecx 1036; AVX-NEXT: vmovd %ecx, %xmm1 1037; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1038; AVX-NEXT: vcomiss %xmm0, %xmm1 1039; AVX-NEXT: cmovbl %esi, %eax 1040; AVX-NEXT: retq 1041; 1042; X86-FP16-LABEL: test_f16_oge_s: 1043; X86-FP16: # %bb.0: 1044; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1045; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1046; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1047; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1048; X86-FP16-NEXT: cmovael %eax, %ecx 1049; X86-FP16-NEXT: movl (%ecx), %eax 1050; X86-FP16-NEXT: retl 1051; 1052; X64-FP16-LABEL: test_f16_oge_s: 1053; X64-FP16: # %bb.0: 1054; X64-FP16-NEXT: movl %edi, %eax 1055; X64-FP16-NEXT: vcomish %xmm1, %xmm0 1056; X64-FP16-NEXT: cmovbl %esi, %eax 1057; X64-FP16-NEXT: retq 1058 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1059 half %f1, half %f2, metadata !"oge", 1060 metadata !"fpexcept.strict") #0 1061 %res = select i1 %cond, i32 %a, i32 %b 1062 ret i32 %res 1063} 1064 1065define i32 @test_f16_olt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1066; SSE2-LABEL: test_f16_olt_s: 1067; SSE2: # %bb.0: 1068; SSE2-NEXT: pushq %rbp 1069; SSE2-NEXT: pushq %rbx 1070; SSE2-NEXT: pushq %rax 1071; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1072; SSE2-NEXT: movl %esi, %ebx 1073; SSE2-NEXT: movl %edi, %ebp 1074; SSE2-NEXT: movaps %xmm1, %xmm0 1075; SSE2-NEXT: callq __extendhfsf2@PLT 1076; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1077; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1078; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1079; SSE2-NEXT: callq __extendhfsf2@PLT 1080; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1081; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 1082; SSE2-NEXT: comiss %xmm0, %xmm1 1083; SSE2-NEXT: cmovbel %ebx, %ebp 1084; SSE2-NEXT: movl %ebp, %eax 1085; SSE2-NEXT: addq $8, %rsp 1086; SSE2-NEXT: popq %rbx 1087; SSE2-NEXT: popq %rbp 1088; SSE2-NEXT: retq 1089; 1090; AVX-LABEL: test_f16_olt_s: 1091; AVX: # %bb.0: 1092; AVX-NEXT: movl %edi, %eax 1093; AVX-NEXT: vpextrw $0, %xmm1, %ecx 1094; AVX-NEXT: vpextrw $0, %xmm0, %edx 1095; AVX-NEXT: movzwl %dx, %edx 1096; AVX-NEXT: vmovd %edx, %xmm0 1097; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1098; AVX-NEXT: movzwl %cx, %ecx 1099; AVX-NEXT: vmovd %ecx, %xmm1 1100; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1101; AVX-NEXT: vcomiss %xmm0, %xmm1 1102; AVX-NEXT: cmovbel %esi, %eax 1103; AVX-NEXT: retq 1104; 1105; X86-FP16-LABEL: test_f16_olt_s: 1106; X86-FP16: # %bb.0: 1107; 
X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1108; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1109; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1110; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1111; X86-FP16-NEXT: cmoval %eax, %ecx 1112; X86-FP16-NEXT: movl (%ecx), %eax 1113; X86-FP16-NEXT: retl 1114; 1115; X64-FP16-LABEL: test_f16_olt_s: 1116; X64-FP16: # %bb.0: 1117; X64-FP16-NEXT: movl %edi, %eax 1118; X64-FP16-NEXT: vcomish %xmm0, %xmm1 1119; X64-FP16-NEXT: cmovbel %esi, %eax 1120; X64-FP16-NEXT: retq 1121 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1122 half %f1, half %f2, metadata !"olt", 1123 metadata !"fpexcept.strict") #0 1124 %res = select i1 %cond, i32 %a, i32 %b 1125 ret i32 %res 1126} 1127 1128define i32 @test_f16_ole_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1129; SSE2-LABEL: test_f16_ole_s: 1130; SSE2: # %bb.0: 1131; SSE2-NEXT: pushq %rbp 1132; SSE2-NEXT: pushq %rbx 1133; SSE2-NEXT: pushq %rax 1134; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1135; SSE2-NEXT: movl %esi, %ebx 1136; SSE2-NEXT: movl %edi, %ebp 1137; SSE2-NEXT: movaps %xmm1, %xmm0 1138; SSE2-NEXT: callq __extendhfsf2@PLT 1139; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1140; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1141; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1142; SSE2-NEXT: callq __extendhfsf2@PLT 1143; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1144; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 1145; SSE2-NEXT: comiss %xmm0, %xmm1 1146; SSE2-NEXT: cmovbl %ebx, %ebp 1147; SSE2-NEXT: movl %ebp, %eax 1148; SSE2-NEXT: addq $8, %rsp 1149; SSE2-NEXT: popq %rbx 1150; SSE2-NEXT: popq %rbp 1151; SSE2-NEXT: retq 1152; 1153; AVX-LABEL: test_f16_ole_s: 1154; AVX: # %bb.0: 1155; AVX-NEXT: movl %edi, %eax 1156; AVX-NEXT: vpextrw $0, %xmm1, %ecx 1157; AVX-NEXT: vpextrw $0, %xmm0, %edx 1158; AVX-NEXT: movzwl %dx, %edx 1159; AVX-NEXT: vmovd %edx, %xmm0 1160; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1161; AVX-NEXT: movzwl %cx, %ecx 1162; AVX-NEXT: vmovd %ecx, %xmm1 1163; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1164; AVX-NEXT: vcomiss %xmm0, %xmm1 1165; AVX-NEXT: cmovbl %esi, %eax 1166; AVX-NEXT: retq 1167; 1168; X86-FP16-LABEL: test_f16_ole_s: 1169; X86-FP16: # %bb.0: 1170; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1171; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1172; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1173; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1174; X86-FP16-NEXT: cmovael %eax, %ecx 1175; X86-FP16-NEXT: movl (%ecx), %eax 1176; X86-FP16-NEXT: retl 1177; 1178; X64-FP16-LABEL: test_f16_ole_s: 1179; X64-FP16: # %bb.0: 1180; X64-FP16-NEXT: movl %edi, %eax 1181; X64-FP16-NEXT: vcomish %xmm0, %xmm1 1182; X64-FP16-NEXT: cmovbl %esi, %eax 1183; X64-FP16-NEXT: retq 1184 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1185 half %f1, half %f2, metadata !"ole", 1186 metadata !"fpexcept.strict") #0 1187 %res = select i1 %cond, i32 %a, i32 %b 1188 ret i32 %res 1189} 1190 1191define i32 @test_f16_one_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1192; SSE2-LABEL: test_f16_one_s: 1193; SSE2: # %bb.0: 1194; SSE2-NEXT: pushq %rbp 1195; SSE2-NEXT: pushq %rbx 1196; SSE2-NEXT: pushq %rax 1197; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1198; SSE2-NEXT: movl %esi, %ebx 1199; SSE2-NEXT: movl %edi, %ebp 1200; SSE2-NEXT: movaps %xmm1, %xmm0 1201; SSE2-NEXT: callq __extendhfsf2@PLT 1202; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1203; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm0 # 4-byte Reload 1204; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1205; SSE2-NEXT: callq __extendhfsf2@PLT 1206; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 1207; SSE2-NEXT: cmovel %ebx, %ebp 1208; SSE2-NEXT: movl %ebp, %eax 1209; SSE2-NEXT: addq $8, %rsp 1210; SSE2-NEXT: popq %rbx 1211; SSE2-NEXT: popq %rbp 1212; SSE2-NEXT: retq 1213; 1214; AVX-LABEL: test_f16_one_s: 1215; AVX: # %bb.0: 1216; AVX-NEXT: movl %edi, %eax 1217; AVX-NEXT: vpextrw $0, %xmm0, %ecx 1218; AVX-NEXT: vpextrw $0, %xmm1, %edx 1219; AVX-NEXT: movzwl %dx, %edx 1220; AVX-NEXT: vmovd %edx, %xmm0 1221; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1222; AVX-NEXT: movzwl %cx, %ecx 1223; AVX-NEXT: vmovd %ecx, %xmm1 1224; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1225; AVX-NEXT: vcomiss %xmm0, %xmm1 1226; AVX-NEXT: cmovel %esi, %eax 1227; AVX-NEXT: retq 1228; 1229; X86-FP16-LABEL: test_f16_one_s: 1230; X86-FP16: # %bb.0: 1231; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1232; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1233; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1234; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1235; X86-FP16-NEXT: cmovnel %eax, %ecx 1236; X86-FP16-NEXT: movl (%ecx), %eax 1237; X86-FP16-NEXT: retl 1238; 1239; X64-FP16-LABEL: test_f16_one_s: 1240; X64-FP16: # %bb.0: 1241; X64-FP16-NEXT: movl %edi, %eax 1242; X64-FP16-NEXT: vcomish %xmm1, %xmm0 1243; X64-FP16-NEXT: cmovel %esi, %eax 1244; X64-FP16-NEXT: retq 1245 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1246 half %f1, half %f2, metadata !"one", 1247 metadata !"fpexcept.strict") #0 1248 %res = select i1 %cond, i32 %a, i32 %b 1249 ret i32 %res 1250} 1251 1252define i32 @test_f16_ord_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1253; SSE2-LABEL: test_f16_ord_s: 1254; SSE2: # %bb.0: 1255; SSE2-NEXT: pushq %rbp 1256; SSE2-NEXT: pushq %rbx 1257; SSE2-NEXT: pushq %rax 1258; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1259; SSE2-NEXT: movl %esi, %ebx 1260; SSE2-NEXT: movl %edi, %ebp 1261; SSE2-NEXT: movaps %xmm1, %xmm0 1262; SSE2-NEXT: callq __extendhfsf2@PLT 1263; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1264; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1265; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1266; SSE2-NEXT: callq __extendhfsf2@PLT 1267; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 1268; SSE2-NEXT: cmovpl %ebx, %ebp 1269; SSE2-NEXT: movl %ebp, %eax 1270; SSE2-NEXT: addq $8, %rsp 1271; SSE2-NEXT: popq %rbx 1272; SSE2-NEXT: popq %rbp 1273; SSE2-NEXT: retq 1274; 1275; AVX-LABEL: test_f16_ord_s: 1276; AVX: # %bb.0: 1277; AVX-NEXT: movl %edi, %eax 1278; AVX-NEXT: vpextrw $0, %xmm0, %ecx 1279; AVX-NEXT: vpextrw $0, %xmm1, %edx 1280; AVX-NEXT: movzwl %dx, %edx 1281; AVX-NEXT: vmovd %edx, %xmm0 1282; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1283; AVX-NEXT: movzwl %cx, %ecx 1284; AVX-NEXT: vmovd %ecx, %xmm1 1285; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1286; AVX-NEXT: vcomiss %xmm0, %xmm1 1287; AVX-NEXT: cmovpl %esi, %eax 1288; AVX-NEXT: retq 1289; 1290; X86-FP16-LABEL: test_f16_ord_s: 1291; X86-FP16: # %bb.0: 1292; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1293; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1294; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1295; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1296; X86-FP16-NEXT: cmovnpl %eax, %ecx 1297; X86-FP16-NEXT: movl (%ecx), %eax 1298; X86-FP16-NEXT: retl 1299; 1300; X64-FP16-LABEL: test_f16_ord_s: 1301; X64-FP16: # %bb.0: 1302; X64-FP16-NEXT: movl %edi, %eax 1303; X64-FP16-NEXT: vcomish 
%xmm1, %xmm0 1304; X64-FP16-NEXT: cmovpl %esi, %eax 1305; X64-FP16-NEXT: retq 1306 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1307 half %f1, half %f2, metadata !"ord", 1308 metadata !"fpexcept.strict") #0 1309 %res = select i1 %cond, i32 %a, i32 %b 1310 ret i32 %res 1311} 1312 1313define i32 @test_f16_ueq_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1314; SSE2-LABEL: test_f16_ueq_s: 1315; SSE2: # %bb.0: 1316; SSE2-NEXT: pushq %rbp 1317; SSE2-NEXT: pushq %rbx 1318; SSE2-NEXT: pushq %rax 1319; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1320; SSE2-NEXT: movl %esi, %ebx 1321; SSE2-NEXT: movl %edi, %ebp 1322; SSE2-NEXT: movaps %xmm1, %xmm0 1323; SSE2-NEXT: callq __extendhfsf2@PLT 1324; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1325; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1326; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1327; SSE2-NEXT: callq __extendhfsf2@PLT 1328; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload 1329; SSE2-NEXT: cmovnel %ebx, %ebp 1330; SSE2-NEXT: movl %ebp, %eax 1331; SSE2-NEXT: addq $8, %rsp 1332; SSE2-NEXT: popq %rbx 1333; SSE2-NEXT: popq %rbp 1334; SSE2-NEXT: retq 1335; 1336; AVX-LABEL: test_f16_ueq_s: 1337; AVX: # %bb.0: 1338; AVX-NEXT: movl %edi, %eax 1339; AVX-NEXT: vpextrw $0, %xmm0, %ecx 1340; AVX-NEXT: vpextrw $0, %xmm1, %edx 1341; AVX-NEXT: movzwl %dx, %edx 1342; AVX-NEXT: vmovd %edx, %xmm0 1343; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1344; AVX-NEXT: movzwl %cx, %ecx 1345; AVX-NEXT: vmovd %ecx, %xmm1 1346; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1347; AVX-NEXT: vcomiss %xmm0, %xmm1 1348; AVX-NEXT: cmovnel %esi, %eax 1349; AVX-NEXT: retq 1350; 1351; X86-FP16-LABEL: test_f16_ueq_s: 1352; X86-FP16: # %bb.0: 1353; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1354; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1355; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1356; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1357; X86-FP16-NEXT: cmovel %eax, %ecx 1358; X86-FP16-NEXT: movl (%ecx), %eax 1359; X86-FP16-NEXT: retl 1360; 1361; X64-FP16-LABEL: test_f16_ueq_s: 1362; X64-FP16: # %bb.0: 1363; X64-FP16-NEXT: movl %edi, %eax 1364; X64-FP16-NEXT: vcomish %xmm1, %xmm0 1365; X64-FP16-NEXT: cmovnel %esi, %eax 1366; X64-FP16-NEXT: retq 1367 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1368 half %f1, half %f2, metadata !"ueq", 1369 metadata !"fpexcept.strict") #0 1370 %res = select i1 %cond, i32 %a, i32 %b 1371 ret i32 %res 1372} 1373 1374define i32 @test_f16_ugt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1375; SSE2-LABEL: test_f16_ugt_s: 1376; SSE2: # %bb.0: 1377; SSE2-NEXT: pushq %rbp 1378; SSE2-NEXT: pushq %rbx 1379; SSE2-NEXT: pushq %rax 1380; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1381; SSE2-NEXT: movl %esi, %ebx 1382; SSE2-NEXT: movl %edi, %ebp 1383; SSE2-NEXT: movaps %xmm1, %xmm0 1384; SSE2-NEXT: callq __extendhfsf2@PLT 1385; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1386; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1387; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1388; SSE2-NEXT: callq __extendhfsf2@PLT 1389; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1390; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 1391; SSE2-NEXT: comiss %xmm0, %xmm1 1392; SSE2-NEXT: cmovael %ebx, %ebp 1393; SSE2-NEXT: movl %ebp, %eax 1394; SSE2-NEXT: addq $8, %rsp 1395; SSE2-NEXT: popq %rbx 1396; SSE2-NEXT: popq %rbp 1397; SSE2-NEXT: retq 1398; 1399; AVX-LABEL: test_f16_ugt_s: 1400; AVX: # %bb.0: 1401; AVX-NEXT: movl %edi, %eax 1402; AVX-NEXT: vpextrw $0, 
%xmm1, %ecx 1403; AVX-NEXT: vpextrw $0, %xmm0, %edx 1404; AVX-NEXT: movzwl %dx, %edx 1405; AVX-NEXT: vmovd %edx, %xmm0 1406; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1407; AVX-NEXT: movzwl %cx, %ecx 1408; AVX-NEXT: vmovd %ecx, %xmm1 1409; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1410; AVX-NEXT: vcomiss %xmm0, %xmm1 1411; AVX-NEXT: cmovael %esi, %eax 1412; AVX-NEXT: retq 1413; 1414; X86-FP16-LABEL: test_f16_ugt_s: 1415; X86-FP16: # %bb.0: 1416; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1417; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1418; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1419; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1420; X86-FP16-NEXT: cmovbl %eax, %ecx 1421; X86-FP16-NEXT: movl (%ecx), %eax 1422; X86-FP16-NEXT: retl 1423; 1424; X64-FP16-LABEL: test_f16_ugt_s: 1425; X64-FP16: # %bb.0: 1426; X64-FP16-NEXT: movl %edi, %eax 1427; X64-FP16-NEXT: vcomish %xmm0, %xmm1 1428; X64-FP16-NEXT: cmovael %esi, %eax 1429; X64-FP16-NEXT: retq 1430 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1431 half %f1, half %f2, metadata !"ugt", 1432 metadata !"fpexcept.strict") #0 1433 %res = select i1 %cond, i32 %a, i32 %b 1434 ret i32 %res 1435} 1436 1437define i32 @test_f16_uge_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1438; SSE2-LABEL: test_f16_uge_s: 1439; SSE2: # %bb.0: 1440; SSE2-NEXT: pushq %rbp 1441; SSE2-NEXT: pushq %rbx 1442; SSE2-NEXT: pushq %rax 1443; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1444; SSE2-NEXT: movl %esi, %ebx 1445; SSE2-NEXT: movl %edi, %ebp 1446; SSE2-NEXT: movaps %xmm1, %xmm0 1447; SSE2-NEXT: callq __extendhfsf2@PLT 1448; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1449; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1450; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero 1451; SSE2-NEXT: callq __extendhfsf2@PLT 1452; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1453; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero 1454; SSE2-NEXT: comiss %xmm0, %xmm1 1455; SSE2-NEXT: cmoval %ebx, %ebp 1456; SSE2-NEXT: movl %ebp, %eax 1457; SSE2-NEXT: addq $8, %rsp 1458; SSE2-NEXT: popq %rbx 1459; SSE2-NEXT: popq %rbp 1460; SSE2-NEXT: retq 1461; 1462; AVX-LABEL: test_f16_uge_s: 1463; AVX: # %bb.0: 1464; AVX-NEXT: movl %edi, %eax 1465; AVX-NEXT: vpextrw $0, %xmm1, %ecx 1466; AVX-NEXT: vpextrw $0, %xmm0, %edx 1467; AVX-NEXT: movzwl %dx, %edx 1468; AVX-NEXT: vmovd %edx, %xmm0 1469; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 1470; AVX-NEXT: movzwl %cx, %ecx 1471; AVX-NEXT: vmovd %ecx, %xmm1 1472; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 1473; AVX-NEXT: vcomiss %xmm0, %xmm1 1474; AVX-NEXT: cmoval %esi, %eax 1475; AVX-NEXT: retq 1476; 1477; X86-FP16-LABEL: test_f16_uge_s: 1478; X86-FP16: # %bb.0: 1479; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero 1480; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 1481; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax 1482; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx 1483; X86-FP16-NEXT: cmovbel %eax, %ecx 1484; X86-FP16-NEXT: movl (%ecx), %eax 1485; X86-FP16-NEXT: retl 1486; 1487; X64-FP16-LABEL: test_f16_uge_s: 1488; X64-FP16: # %bb.0: 1489; X64-FP16-NEXT: movl %edi, %eax 1490; X64-FP16-NEXT: vcomish %xmm0, %xmm1 1491; X64-FP16-NEXT: cmoval %esi, %eax 1492; X64-FP16-NEXT: retq 1493 %cond = call i1 @llvm.experimental.constrained.fcmps.f16( 1494 half %f1, half %f2, metadata !"uge", 1495 metadata !"fpexcept.strict") #0 1496 %res = select i1 %cond, i32 %a, i32 %b 1497 ret i32 %res 1498} 1499 1500define i32 @test_f16_ult_s(i32 %a, i32 %b, half %f1, half %f2) #0 { 1501; SSE2-LABEL: 
test_f16_ult_s:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: movl %esi, %ebx
; SSE2-NEXT: movl %edi, %ebp
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: cmovael %ebx, %ebp
; SSE2-NEXT: movl %ebp, %eax
; SSE2-NEXT: addq $8, %rsp
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; AVX-LABEL: test_f16_ult_s:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vpextrw $0, %xmm0, %ecx
; AVX-NEXT: vpextrw $0, %xmm1, %edx
; AVX-NEXT: movzwl %dx, %edx
; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX-NEXT: movzwl %cx, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: cmovael %esi, %eax
; AVX-NEXT: retq
;
; X86-FP16-LABEL: test_f16_ult_s:
; X86-FP16: # %bb.0:
; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT: cmovbl %eax, %ecx
; X86-FP16-NEXT: movl (%ecx), %eax
; X86-FP16-NEXT: retl
;
; X64-FP16-LABEL: test_f16_ult_s:
; X64-FP16: # %bb.0:
; X64-FP16-NEXT: movl %edi, %eax
; X64-FP16-NEXT: vcomish %xmm1, %xmm0
; X64-FP16-NEXT: cmovael %esi, %eax
; X64-FP16-NEXT: retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ule_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ule_s:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: movl %esi, %ebx
; SSE2-NEXT: movl %edi, %ebp
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: cmoval %ebx, %ebp
; SSE2-NEXT: movl %ebp, %eax
; SSE2-NEXT: addq $8, %rsp
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; AVX-LABEL: test_f16_ule_s:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vpextrw $0, %xmm0, %ecx
; AVX-NEXT: vpextrw $0, %xmm1, %edx
; AVX-NEXT: movzwl %dx, %edx
; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX-NEXT: movzwl %cx, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: cmoval %esi, %eax
; AVX-NEXT: retq
;
; X86-FP16-LABEL: test_f16_ule_s:
; X86-FP16: # %bb.0:
; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT: cmovbel %eax, %ecx
; X86-FP16-NEXT: movl (%ecx), %eax
; X86-FP16-NEXT: retl
;
; X64-FP16-LABEL: test_f16_ule_s:
; X64-FP16: # %bb.0:
; X64-FP16-NEXT: movl %edi, %eax
; X64-FP16-NEXT: vcomish %xmm1, %xmm0
; X64-FP16-NEXT: cmoval %esi, %eax
; X64-FP16-NEXT: retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_une_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_une_s:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: movl %esi, %ebx
; SSE2-NEXT: movl %edi, %ebp
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: cmovnel %ebp, %ebx
; SSE2-NEXT: cmovpl %ebp, %ebx
; SSE2-NEXT: movl %ebx, %eax
; SSE2-NEXT: addq $8, %rsp
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; AVX-LABEL: test_f16_une_s:
; AVX: # %bb.0:
; AVX-NEXT: movl %esi, %eax
; AVX-NEXT: vpextrw $0, %xmm0, %ecx
; AVX-NEXT: vpextrw $0, %xmm1, %edx
; AVX-NEXT: movzwl %dx, %edx
; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX-NEXT: movzwl %cx, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: cmovnel %edi, %eax
; AVX-NEXT: cmovpl %edi, %eax
; AVX-NEXT: retq
;
; X86-FP16-LABEL: test_f16_une_s:
; X86-FP16: # %bb.0:
; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT: cmovnel %eax, %ecx
; X86-FP16-NEXT: cmovpl %eax, %ecx
; X86-FP16-NEXT: movl (%ecx), %eax
; X86-FP16-NEXT: retl
;
; X64-FP16-LABEL: test_f16_une_s:
; X64-FP16: # %bb.0:
; X64-FP16-NEXT: movl %esi, %eax
; X64-FP16-NEXT: vcomish %xmm1, %xmm0
; X64-FP16-NEXT: cmovnel %edi, %eax
; X64-FP16-NEXT: cmovpl %edi, %eax
; X64-FP16-NEXT: retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uno_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uno_s:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: movl %esi, %ebx
; SSE2-NEXT: movl %edi, %ebp
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: cmovnpl %ebx, %ebp
; SSE2-NEXT: movl %ebp, %eax
; SSE2-NEXT: addq $8, %rsp
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; AVX-LABEL: test_f16_uno_s:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vpextrw $0, %xmm0, %ecx
; AVX-NEXT: vpextrw $0, %xmm1, %edx
; AVX-NEXT: movzwl %dx, %edx
; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX-NEXT: movzwl %cx, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: cmovnpl %esi, %eax
; AVX-NEXT: retq
;
; X86-FP16-LABEL: test_f16_uno_s:
; X86-FP16: # %bb.0:
; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT: cmovpl %eax, %ecx
; X86-FP16-NEXT: movl (%ecx), %eax
; X86-FP16-NEXT: retl
;
; X64-FP16-LABEL: test_f16_uno_s:
; X64-FP16: # %bb.0:
; X64-FP16-NEXT: movl %edi, %eax
; X64-FP16-NEXT: vcomish %xmm1, %xmm0
; X64-FP16-NEXT: cmovnpl %esi, %eax
; X64-FP16-NEXT: retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define void @foo(half %0, half %1) #0 {
; SSE2-LABEL: foo:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: jbe .LBB28_1
; SSE2-NEXT: # %bb.2:
; SSE2-NEXT: popq %rax
; SSE2-NEXT: jmp bar@PLT # TAILCALL
; SSE2-NEXT: .LBB28_1:
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: foo:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $0, %xmm0, %eax
; AVX-NEXT: vpextrw $0, %xmm1, %ecx
; AVX-NEXT: movzwl %cx, %ecx
; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX-NEXT: movzwl %ax, %eax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX-NEXT: vucomiss %xmm0, %xmm1
; AVX-NEXT: ja bar@PLT # TAILCALL
; AVX-NEXT: # %bb.1:
; AVX-NEXT: retq
;
; X86-FP16-LABEL: foo:
; X86-FP16: # %bb.0:
; X86-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT: ja bar@PLT # TAILCALL
; X86-FP16-NEXT: # %bb.1:
; X86-FP16-NEXT: retl
;
; X64-FP16-LABEL: foo:
; X64-FP16: # %bb.0:
; X64-FP16-NEXT: vucomish %xmm1, %xmm0
; X64-FP16-NEXT: ja bar@PLT # TAILCALL
; X64-FP16-NEXT: # %bb.1:
; X64-FP16-NEXT: retq
  %3 = call i1 @llvm.experimental.constrained.fcmp.f16( half %0, half %1, metadata !"ogt", metadata !"fpexcept.strict") #0
  br i1 %3, label %4, label %5

4:                                                ; preds = %2
  tail call void @bar() #0
  br label %5

5:                                                ; preds = %4, %2
  ret void
}
declare void @bar()

attributes #0 = { strictfp nounwind }

declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
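
; Quiet vs. signaling lowering, as exercised above: the quiet intrinsic
; @llvm.experimental.constrained.fcmp.f16 lowers to the non-signaling compares
; (ucomiss/vucomiss/vucomish), which raise the invalid-operation exception only
; for signaling NaN operands, while the signaling intrinsic
; @llvm.experimental.constrained.fcmps.f16 lowers to comiss/vcomiss/vcomish,
; which raise it for any NaN operand. For the oeq/une predicates a single cmov
; cannot cover the unordered case, so the lowering pairs cmovne with cmovp:
; the parity flag, set by an unordered compare, routes NaN inputs to the
; correct select operand.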