; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

; Strict floating-point (constrained intrinsic) conversions from a scalar half
; value to i1/i8/i16/i32/i64, in both signed (fptosi) and unsigned (fptoui)
; flavours. Every call uses the "fpexcept.strict" exception behaviour and every
; function carries attribute group #0 (strictfp nounwind).

declare i1  @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
declare i8  @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
declare i1  @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
declare i8  @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)

; Signed conversion, half -> i1.
define i1 @fptosi_f16toi1(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi1:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi1:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i1 %result
}

; Signed conversion, half -> i8.
define i8 @fptosi_f16toi8(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i8 %result
}

; Signed conversion, half -> i16.
define i16 @fptosi_f16toi16(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i16 %result
}

; Signed conversion, half -> i32.
define i32 @fptosi_f16toi32(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    retq
  %result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i32 %result
}

; Signed conversion, half -> i64.
define i64 @fptosi_f16toi64(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %rax
; X64-NEXT:    retq
  %result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i64 %result
}

; Unsigned conversion, half -> i1.
define i1 @fptoui_f16toi1(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi1:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi1:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i1 %result
}

; Unsigned conversion, half -> i8.
define i8 @fptoui_f16toi8(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i8 %result
}

; Unsigned conversion, half -> i16.
define i16 @fptoui_f16toi16(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i16 %result
}

; Unsigned conversion, half -> i32. The two AVX-family configurations are
; checked under separate prefixes here because their expected code differs.
define i32 @fptoui_f16toi32(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    # kill: def $eax killed $eax killed $rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; F16C-LABEL: fptoui_f16toi32:
; F16C:       # %bb.0:
; F16C-NEXT:    vpextrw $0, %xmm0, %eax
; F16C-NEXT:    movzwl %ax, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vcvttss2si %xmm0, %rax
; F16C-NEXT:    # kill: def $eax killed $eax killed $rax
; F16C-NEXT:    retq
;
; AVX512-LABEL: fptoui_f16toi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $0, %xmm0, %eax
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    vcvttss2usi %xmm0, %eax
; AVX512-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2usi {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %eax
; X64-NEXT:    retq
  %result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i32 %result
}

; Unsigned conversion, half -> i64. The two AVX-family configurations are
; checked under separate prefixes here because their expected code differs.
define i64 @fptoui_f16toi64(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE2-NEXT:    comiss %xmm2, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    jb .LBB9_2
; SSE2-NEXT:  # %bb.1:
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:  .LBB9_2:
; SSE2-NEXT:    subss %xmm1, %xmm0
; SSE2-NEXT:    cvttss2si %xmm0, %rcx
; SSE2-NEXT:    setae %al
; SSE2-NEXT:    movzbl %al, %eax
; SSE2-NEXT:    shlq $63, %rax
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; F16C-LABEL: fptoui_f16toi64:
; F16C:       # %bb.0:
; F16C-NEXT:    vpextrw $0, %xmm0, %eax
; F16C-NEXT:    movzwl %ax, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; F16C-NEXT:    vcomiss %xmm1, %xmm0
; F16C-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; F16C-NEXT:    jb .LBB9_2
; F16C-NEXT:  # %bb.1:
; F16C-NEXT:    vmovaps %xmm1, %xmm2
; F16C-NEXT:  .LBB9_2:
; F16C-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; F16C-NEXT:    vcvttss2si %xmm0, %rcx
; F16C-NEXT:    setae %al
; F16C-NEXT:    movzbl %al, %eax
; F16C-NEXT:    shlq $63, %rax
; F16C-NEXT:    xorq %rcx, %rax
; F16C-NEXT:    retq
;
; AVX512-LABEL: fptoui_f16toi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $0, %xmm0, %eax
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %rax
; X64-NEXT:    retq
  %result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i64 %result
}

attributes #0 = { strictfp nounwind }