; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)

define half @sitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: sitofp_i1tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: andb $1, %dil
; SSE2-NEXT: negb %dil
; SSE2-NEXT: movsbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i1tof16:
; AVX: # %bb.0:
; AVX-NEXT: andb $1, %dil
; AVX-NEXT: negb %dil
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: sitofp_i8tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movsbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: sitofp_i16tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movswl %di, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: sitofp_i32tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: cvtsi2ss %edi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i32tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: sitofp_i64tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: cvtsi2ss %rdi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: sitofp_i64tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: sitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: uitofp_i1tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: andl $1, %edi
; SSE2-NEXT: cvtsi2ss %edi, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i1tof16:
; AVX: # %bb.0:
; AVX-NEXT: andl $1, %edi
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andl $1, %edi
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: uitofp_i8tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: uitofp_i16tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movzwl %di, %eax
; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; AVX-LABEL: uitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-LABEL: uitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: uitofp_i32tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; F16C-LABEL: uitofp_i32tof16:
; F16C: # %bb.0:
; F16C-NEXT: movl %edi, %eax
; F16C-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT: retq
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; X86-LABEL: uitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: uitofp_i64tof16:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: shrq %rax
; SSE2-NEXT: movl %edi, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rax, %rcx
; SSE2-NEXT: testq %rdi, %rdi
; SSE2-NEXT: cmovnsq %rdi, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: js .LBB9_2
; SSE2-NEXT: # %bb.1:
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: .LBB9_2:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: retq
;
; F16C-LABEL: uitofp_i64tof16:
; F16C: # %bb.0:
; F16C-NEXT: movq %rdi, %rax
; F16C-NEXT: shrq %rax
; F16C-NEXT: movl %edi, %ecx
; F16C-NEXT: andl $1, %ecx
; F16C-NEXT: orq %rax, %rcx
; F16C-NEXT: testq %rdi, %rdi
; F16C-NEXT: cmovnsq %rdi, %rcx
; F16C-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT: jns .LBB9_2
; F16C-NEXT: # %bb.1:
; F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
; F16C-NEXT: .LBB9_2:
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT: retq
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; X86-LABEL: uitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret half %result
}

attributes #0 = { strictfp nounwind }