; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 -verify-machineinstrs \
; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
; RUN:   | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 -fixup-byte-word-insts=0 -verify-machineinstrs \
; RUN:   | FileCheck %s -check-prefixes=CHECK-I686

; Code generation checks for the `half` type. Four configurations are run:
; x86-64 without F16C (both -fixup-byte-word-insts settings), x86-64 with
; F16C, and i686 with SSE2.

; Load a half from %in and store it to %out.
define void @test_load_store(ptr %in, ptr %out) #0 {
; CHECK-LIBCALL-LABEL: test_load_store:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rsi)
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_load_store:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_load_store:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    pinsrw $0, (%ecx), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ecx
; CHECK-I686-NEXT:    movw %cx, (%eax)
; CHECK-I686-NEXT:    retl
  %val = load half, ptr %in
  store half %val, ptr %out
  ret void
}

; Load a half and return its bit pattern as an i16 (bitcast).
define i16 @test_bitcast_from_half(ptr %addr) #0 {
; BWON-LABEL: test_bitcast_from_half:
; BWON:       # %bb.0:
; BWON-NEXT:    movzwl (%rdi), %eax
; BWON-NEXT:    retq
;
; BWOFF-LABEL: test_bitcast_from_half:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    movw (%rdi), %ax
; BWOFF-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_from_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movw (%eax), %ax
; CHECK-I686-NEXT:    retl
  %val = load half, ptr %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcast an i16 argument to half and store it to %addr.
define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw %si, (%rdi)
; CHECK-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_to_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    movw %ax, (%ecx)
; CHECK-I686-NEXT:    retl
  %val_fp = bitcast i16 %in to half
  store half %val_fp, ptr %addr
  ret void
}

; Load a half and extend it to float (fpext).
define float @test_extend32(ptr %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_extend32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, ptr %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; Load a half and extend it to double (fpext).
define double @test_extend64(ptr %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, ptr %addr
  %val64 = fpext half %val16 to double
  ret double %val64
}

; Truncate a float argument to half (fptrunc) and store it to %addr.
define void @test_trunc32(float %in, ptr %addr) #0 {
; CHECK-LIBCALL-LABEL: test_trunc32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc float %in to half
  store half %val16, ptr %addr
  ret void
}

; Truncate a double argument to half (fptrunc) and store it to %addr.
define void @test_trunc64(double %in, ptr %addr) #0 {
; CHECK-LIBCALL-LABEL: test_trunc64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rbx
; BWON-F16C-NEXT:    movq %rdi, %rbx
; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rbx)
; BWON-F16C-NEXT:    popq %rbx
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movq %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc double %in to half
  store half %val16, ptr %addr
  ret void
}

; Load a half and convert it to a signed i64 (fptosi).
define i64 @test_fptosi_i64(ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptosi_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptosi_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $28, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    orl $3072, %eax # imm = 0xC00
; CHECK-I686-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-I686-NEXT:    addl $28, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, ptr %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; Convert a signed i64 argument to half (sitofp) and store it to %p.
define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_sitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = sitofp i64 %a to half
  store half %r, ptr %p
  ret void
}

; Load a half and convert it to an unsigned i64 (fptoui).
define i64 @test_fptoui_i64(ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rcx
; CHECK-LIBCALL-NEXT:    movq %rcx, %rdx
; CHECK-LIBCALL-NEXT:    sarq $63, %rdx
; CHECK-LIBCALL-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    andq %rdx, %rax
; CHECK-LIBCALL-NEXT:    orq %rcx, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptoui_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rcx
; BWON-F16C-NEXT:    movq %rcx, %rdx
; BWON-F16C-NEXT:    sarq $63, %rdx
; BWON-F16C-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    andq %rdx, %rax
; BWON-F16C-NEXT:    orq %rcx, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptoui_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $28, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT:    jae .LBB9_2
; CHECK-I686-NEXT:  # %bb.1:
; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
; CHECK-I686-NEXT:  .LBB9_2:
; CHECK-I686-NEXT:    subss %xmm1, %xmm0
; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    setae %al
; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    orl $3072, %ecx # imm = 0xC00
; CHECK-I686-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movzbl %al, %edx
; CHECK-I686-NEXT:    shll $31, %edx
; CHECK-I686-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    addl $28, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, ptr %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; Convert an unsigned i64 argument to half (uitofp) and store it to %p.
define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_uitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
; CHECK-LIBCALL-NEXT:    js .LBB10_1
; CHECK-LIBCALL-NEXT:  # %bb.2:
; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
; CHECK-LIBCALL-NEXT:  .LBB10_1:
; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
; CHECK-LIBCALL-NEXT:    shrq %rax
; CHECK-LIBCALL-NEXT:    andl $1, %edi
; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:  .LBB10_3:
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_uitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    testq %rdi, %rdi
; BWON-F16C-NEXT:    js .LBB10_1
; BWON-F16C-NEXT:  # %bb.2:
; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    jmp .LBB10_3
; BWON-F16C-NEXT:  .LBB10_1:
; BWON-F16C-NEXT:    movq %rdi, %rax
; BWON-F16C-NEXT:    shrq %rax
; BWON-F16C-NEXT:    andl $1, %edi
; BWON-F16C-NEXT:    orq %rax, %rdi
; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:  .LBB10_3:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_uitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    shrl $31, %eax
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = uitofp i64 %a to half
  store half %r, ptr %p
  ret void
}

; Load a <4 x half> vector and extend it to <4 x float> (fpext).
define <4 x float> @test_extend32_vec4(ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend32_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    subq $72, %rsp
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-LIBCALL-NEXT:    addq $72, %rsp
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $88, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm0
; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm1
; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT:    addl $88, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, ptr %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

; Load a <4 x half> vector and extend it to <4 x double> (fpext).
define <4 x double> @test_extend64_vec4(ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend64_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    subq $72, %rsp
; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    addq $72, %rsp
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $104, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm0
; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm1
; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    movw %si, (%esp)
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; CHECK-I686-NEXT:    addl $104, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, ptr %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; Truncate a <4 x float> argument to <4 x half> (fptrunc) and store it to %p.
define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $64, %rsp
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
; CHECK-LIBCALL-NEXT:    addq $64, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $88, %esp
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; CHECK-I686-NEXT:    movss %xmm1, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, 6(%esi)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, 4(%esi)
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, 2(%esi)
; CHECK-I686-NEXT:    addl $88, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, ptr %p
  ret void
}

define void
@test_trunc64_vec4(<4 x double> %a, ptr %p) #0 { 676; CHECK-LIBCALL-LABEL: test_trunc64_vec4: 677; CHECK-LIBCALL: # %bb.0: 678; CHECK-LIBCALL-NEXT: pushq %rbx 679; CHECK-LIBCALL-NEXT: subq $64, %rsp 680; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 681; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 682; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 683; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 684; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 685; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 686; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 687; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 688; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 689; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 690; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 691; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 692; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 693; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 694; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 695; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 696; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx) 697; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 698; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 699; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 700; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 701; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 702; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx) 703; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 704; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 705; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx) 706; CHECK-LIBCALL-NEXT: addq $64, %rsp 707; CHECK-LIBCALL-NEXT: popq %rbx 708; CHECK-LIBCALL-NEXT: retq 709; 710; BWON-F16C-LABEL: test_trunc64_vec4: 711; BWON-F16C: # %bb.0: 712; BWON-F16C-NEXT: pushq %rbx 713; BWON-F16C-NEXT: subq $64, %rsp 714; BWON-F16C-NEXT: movq 
%rdi, %rbx 715; BWON-F16C-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 716; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0 717; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 718; BWON-F16C-NEXT: vzeroupper 719; BWON-F16C-NEXT: callq __truncdfhf2@PLT 720; BWON-F16C-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 721; BWON-F16C-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 722; BWON-F16C-NEXT: # xmm0 = mem[1,0] 723; BWON-F16C-NEXT: callq __truncdfhf2@PLT 724; BWON-F16C-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 725; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 726; BWON-F16C-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 727; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 728; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 729; BWON-F16C-NEXT: vzeroupper 730; BWON-F16C-NEXT: callq __truncdfhf2@PLT 731; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 732; BWON-F16C-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 733; BWON-F16C-NEXT: # xmm0 = mem[1,0] 734; BWON-F16C-NEXT: callq __truncdfhf2@PLT 735; BWON-F16C-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 736; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 737; BWON-F16C-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 738; BWON-F16C-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 739; BWON-F16C-NEXT: vmovq %xmm0, (%rbx) 740; BWON-F16C-NEXT: addq $64, %rsp 741; BWON-F16C-NEXT: popq %rbx 742; BWON-F16C-NEXT: retq 743; 744; CHECK-I686-LABEL: test_trunc64_vec4: 745; CHECK-I686: # %bb.0: 746; CHECK-I686-NEXT: pushl %esi 747; CHECK-I686-NEXT: subl $88, %esp 748; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 749; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 750; 
CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 751; CHECK-I686-NEXT: movlps %xmm0, (%esp) 752; CHECK-I686-NEXT: calll __truncdfhf2 753; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 754; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 755; CHECK-I686-NEXT: movhps %xmm0, (%esp) 756; CHECK-I686-NEXT: calll __truncdfhf2 757; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 758; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 759; CHECK-I686-NEXT: movlps %xmm0, (%esp) 760; CHECK-I686-NEXT: calll __truncdfhf2 761; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 762; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 763; CHECK-I686-NEXT: movhps %xmm0, (%esp) 764; CHECK-I686-NEXT: calll __truncdfhf2 765; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 766; CHECK-I686-NEXT: movw %ax, 6(%esi) 767; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 768; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 769; CHECK-I686-NEXT: movw %ax, 4(%esi) 770; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 771; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 772; CHECK-I686-NEXT: movw %ax, 2(%esi) 773; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 774; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 775; CHECK-I686-NEXT: movw %ax, (%esi) 776; CHECK-I686-NEXT: addl $88, %esp 777; CHECK-I686-NEXT: popl %esi 778; CHECK-I686-NEXT: retl 779 %v = fptrunc <4 x double> %a to <4 x half> 780 store <4 x half> %v, ptr %p 781 ret void 782} 783 784declare float @test_floatret(); 785 786; On i686, if SSE2 is available, the return value from test_floatret is loaded 787; to f80 and then rounded to f32. The DAG combiner should not combine this 788; fp_round and the subsequent fptrunc from float to half. 
789define half @test_f80trunc_nodagcombine() #0 { 790; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine: 791; CHECK-LIBCALL: # %bb.0: 792; CHECK-LIBCALL-NEXT: pushq %rax 793; CHECK-LIBCALL-NEXT: callq test_floatret@PLT 794; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 795; CHECK-LIBCALL-NEXT: popq %rax 796; CHECK-LIBCALL-NEXT: retq 797; 798; BWON-F16C-LABEL: test_f80trunc_nodagcombine: 799; BWON-F16C: # %bb.0: 800; BWON-F16C-NEXT: pushq %rax 801; BWON-F16C-NEXT: callq test_floatret@PLT 802; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 803; BWON-F16C-NEXT: vmovd %xmm0, %eax 804; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 805; BWON-F16C-NEXT: popq %rax 806; BWON-F16C-NEXT: retq 807; 808; CHECK-I686-LABEL: test_f80trunc_nodagcombine: 809; CHECK-I686: # %bb.0: 810; CHECK-I686-NEXT: subl $12, %esp 811; CHECK-I686-NEXT: calll test_floatret@PLT 812; CHECK-I686-NEXT: fstps (%esp) 813; CHECK-I686-NEXT: calll __truncsfhf2 814; CHECK-I686-NEXT: addl $12, %esp 815; CHECK-I686-NEXT: retl 816 %1 = call float @test_floatret() 817 %2 = fptrunc float %1 to half 818 ret half %2 819} 820 821 822 823 824define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 { 825; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32: 826; CHECK-LIBCALL: # %bb.0: 827; CHECK-LIBCALL-NEXT: subq $40, %rsp 828; CHECK-LIBCALL-NEXT: pinsrw $0, (%rsi), %xmm0 829; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 830; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 831; CHECK-LIBCALL-NEXT: cvtsi2ss %edi, %xmm0 832; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 833; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 834; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 835; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 836; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 837; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 838; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 839; CHECK-LIBCALL-NEXT: callq 
__extendhfsf2@PLT 840; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 841; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 842; CHECK-LIBCALL-NEXT: addq $40, %rsp 843; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL 844; 845; BWON-F16C-LABEL: test_sitofp_fadd_i32: 846; BWON-F16C: # %bb.0: 847; BWON-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0 848; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm1 849; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 850; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 851; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 852; BWON-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 853; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 854; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 855; BWON-F16C-NEXT: retq 856; 857; CHECK-I686-LABEL: test_sitofp_fadd_i32: 858; CHECK-I686: # %bb.0: 859; CHECK-I686-NEXT: subl $60, %esp 860; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 861; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 862; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 863; CHECK-I686-NEXT: xorps %xmm0, %xmm0 864; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 865; CHECK-I686-NEXT: movss %xmm0, (%esp) 866; CHECK-I686-NEXT: calll __truncsfhf2 867; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 868; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 869; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 870; CHECK-I686-NEXT: movw %ax, (%esp) 871; CHECK-I686-NEXT: calll __extendhfsf2 872; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 873; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 874; CHECK-I686-NEXT: movw %ax, (%esp) 875; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 876; CHECK-I686-NEXT: calll __extendhfsf2 877; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 878; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 879; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0 880; CHECK-I686-NEXT: movss %xmm0, (%esp) 881; CHECK-I686-NEXT: calll __truncsfhf2 882; CHECK-I686-NEXT: 
pextrw $0, %xmm0, %eax 883; CHECK-I686-NEXT: movw %ax, (%esp) 884; CHECK-I686-NEXT: calll __extendhfsf2 885; CHECK-I686-NEXT: addl $60, %esp 886; CHECK-I686-NEXT: retl 887 %tmp0 = load half, ptr %b 888 %tmp1 = sitofp i32 %a to half 889 %tmp2 = fadd half %tmp0, %tmp1 890 %tmp3 = fpext half %tmp2 to float 891 ret float %tmp3 892} 893 894define half @PR40273(half) #0 { 895; CHECK-LIBCALL-LABEL: PR40273: 896; CHECK-LIBCALL: # %bb.0: 897; CHECK-LIBCALL-NEXT: pushq %rax 898; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 899; CHECK-LIBCALL-NEXT: xorl %eax, %eax 900; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1 901; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 902; CHECK-LIBCALL-NEXT: movl $15360, %ecx # imm = 0x3C00 903; CHECK-LIBCALL-NEXT: cmovnel %ecx, %eax 904; CHECK-LIBCALL-NEXT: cmovpl %ecx, %eax 905; CHECK-LIBCALL-NEXT: pinsrw $0, %eax, %xmm0 906; CHECK-LIBCALL-NEXT: popq %rax 907; CHECK-LIBCALL-NEXT: retq 908; 909; BWON-F16C-LABEL: PR40273: 910; BWON-F16C: # %bb.0: 911; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 912; BWON-F16C-NEXT: xorl %eax, %eax 913; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 914; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 915; BWON-F16C-NEXT: movl $15360, %ecx # imm = 0x3C00 916; BWON-F16C-NEXT: cmovnel %ecx, %eax 917; BWON-F16C-NEXT: cmovpl %ecx, %eax 918; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 919; BWON-F16C-NEXT: retq 920; 921; CHECK-I686-LABEL: PR40273: 922; CHECK-I686: # %bb.0: 923; CHECK-I686-NEXT: subl $12, %esp 924; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 925; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 926; CHECK-I686-NEXT: movw %ax, (%esp) 927; CHECK-I686-NEXT: calll __extendhfsf2 928; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 929; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 930; CHECK-I686-NEXT: xorl %eax, %eax 931; CHECK-I686-NEXT: xorps %xmm1, %xmm1 932; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 933; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00 934; CHECK-I686-NEXT: cmovnel %ecx, %eax 935; CHECK-I686-NEXT: cmovpl %ecx, 
%eax 936; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0 937; CHECK-I686-NEXT: addl $12, %esp 938; CHECK-I686-NEXT: retl 939 %2 = fcmp une half %0, 0xH0000 940 %3 = uitofp i1 %2 to half 941 ret half %3 942} 943 944define void @brcond(half %0) #0 { 945; CHECK-LIBCALL-LABEL: brcond: 946; CHECK-LIBCALL: # %bb.0: # %entry 947; CHECK-LIBCALL-NEXT: pushq %rax 948; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 949; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1 950; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 951; CHECK-LIBCALL-NEXT: setp %al 952; CHECK-LIBCALL-NEXT: setne %cl 953; CHECK-LIBCALL-NEXT: orb %al, %cl 954; CHECK-LIBCALL-NEXT: jne .LBB18_2 955; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then 956; CHECK-LIBCALL-NEXT: popq %rax 957; CHECK-LIBCALL-NEXT: retq 958; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end 959; 960; BWON-F16C-LABEL: brcond: 961; BWON-F16C: # %bb.0: # %entry 962; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 963; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 964; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 965; BWON-F16C-NEXT: setp %al 966; BWON-F16C-NEXT: setne %cl 967; BWON-F16C-NEXT: orb %al, %cl 968; BWON-F16C-NEXT: jne .LBB18_2 969; BWON-F16C-NEXT: # %bb.1: # %if.then 970; BWON-F16C-NEXT: retq 971; BWON-F16C-NEXT: .LBB18_2: # %if.end 972; 973; CHECK-I686-LABEL: brcond: 974; CHECK-I686: # %bb.0: # %entry 975; CHECK-I686-NEXT: subl $12, %esp 976; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 977; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 978; CHECK-I686-NEXT: movw %ax, (%esp) 979; CHECK-I686-NEXT: calll __extendhfsf2 980; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 981; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 982; CHECK-I686-NEXT: xorps %xmm1, %xmm1 983; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 984; CHECK-I686-NEXT: setp %al 985; CHECK-I686-NEXT: setne %cl 986; CHECK-I686-NEXT: orb %al, %cl 987; CHECK-I686-NEXT: jne .LBB18_2 988; CHECK-I686-NEXT: # %bb.1: # %if.then 989; CHECK-I686-NEXT: addl $12, %esp 990; CHECK-I686-NEXT: retl 991; CHECK-I686-NEXT: .LBB18_2: # %if.end 
992entry: 993 %cmp = fcmp oeq half 0xH0000, %0 994 br i1 %cmp, label %if.then, label %if.end 995 996if.then: ; preds = %entry 997 ret void 998 999if.end: ; preds = %entry 1000 unreachable 1001} 1002 1003define half @test_sqrt(half %0) #0 { 1004; CHECK-LIBCALL-LABEL: test_sqrt: 1005; CHECK-LIBCALL: # %bb.0: # %entry 1006; CHECK-LIBCALL-NEXT: pushq %rax 1007; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1008; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0 1009; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1010; CHECK-LIBCALL-NEXT: popq %rax 1011; CHECK-LIBCALL-NEXT: retq 1012; 1013; BWON-F16C-LABEL: test_sqrt: 1014; BWON-F16C: # %bb.0: # %entry 1015; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 1016; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 1017; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1018; BWON-F16C-NEXT: vmovd %xmm0, %eax 1019; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 1020; BWON-F16C-NEXT: retq 1021; 1022; CHECK-I686-LABEL: test_sqrt: 1023; CHECK-I686: # %bb.0: # %entry 1024; CHECK-I686-NEXT: subl $12, %esp 1025; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 1026; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1027; CHECK-I686-NEXT: movw %ax, (%esp) 1028; CHECK-I686-NEXT: calll __extendhfsf2 1029; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1030; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1031; CHECK-I686-NEXT: sqrtss %xmm0, %xmm0 1032; CHECK-I686-NEXT: movss %xmm0, (%esp) 1033; CHECK-I686-NEXT: calll __truncsfhf2 1034; CHECK-I686-NEXT: addl $12, %esp 1035; CHECK-I686-NEXT: retl 1036entry: 1037 %1 = call half @llvm.sqrt.f16(half %0) 1038 ret half %1 1039} 1040 1041declare half @llvm.sqrt.f16(half) 1042 1043define void @main.158() #0 { 1044; CHECK-LIBCALL-LABEL: main.158: 1045; CHECK-LIBCALL: # %bb.0: # %entry 1046; CHECK-LIBCALL-NEXT: pushq %rax 1047; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 1048; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1049; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1050; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = 
[8.0E+0,0.0E+0,0.0E+0,0.0E+0] 1051; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1 1052; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 1053; CHECK-LIBCALL-NEXT: jae .LBB20_2 1054; CHECK-LIBCALL-NEXT: # %bb.1: # %entry 1055; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] 1056; CHECK-LIBCALL-NEXT: .LBB20_2: # %entry 1057; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1058; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 1059; CHECK-LIBCALL-NEXT: movw %ax, (%rax) 1060; CHECK-LIBCALL-NEXT: popq %rax 1061; CHECK-LIBCALL-NEXT: retq 1062; 1063; BWON-F16C-LABEL: main.158: 1064; BWON-F16C: # %bb.0: # %entry 1065; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 1066; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1 1067; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 1068; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] 1069; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2 1070; BWON-F16C-NEXT: jae .LBB20_2 1071; BWON-F16C-NEXT: # %bb.1: # %entry 1072; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] 1073; BWON-F16C-NEXT: .LBB20_2: # %entry 1074; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1075; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rax) 1076; BWON-F16C-NEXT: retq 1077; 1078; CHECK-I686-LABEL: main.158: 1079; CHECK-I686: # %bb.0: # %entry 1080; CHECK-I686-NEXT: subl $12, %esp 1081; CHECK-I686-NEXT: movl $0, (%esp) 1082; CHECK-I686-NEXT: calll __truncsfhf2 1083; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1084; CHECK-I686-NEXT: movw %ax, (%esp) 1085; CHECK-I686-NEXT: calll __extendhfsf2 1086; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1087; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] 1088; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1089; CHECK-I686-NEXT: xorps %xmm0, %xmm0 1090; CHECK-I686-NEXT: jae .LBB20_2 1091; CHECK-I686-NEXT: # %bb.1: # %entry 1092; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] 1093; CHECK-I686-NEXT: .LBB20_2: # %entry 1094; CHECK-I686-NEXT: movss %xmm0, (%esp) 1095; CHECK-I686-NEXT: calll __truncsfhf2 
1096; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1097; CHECK-I686-NEXT: movw %ax, (%eax) 1098; CHECK-I686-NEXT: addl $12, %esp 1099; CHECK-I686-NEXT: retl 1100entry: 1101 %0 = tail call half @llvm.fabs.f16(half undef) 1102 %1 = fpext half %0 to float 1103 %compare.2 = fcmp ole half %0, 0xH4800 1104 %multiply.95 = fmul float %1, 5.000000e-01 1105 %add.82 = fadd float %multiply.95, -2.000000e+00 1106 %multiply.68 = fmul float %add.82, 0.000000e+00 1107 %subtract.65 = fsub float %multiply.68, 0.000000e+00 1108 %multiply.57 = fmul float undef, 0.000000e+00 1109 %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57 1110 %3 = fptrunc float %2 to half 1111 store half %3, ptr undef, align 2 1112 ret void 1113} 1114 1115define void @main.45() #0 { 1116; CHECK-LIBCALL-LABEL: main.45: 1117; CHECK-LIBCALL: # %bb.0: # %entry 1118; CHECK-LIBCALL-NEXT: pushq %rbp 1119; CHECK-LIBCALL-NEXT: pushq %r15 1120; CHECK-LIBCALL-NEXT: pushq %r14 1121; CHECK-LIBCALL-NEXT: pushq %rbx 1122; CHECK-LIBCALL-NEXT: pushq %rax 1123; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0 1124; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] 1125; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx 1126; CHECK-LIBCALL-NEXT: movq %rbx, %r14 1127; CHECK-LIBCALL-NEXT: shrq $48, %r14 1128; CHECK-LIBCALL-NEXT: movq %rbx, %r15 1129; CHECK-LIBCALL-NEXT: shrq $32, %r15 1130; CHECK-LIBCALL-NEXT: movl %ebx, %ebp 1131; CHECK-LIBCALL-NEXT: shrl $16, %ebp 1132; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1133; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm0 1134; CHECK-LIBCALL-NEXT: movl $32256, %eax # imm = 0x7E00 1135; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebp 1136; CHECK-LIBCALL-NEXT: cmovpl %eax, %r15d 1137; CHECK-LIBCALL-NEXT: cmovpl %eax, %r14d 1138; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebx 1139; CHECK-LIBCALL-NEXT: movw %bx, (%rax) 1140; CHECK-LIBCALL-NEXT: movw %r14w, (%rax) 1141; CHECK-LIBCALL-NEXT: movw %r15w, (%rax) 1142; CHECK-LIBCALL-NEXT: movw %bp, (%rax) 1143; CHECK-LIBCALL-NEXT: addq $8, %rsp 1144; 
CHECK-LIBCALL-NEXT: popq %rbx 1145; CHECK-LIBCALL-NEXT: popq %r14 1146; CHECK-LIBCALL-NEXT: popq %r15 1147; CHECK-LIBCALL-NEXT: popq %rbp 1148; CHECK-LIBCALL-NEXT: retq 1149; 1150; BWON-F16C-LABEL: main.45: 1151; BWON-F16C: # %bb.0: # %entry 1152; BWON-F16C-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0 1153; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1154; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm1 1155; BWON-F16C-NEXT: vxorps %xmm2, %xmm2, %xmm2 1156; BWON-F16C-NEXT: vcmpunordps %xmm2, %xmm1, %xmm1 1157; BWON-F16C-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 1158; BWON-F16C-NEXT: vpblendvb %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1159; BWON-F16C-NEXT: vmovq %xmm0, (%rax) 1160; BWON-F16C-NEXT: retq 1161; 1162; CHECK-I686-LABEL: main.45: 1163; CHECK-I686: # %bb.0: # %entry 1164; CHECK-I686-NEXT: pushl %edi 1165; CHECK-I686-NEXT: pushl %esi 1166; CHECK-I686-NEXT: subl $20, %esp 1167; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 1168; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] 1169; CHECK-I686-NEXT: movd %xmm1, %esi 1170; CHECK-I686-NEXT: movl %esi, %edi 1171; CHECK-I686-NEXT: shrl $16, %edi 1172; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1173; CHECK-I686-NEXT: movw %ax, (%esp) 1174; CHECK-I686-NEXT: calll __extendhfsf2 1175; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1176; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1177; CHECK-I686-NEXT: ucomiss %xmm0, %xmm0 1178; CHECK-I686-NEXT: movl $32256, %eax # imm = 0x7E00 1179; CHECK-I686-NEXT: cmovpl %eax, %esi 1180; CHECK-I686-NEXT: cmovpl %eax, %edi 1181; CHECK-I686-NEXT: movw %di, (%eax) 1182; CHECK-I686-NEXT: movw %si, (%eax) 1183; CHECK-I686-NEXT: addl $20, %esp 1184; CHECK-I686-NEXT: popl %esi 1185; CHECK-I686-NEXT: popl %edi 1186; CHECK-I686-NEXT: retl 1187entry: 1188 %0 = load half, ptr undef, align 8 1189 %1 = bitcast half %0 to i16 1190 %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0 1191 %broadcast.splat = shufflevector <4 x half> 
%broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer 1192 %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0 1193 %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer 1194 %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer 1195 %3 = add <4 x i16> zeroinitializer, %broadcast.splat14 1196 %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3 1197 %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4 1198 %6 = bitcast <4 x i16> %5 to <4 x half> 1199 %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6 1200 store <4 x half> %7, ptr undef, align 16 1201 ret void 1202} 1203 1204define half @fcopysign(half %x, half %y) { 1205; CHECK-LIBCALL-LABEL: fcopysign: 1206; CHECK-LIBCALL: # %bb.0: 1207; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax 1208; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000 1209; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx 1210; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF 1211; CHECK-LIBCALL-NEXT: orl %eax, %ecx 1212; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0 1213; CHECK-LIBCALL-NEXT: retq 1214; 1215; BWON-F16C-LABEL: fcopysign: 1216; BWON-F16C: # %bb.0: 1217; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax 1218; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000 1219; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx 1220; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF 1221; BWON-F16C-NEXT: orl %eax, %ecx 1222; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 1223; BWON-F16C-NEXT: retq 1224; 1225; CHECK-I686-LABEL: fcopysign: 1226; CHECK-I686: # %bb.0: 1227; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000 1228; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax 1229; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 1230; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF 1231; CHECK-I686-NEXT: orl %eax, %ecx 1232; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0 1233; CHECK-I686-NEXT: retl 1234 %a = call half 
@llvm.copysign.f16(half %x, half %y) 1235 ret half %a 1236} 1237 1238declare half @llvm.fabs.f16(half) 1239declare half @llvm.copysign.f16(half, half) 1240 1241define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) { 1242; CHECK-LIBCALL-LABEL: select: 1243; CHECK-LIBCALL: # %bb.0: 1244; CHECK-LIBCALL-NEXT: testb $1, %dil 1245; CHECK-LIBCALL-NEXT: jne .LBB23_2 1246; CHECK-LIBCALL-NEXT: # %bb.1: 1247; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 1248; CHECK-LIBCALL-NEXT: .LBB23_2: 1249; CHECK-LIBCALL-NEXT: retq 1250; 1251; BWON-F16C-LABEL: select: 1252; BWON-F16C: # %bb.0: 1253; BWON-F16C-NEXT: testb $1, %dil 1254; BWON-F16C-NEXT: jne .LBB23_2 1255; BWON-F16C-NEXT: # %bb.1: 1256; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0 1257; BWON-F16C-NEXT: .LBB23_2: 1258; BWON-F16C-NEXT: retq 1259; 1260; CHECK-I686-LABEL: select: 1261; CHECK-I686: # %bb.0: 1262; CHECK-I686-NEXT: testb $1, {{[0-9]+}}(%esp) 1263; CHECK-I686-NEXT: jne .LBB23_2 1264; CHECK-I686-NEXT: # %bb.1: 1265; CHECK-I686-NEXT: movaps %xmm1, %xmm0 1266; CHECK-I686-NEXT: .LBB23_2: 1267; CHECK-I686-NEXT: retl 1268 %s = select i1 %c, <8 x half> %x, <8 x half> %y 1269 ret <8 x half> %s 1270} 1271 1272define <8 x half> @shuffle(ptr %p) { 1273; CHECK-LIBCALL-LABEL: shuffle: 1274; CHECK-LIBCALL: # %bb.0: 1275; CHECK-LIBCALL-NEXT: movdqu (%rdi), %xmm0 1276; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1277; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 1278; CHECK-LIBCALL-NEXT: retq 1279; 1280; BWON-F16C-LABEL: shuffle: 1281; BWON-F16C: # %bb.0: 1282; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4] 1283; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 1284; BWON-F16C-NEXT: retq 1285; 1286; CHECK-I686-LABEL: shuffle: 1287; CHECK-I686: # %bb.0: 1288; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 1289; CHECK-I686-NEXT: movdqu (%eax), %xmm0 1290; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1291; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 1292; 
CHECK-I686-NEXT: retl 1293 %1 = load <8 x half>, ptr %p, align 8 1294 %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1295 ret <8 x half> %2 1296} 1297 1298declare half @llvm.minnum.f16(half, half) 1299 1300define half @pr61271(half %0, half %1) #0 { 1301; CHECK-LIBCALL-LABEL: pr61271: 1302; CHECK-LIBCALL: # %bb.0: 1303; CHECK-LIBCALL-NEXT: pushq %rax 1304; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1305; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 1306; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1307; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1308; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1309; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1310; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1311; CHECK-LIBCALL-NEXT: minss (%rsp), %xmm0 # 4-byte Folded Reload 1312; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1313; CHECK-LIBCALL-NEXT: popq %rax 1314; CHECK-LIBCALL-NEXT: retq 1315; 1316; BWON-F16C-LABEL: pr61271: 1317; BWON-F16C: # %bb.0: 1318; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 1319; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 1320; BWON-F16C-NEXT: vminss %xmm1, %xmm0, %xmm0 1321; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1322; BWON-F16C-NEXT: vmovd %xmm0, %eax 1323; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 1324; BWON-F16C-NEXT: retq 1325; 1326; CHECK-I686-LABEL: pr61271: 1327; CHECK-I686: # %bb.0: 1328; CHECK-I686-NEXT: subl $44, %esp 1329; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 1330; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1331; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 1332; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1333; CHECK-I686-NEXT: movw %ax, (%esp) 1334; CHECK-I686-NEXT: calll __extendhfsf2 1335; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1336; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1337; 
CHECK-I686-NEXT: movw %ax, (%esp) 1338; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1339; CHECK-I686-NEXT: calll __extendhfsf2 1340; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1341; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1342; CHECK-I686-NEXT: minss {{[0-9]+}}(%esp), %xmm0 1343; CHECK-I686-NEXT: movss %xmm0, (%esp) 1344; CHECK-I686-NEXT: calll __truncsfhf2 1345; CHECK-I686-NEXT: addl $44, %esp 1346; CHECK-I686-NEXT: retl 1347 %3 = call fast half @llvm.minnum.f16(half %0, half %1) 1348 ret half %3 1349} 1350 1351declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>) 1352 1353define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { 1354; CHECK-LIBCALL-LABEL: maxnum_v8f16: 1355; CHECK-LIBCALL: # %bb.0: 1356; CHECK-LIBCALL-NEXT: subq $184, %rsp 1357; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1358; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1359; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1360; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1361; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1362; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1363; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1364; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1365; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1366; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1367; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1368; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1369; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1370; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1371; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Reload 1372; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1373; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1374; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1375; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1376; CHECK-LIBCALL-NEXT: ja .LBB26_2 1377; CHECK-LIBCALL-NEXT: # %bb.1: 1378; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1379; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1380; CHECK-LIBCALL-NEXT: .LBB26_2: 1381; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1382; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1383; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1384; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1385; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1386; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1387; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1388; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1389; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1390; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1391; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1392; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1393; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1394; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1395; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1396; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1397; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1398; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1399; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1400; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1401; CHECK-LIBCALL-NEXT: ja .LBB26_4 1402; CHECK-LIBCALL-NEXT: # 
%bb.3: 1403; CHECK-LIBCALL-NEXT: movss (%rsp), %xmm0 # 4-byte Reload 1404; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1405; CHECK-LIBCALL-NEXT: .LBB26_4: 1406; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1407; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1408; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1409; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1410; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1411; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1412; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1413; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1414; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1415; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1416; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1417; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1418; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1419; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1420; CHECK-LIBCALL-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1421; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1422; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1423; CHECK-LIBCALL-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload 1424; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1425; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1426; CHECK-LIBCALL-NEXT: ja .LBB26_6 1427; CHECK-LIBCALL-NEXT: # %bb.5: 1428; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1429; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1430; CHECK-LIBCALL-NEXT: .LBB26_6: 1431; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1432; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 
16-byte Spill 1433; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1434; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1435; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1436; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1437; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1438; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1439; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1440; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1441; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1442; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1443; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1444; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1445; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1446; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1447; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1448; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1449; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1450; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1451; CHECK-LIBCALL-NEXT: ja .LBB26_8 1452; CHECK-LIBCALL-NEXT: # %bb.7: 1453; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1454; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1455; CHECK-LIBCALL-NEXT: .LBB26_8: 1456; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1457; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1458; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1459; CHECK-LIBCALL-NEXT: psrlq $48, %xmm0 1460; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1461; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1462; CHECK-LIBCALL-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1463; CHECK-LIBCALL-NEXT: movdqa 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1464; CHECK-LIBCALL-NEXT: psrlq $48, %xmm0 1465; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1466; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1467; CHECK-LIBCALL-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1468; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1469; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1470; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1471; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1472; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1473; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1474; CHECK-LIBCALL-NEXT: movss (%rsp), %xmm0 # 4-byte Reload 1475; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1476; CHECK-LIBCALL-NEXT: ja .LBB26_10 1477; CHECK-LIBCALL-NEXT: # %bb.9: 1478; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 1479; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1480; CHECK-LIBCALL-NEXT: .LBB26_10: 1481; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1482; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1483; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1484; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1485; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1486; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1487; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1488; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1489; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1490; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1491; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1492; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1493; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Reload 1494; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1495; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1496; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1497; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1498; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1499; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero 1500; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1501; CHECK-LIBCALL-NEXT: ja .LBB26_12 1502; CHECK-LIBCALL-NEXT: # %bb.11: 1503; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1504; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero 1505; CHECK-LIBCALL-NEXT: .LBB26_12: 1506; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1507; CHECK-LIBCALL-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1508; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1509; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1510; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1511; CHECK-LIBCALL-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1512; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1513; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1514; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 1515; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1516; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1517; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1518; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1519; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1520; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1521; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1522; CHECK-LIBCALL-NEXT: 
movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1523; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1524; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1525; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1526; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1527; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1528; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1529; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero 1530; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1531; CHECK-LIBCALL-NEXT: ja .LBB26_14 1532; CHECK-LIBCALL-NEXT: # %bb.13: 1533; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1534; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero 1535; CHECK-LIBCALL-NEXT: .LBB26_14: 1536; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1537; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1538; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1539; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1540; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1541; CHECK-LIBCALL-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 1542; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1543; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1544; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 1545; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1546; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1547; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1548; CHECK-LIBCALL-NEXT: psrld $16, %xmm0 1549; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1550; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1551; CHECK-LIBCALL-NEXT: movd %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1552; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1553; CHECK-LIBCALL-NEXT: psrld $16, %xmm0 1554; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1555; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1556; CHECK-LIBCALL-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1557; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1558; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1559; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1560; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1561; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1562; CHECK-LIBCALL-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1563; CHECK-LIBCALL-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 1564; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1565; CHECK-LIBCALL-NEXT: ja .LBB26_16 1566; CHECK-LIBCALL-NEXT: # %bb.15: 1567; CHECK-LIBCALL-NEXT: movd (%rsp), %xmm0 # 4-byte Folded Reload 1568; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero 1569; CHECK-LIBCALL-NEXT: .LBB26_16: 1570; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1571; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1572; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1573; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1574; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 1575; CHECK-LIBCALL-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1576; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0] 1577; CHECK-LIBCALL-NEXT: movdqa %xmm1, %xmm0 1578; CHECK-LIBCALL-NEXT: addq $184, %rsp 1579; CHECK-LIBCALL-NEXT: retq 1580; 1581; BWON-F16C-LABEL: maxnum_v8f16: 1582; BWON-F16C: # %bb.0: 1583; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = 
xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1584; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 1585; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1586; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 1587; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3 1588; BWON-F16C-NEXT: ja .LBB26_2 1589; BWON-F16C-NEXT: # %bb.1: 1590; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3 1591; BWON-F16C-NEXT: .LBB26_2: 1592; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2 1593; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] 1594; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 1595; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] 1596; BWON-F16C-NEXT: vcvtph2ps %xmm4, %xmm4 1597; BWON-F16C-NEXT: vucomiss %xmm3, %xmm4 1598; BWON-F16C-NEXT: ja .LBB26_4 1599; BWON-F16C-NEXT: # %bb.3: 1600; BWON-F16C-NEXT: vmovaps %xmm3, %xmm4 1601; BWON-F16C-NEXT: .LBB26_4: 1602; BWON-F16C-NEXT: vmovd %xmm2, %eax 1603; BWON-F16C-NEXT: vcvtps2ph $4, %xmm4, %xmm2 1604; BWON-F16C-NEXT: vmovd %xmm2, %ecx 1605; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1606; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3 1607; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1608; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 1609; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2 1610; BWON-F16C-NEXT: ja .LBB26_6 1611; BWON-F16C-NEXT: # %bb.5: 1612; BWON-F16C-NEXT: vmovaps %xmm3, %xmm2 1613; BWON-F16C-NEXT: .LBB26_6: 1614; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2 1615; BWON-F16C-NEXT: vmovd %xmm2, %edx 1616; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] 1617; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3 1618; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] 1619; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 1620; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2 1621; BWON-F16C-NEXT: ja .LBB26_8 1622; BWON-F16C-NEXT: # %bb.7: 1623; BWON-F16C-NEXT: vmovaps 
%xmm3, %xmm2 1624; BWON-F16C-NEXT: .LBB26_8: 1625; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2 1626; BWON-F16C-NEXT: vmovd %xmm2, %esi 1627; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7] 1628; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 1629; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7] 1630; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm6 1631; BWON-F16C-NEXT: vucomiss %xmm2, %xmm6 1632; BWON-F16C-NEXT: ja .LBB26_10 1633; BWON-F16C-NEXT: # %bb.9: 1634; BWON-F16C-NEXT: vmovaps %xmm2, %xmm6 1635; BWON-F16C-NEXT: .LBB26_10: 1636; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 1637; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3 1638; BWON-F16C-NEXT: vpinsrw $0, %edx, %xmm0, %xmm4 1639; BWON-F16C-NEXT: vpinsrw $0, %esi, %xmm0, %xmm5 1640; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm6 1641; BWON-F16C-NEXT: vmovd %xmm6, %eax 1642; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3] 1643; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm7 1644; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] 1645; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6 1646; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6 1647; BWON-F16C-NEXT: ja .LBB26_12 1648; BWON-F16C-NEXT: # %bb.11: 1649; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6 1650; BWON-F16C-NEXT: .LBB26_12: 1651; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1652; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 1653; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 1654; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm5 1655; BWON-F16C-NEXT: vmovd %xmm5, %eax 1656; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 1657; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm7 1658; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm6 1659; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6 1660; BWON-F16C-NEXT: ja .LBB26_14 1661; BWON-F16C-NEXT: # %bb.13: 1662; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6 1663; BWON-F16C-NEXT: .LBB26_14: 1664; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1] 1665; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 1666; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4 1667; BWON-F16C-NEXT: vmovd %xmm4, %eax 1668; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 1669; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7] 1670; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 1671; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 1672; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 1673; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 1674; BWON-F16C-NEXT: ja .LBB26_16 1675; BWON-F16C-NEXT: # %bb.15: 1676; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0 1677; BWON-F16C-NEXT: .LBB26_16: 1678; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1679; BWON-F16C-NEXT: vmovd %xmm0, %eax 1680; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 1681; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 1682; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1683; BWON-F16C-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1684; BWON-F16C-NEXT: retq 1685; 1686; CHECK-I686-LABEL: maxnum_v8f16: 1687; CHECK-I686: # %bb.0: 1688; CHECK-I686-NEXT: pushl %ebx 1689; CHECK-I686-NEXT: pushl %edi 1690; CHECK-I686-NEXT: pushl %esi 1691; CHECK-I686-NEXT: subl $336, %esp # imm = 0x150 1692; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1693; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1694; CHECK-I686-NEXT: movaps %xmm1, %xmm0 1695; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1] 1696; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1697; CHECK-I686-NEXT: movw %di, (%esp) 1698; CHECK-I686-NEXT: calll __extendhfsf2 1699; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1700; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1701; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1702; CHECK-I686-NEXT: pextrw 
$0, %xmm0, %esi 1703; CHECK-I686-NEXT: movw %si, (%esp) 1704; CHECK-I686-NEXT: calll __extendhfsf2 1705; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1706; CHECK-I686-NEXT: movw %di, (%esp) 1707; CHECK-I686-NEXT: calll __extendhfsf2 1708; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1709; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1710; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1711; CHECK-I686-NEXT: movw %di, (%esp) 1712; CHECK-I686-NEXT: calll __extendhfsf2 1713; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1714; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1715; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx 1716; CHECK-I686-NEXT: movw %bx, (%esp) 1717; CHECK-I686-NEXT: calll __extendhfsf2 1718; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1719; CHECK-I686-NEXT: movw %di, (%esp) 1720; CHECK-I686-NEXT: calll __extendhfsf2 1721; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1722; CHECK-I686-NEXT: movw %bx, (%esp) 1723; CHECK-I686-NEXT: calll __extendhfsf2 1724; CHECK-I686-NEXT: movw %si, (%esp) 1725; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1726; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1727; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1728; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1729; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1730; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1731; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1732; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1733; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1734; CHECK-I686-NEXT: ja .LBB26_1 1735; CHECK-I686-NEXT: # %bb.2: 1736; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1737; CHECK-I686-NEXT: jmp .LBB26_3 1738; CHECK-I686-NEXT: .LBB26_1: 1739; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 
1740; CHECK-I686-NEXT: .LBB26_3: 1741; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1742; CHECK-I686-NEXT: calll __extendhfsf2 1743; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1744; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1745; CHECK-I686-NEXT: movss %xmm0, (%esp) 1746; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1747; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1748; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1749; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1750; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1751; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1752; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1753; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1754; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1755; CHECK-I686-NEXT: ja .LBB26_4 1756; CHECK-I686-NEXT: # %bb.5: 1757; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1758; CHECK-I686-NEXT: jmp .LBB26_6 1759; CHECK-I686-NEXT: .LBB26_4: 1760; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1761; CHECK-I686-NEXT: .LBB26_6: 1762; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1763; CHECK-I686-NEXT: calll __truncsfhf2 1764; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1765; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1766; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1767; CHECK-I686-NEXT: movss %xmm0, (%esp) 1768; CHECK-I686-NEXT: calll __truncsfhf2 1769; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1770; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1771; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 1772; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1773; CHECK-I686-NEXT: movw %di, (%esp) 1774; CHECK-I686-NEXT: calll __extendhfsf2 1775; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded 
Spill 1776; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1777; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 1778; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 1779; CHECK-I686-NEXT: movw %si, (%esp) 1780; CHECK-I686-NEXT: calll __extendhfsf2 1781; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1782; CHECK-I686-NEXT: movw %di, (%esp) 1783; CHECK-I686-NEXT: calll __extendhfsf2 1784; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1785; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1786; CHECK-I686-NEXT: psrlq $48, %xmm0 1787; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1788; CHECK-I686-NEXT: movw %di, (%esp) 1789; CHECK-I686-NEXT: calll __extendhfsf2 1790; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1791; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1792; CHECK-I686-NEXT: psrlq $48, %xmm0 1793; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx 1794; CHECK-I686-NEXT: movw %bx, (%esp) 1795; CHECK-I686-NEXT: calll __extendhfsf2 1796; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1797; CHECK-I686-NEXT: movw %di, (%esp) 1798; CHECK-I686-NEXT: calll __extendhfsf2 1799; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1800; CHECK-I686-NEXT: movw %bx, (%esp) 1801; CHECK-I686-NEXT: calll __extendhfsf2 1802; CHECK-I686-NEXT: movw %si, (%esp) 1803; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1804; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1805; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1806; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1807; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1808; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1809; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1810; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1811; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1812; 
CHECK-I686-NEXT: ja .LBB26_7 1813; CHECK-I686-NEXT: # %bb.8: 1814; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1815; CHECK-I686-NEXT: jmp .LBB26_9 1816; CHECK-I686-NEXT: .LBB26_7: 1817; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1818; CHECK-I686-NEXT: .LBB26_9: 1819; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1820; CHECK-I686-NEXT: calll __extendhfsf2 1821; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1822; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1823; CHECK-I686-NEXT: movss %xmm0, (%esp) 1824; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1825; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1826; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1827; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1828; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1829; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1830; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1831; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1832; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1833; CHECK-I686-NEXT: ja .LBB26_10 1834; CHECK-I686-NEXT: # %bb.11: 1835; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1836; CHECK-I686-NEXT: jmp .LBB26_12 1837; CHECK-I686-NEXT: .LBB26_10: 1838; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1839; CHECK-I686-NEXT: .LBB26_12: 1840; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1841; CHECK-I686-NEXT: calll __truncsfhf2 1842; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1843; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1844; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1845; CHECK-I686-NEXT: movss %xmm0, (%esp) 1846; CHECK-I686-NEXT: calll __truncsfhf2 1847; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1848; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte 
Reload 1849; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1850; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1851; CHECK-I686-NEXT: movw %di, (%esp) 1852; CHECK-I686-NEXT: calll __extendhfsf2 1853; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1854; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1855; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1856; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 1857; CHECK-I686-NEXT: movw %si, (%esp) 1858; CHECK-I686-NEXT: calll __extendhfsf2 1859; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1860; CHECK-I686-NEXT: movw %di, (%esp) 1861; CHECK-I686-NEXT: calll __extendhfsf2 1862; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1863; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1864; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1865; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1866; CHECK-I686-NEXT: movw %di, (%esp) 1867; CHECK-I686-NEXT: calll __extendhfsf2 1868; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1869; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1870; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1871; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx 1872; CHECK-I686-NEXT: movw %bx, (%esp) 1873; CHECK-I686-NEXT: calll __extendhfsf2 1874; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1875; CHECK-I686-NEXT: movw %di, (%esp) 1876; CHECK-I686-NEXT: calll __extendhfsf2 1877; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1878; CHECK-I686-NEXT: movw %bx, (%esp) 1879; CHECK-I686-NEXT: calll __extendhfsf2 1880; CHECK-I686-NEXT: movw %si, (%esp) 1881; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1882; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1883; 
CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1884; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1885; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1886; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1887; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1888; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1889; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1890; CHECK-I686-NEXT: ja .LBB26_13 1891; CHECK-I686-NEXT: # %bb.14: 1892; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1893; CHECK-I686-NEXT: jmp .LBB26_15 1894; CHECK-I686-NEXT: .LBB26_13: 1895; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1896; CHECK-I686-NEXT: .LBB26_15: 1897; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1898; CHECK-I686-NEXT: calll __extendhfsf2 1899; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1900; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1901; CHECK-I686-NEXT: movss %xmm0, (%esp) 1902; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1903; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1904; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1905; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1906; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1907; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1908; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1909; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1910; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1911; CHECK-I686-NEXT: ja .LBB26_16 1912; CHECK-I686-NEXT: # %bb.17: 1913; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1914; CHECK-I686-NEXT: jmp .LBB26_18 1915; CHECK-I686-NEXT: .LBB26_16: 1916; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1917; CHECK-I686-NEXT: .LBB26_18: 1918; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1919; CHECK-I686-NEXT: calll __truncsfhf2 1920; CHECK-I686-NEXT: 
movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1921; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1922; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1923; CHECK-I686-NEXT: movss %xmm0, (%esp) 1924; CHECK-I686-NEXT: calll __truncsfhf2 1925; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 1926; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1927; CHECK-I686-NEXT: psrld $16, %xmm0 1928; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1929; CHECK-I686-NEXT: movw %di, (%esp) 1930; CHECK-I686-NEXT: calll __extendhfsf2 1931; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1932; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1933; CHECK-I686-NEXT: psrld $16, %xmm0 1934; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 1935; CHECK-I686-NEXT: movw %si, (%esp) 1936; CHECK-I686-NEXT: calll __extendhfsf2 1937; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1938; CHECK-I686-NEXT: movw %di, (%esp) 1939; CHECK-I686-NEXT: calll __extendhfsf2 1940; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1941; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1942; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1943; CHECK-I686-NEXT: pextrw $0, %xmm0, %edi 1944; CHECK-I686-NEXT: movw %di, (%esp) 1945; CHECK-I686-NEXT: calll __extendhfsf2 1946; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1947; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 1948; CHECK-I686-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1949; CHECK-I686-NEXT: pextrw $0, %xmm0, %ebx 1950; CHECK-I686-NEXT: movw %bx, (%esp) 1951; CHECK-I686-NEXT: calll __extendhfsf2 1952; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1953; 
CHECK-I686-NEXT: movw %di, (%esp) 1954; CHECK-I686-NEXT: calll __extendhfsf2 1955; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 1956; CHECK-I686-NEXT: movw %bx, (%esp) 1957; CHECK-I686-NEXT: calll __extendhfsf2 1958; CHECK-I686-NEXT: movw %si, (%esp) 1959; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1960; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1961; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1962; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1963; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1964; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1965; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1966; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1967; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1968; CHECK-I686-NEXT: ja .LBB26_19 1969; CHECK-I686-NEXT: # %bb.20: 1970; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1971; CHECK-I686-NEXT: jmp .LBB26_21 1972; CHECK-I686-NEXT: .LBB26_19: 1973; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1974; CHECK-I686-NEXT: .LBB26_21: 1975; CHECK-I686-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1976; CHECK-I686-NEXT: calll __extendhfsf2 1977; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload 1978; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero 1979; CHECK-I686-NEXT: movss %xmm0, (%esp) 1980; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1981; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1982; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1983; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1984; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1985; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 1986; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1987; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1988; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1989; CHECK-I686-NEXT: ja .LBB26_22 1990; 
CHECK-I686-NEXT: # %bb.23: 1991; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1992; CHECK-I686-NEXT: jmp .LBB26_24 1993; CHECK-I686-NEXT: .LBB26_22: 1994; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1995; CHECK-I686-NEXT: .LBB26_24: 1996; CHECK-I686-NEXT: movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill 1997; CHECK-I686-NEXT: calll __truncsfhf2 1998; CHECK-I686-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload 1999; CHECK-I686-NEXT: # xmm1 = mem[0],zero,zero,zero 2000; CHECK-I686-NEXT: movss %xmm1, (%esp) 2001; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload 2002; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2003; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 2004; CHECK-I686-NEXT: punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2005; CHECK-I686-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 2006; CHECK-I686-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2007; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 2008; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 2009; CHECK-I686-NEXT: punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload 2010; CHECK-I686-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 2011; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 2012; CHECK-I686-NEXT: calll __truncsfhf2 2013; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload 2014; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2015; CHECK-I686-NEXT: punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2016; CHECK-I686-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 2017; CHECK-I686-NEXT: punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2018; 
; CHECK-I686-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-I686-NEXT: movdqa %xmm1, %xmm0
; CHECK-I686-NEXT: addl $336, %esp # imm = 0x150
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: popl %ebx
; CHECK-I686-NEXT: retl
  %3 = call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %0, <8 x half> %1)
  ret <8 x half> %3
}

; Shuffle-lowering test on half vectors (presumably the reduced reproducer for
; issue 63114, going by the function name - confirm against the tracker).
; A <24 x half> load is strided by three, widened with a splat of 0xH3C00
; (half 1.0), and interleaved against a poison operand so that every group of
; four result lanes is { poison, poison, loaded element, 1.0 }. The <32 x half>
; result is then stored. The pointer operands are deliberately poison / null.
define void @pr63114() {
; CHECK-LIBCALL-LABEL: pr63114:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4
; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5]
; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3]
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5]
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7
; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7
; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0
; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32
; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48
; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: pr63114:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vmovdqu (%rax), %xmm0
; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm1
; BWON-F16C-NEXT: vbroadcastss (%rax), %xmm2
; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; BWON-F16C-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; BWON-F16C-NEXT: vpsllq $48, %xmm3, %xmm4
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; BWON-F16C-NEXT: vpor %xmm3, %xmm2, %xmm2
; BWON-F16C-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0]
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7]
; BWON-F16C-NEXT: vpor %xmm3, %xmm1, %xmm1
; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7]
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1]
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7]
; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7]
; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7]
; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; BWON-F16C-NEXT: vmovups %ymm0, 0
; BWON-F16C-NEXT: vmovups %ymm1, 32
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: pr63114:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movdqu (%eax), %xmm6
; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
; CHECK-I686-NEXT: pand %xmm1, %xmm0
; CHECK-I686-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
; CHECK-I686-NEXT: por %xmm2, %xmm0
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
; CHECK-I686-NEXT: pand %xmm3, %xmm0
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
; CHECK-I686-NEXT: por %xmm4, %xmm0
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; CHECK-I686-NEXT: pand %xmm1, %xmm5
; CHECK-I686-NEXT: por %xmm2, %xmm5
; CHECK-I686-NEXT: pand %xmm3, %xmm5
; CHECK-I686-NEXT: por %xmm4, %xmm5
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
; CHECK-I686-NEXT: pand %xmm1, %xmm6
; CHECK-I686-NEXT: por %xmm2, %xmm6
; CHECK-I686-NEXT: pand %xmm3, %xmm6
; CHECK-I686-NEXT: por %xmm4, %xmm6
; CHECK-I686-NEXT: pand %xmm1, %xmm7
; CHECK-I686-NEXT: por %xmm2, %xmm7
; CHECK-I686-NEXT: pand %xmm3, %xmm7
; CHECK-I686-NEXT: por %xmm4, %xmm7
; CHECK-I686-NEXT: movdqu %xmm7, 0
; CHECK-I686-NEXT: movdqu %xmm6, 32
; CHECK-I686-NEXT: movdqu %xmm5, 48
; CHECK-I686-NEXT: movdqu %xmm0, 16
; CHECK-I686-NEXT: retl
  ; Load 24 halves, then keep elements 2, 5, 8, ..., 23 (stride of three).
  %wide = load <24 x half>, ptr poison, align 2
  %strided = shufflevector <24 x half> %wide, <24 x half> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  ; Concatenate the eight strided elements with eight copies of half 1.0.
  %with.ones = shufflevector <8 x half> %strided, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Mask indices 0-15 pick from the poison operand, 16-31 from %with.ones.
  %interleaved = shufflevector <16 x half> poison, <16 x half> %with.ones, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
  store <32 x half> %interleaved, ptr null, align 2
  ret void
}

attributes #0 = { nounwind }