xref: /llvm-project/llvm/test/CodeGen/X86/half.ll (revision 67c3f2b4303972a6dc8ada54efe1d5d80d119a51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 -verify-machineinstrs \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 -verify-machineinstrs \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 -verify-machineinstrs \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies one half value from %in to %out; no floating-point conversion is
; involved. The x86-64 runs without F16C and the i686 run expect the value to
; travel through an XMM register (pinsrw/pextrw) and be stored with movw; the
; F16C run expects vpextrw to store straight to memory.
11define void @test_load_store(ptr %in, ptr %out) #0 {
12; CHECK-LIBCALL-LABEL: test_load_store:
13; CHECK-LIBCALL:       # %bb.0:
14; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
15; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
16; CHECK-LIBCALL-NEXT:    movw %ax, (%rsi)
17; CHECK-LIBCALL-NEXT:    retq
18;
19; BWON-F16C-LABEL: test_load_store:
20; BWON-F16C:       # %bb.0:
21; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
22; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
23; BWON-F16C-NEXT:    retq
24;
25; CHECK-I686-LABEL: test_load_store:
26; CHECK-I686:       # %bb.0:
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
28; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
29; CHECK-I686-NEXT:    pinsrw $0, (%ecx), %xmm0
30; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ecx
31; CHECK-I686-NEXT:    movw %cx, (%eax)
32; CHECK-I686-NEXT:    retl
33  %val = load half, ptr %in
34  store half %val, ptr %out
35  ret void
36}
37
; Loads a half and returns its bit pattern as i16. The expected code is a plain
; 16-bit integer load: movzwl in the runs using -fixup-byte-word-insts=1, and
; movw in the runs using -fixup-byte-word-insts=0 (including the i686 run).
38define i16 @test_bitcast_from_half(ptr %addr) #0 {
39; BWON-LABEL: test_bitcast_from_half:
40; BWON:       # %bb.0:
41; BWON-NEXT:    movzwl (%rdi), %eax
42; BWON-NEXT:    retq
43;
44; BWOFF-LABEL: test_bitcast_from_half:
45; BWOFF:       # %bb.0:
46; BWOFF-NEXT:    movw (%rdi), %ax
47; BWOFF-NEXT:    retq
48;
49; CHECK-I686-LABEL: test_bitcast_from_half:
50; CHECK-I686:       # %bb.0:
51; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
52; CHECK-I686-NEXT:    movw (%eax), %ax
53; CHECK-I686-NEXT:    retl
54  %val = load half, ptr %addr
55  %val_int = bitcast half %val to i16
56  ret i16 %val_int
57}
58
; Reinterprets the i16 argument %in as a half and stores it to %addr. All three
; x86-64 runs share one expectation: a single 16-bit movw store of %si. The
; i686 run loads both arguments from the stack before the same movw store.
59define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
60; CHECK-LABEL: test_bitcast_to_half:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    movw %si, (%rdi)
63; CHECK-NEXT:    retq
64;
65; CHECK-I686-LABEL: test_bitcast_to_half:
66; CHECK-I686:       # %bb.0:
67; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
68; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
69; CHECK-I686-NEXT:    movw %ax, (%ecx)
70; CHECK-I686-NEXT:    retl
71  %val_fp = bitcast i16 %in to half
72  store half %val_fp, ptr %addr
73  ret void
74}
75
; Loads a half and extends it to float. Without F16C the x86-64 runs expect a
; tail call (jmp) to __extendhfsf2; with F16C the conversion is a single
; vcvtph2ps. The i686 run places the half on the stack and calls
; __extendhfsf2.
76define float @test_extend32(ptr %addr) #0 {
77; CHECK-LIBCALL-LABEL: test_extend32:
78; CHECK-LIBCALL:       # %bb.0:
79; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
80; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
81;
82; BWON-F16C-LABEL: test_extend32:
83; BWON-F16C:       # %bb.0:
84; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
85; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
86; BWON-F16C-NEXT:    retq
87;
88; CHECK-I686-LABEL: test_extend32:
89; CHECK-I686:       # %bb.0:
90; CHECK-I686-NEXT:    subl $12, %esp
91; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
92; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
93; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
94; CHECK-I686-NEXT:    movw %ax, (%esp)
95; CHECK-I686-NEXT:    calll __extendhfsf2
96; CHECK-I686-NEXT:    addl $12, %esp
97; CHECK-I686-NEXT:    retl
98  %val16 = load half, ptr %addr
99  %val32 = fpext half %val16 to float
100  ret float %val32
101}
102
; Loads a half and extends it to double. The x86-64 expectations go through
; float first (__extendhfsf2 without F16C, vcvtph2ps with F16C) and then widen
; with cvtss2sd/vcvtss2sd. The i686 expectation shows only the __extendhfsf2
; call, with no further conversion instruction before the return.
103define double @test_extend64(ptr %addr) #0 {
104; CHECK-LIBCALL-LABEL: test_extend64:
105; CHECK-LIBCALL:       # %bb.0:
106; CHECK-LIBCALL-NEXT:    pushq %rax
107; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
108; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
109; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
110; CHECK-LIBCALL-NEXT:    popq %rax
111; CHECK-LIBCALL-NEXT:    retq
112;
113; BWON-F16C-LABEL: test_extend64:
114; BWON-F16C:       # %bb.0:
115; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
116; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
117; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
118; BWON-F16C-NEXT:    retq
119;
120; CHECK-I686-LABEL: test_extend64:
121; CHECK-I686:       # %bb.0:
122; CHECK-I686-NEXT:    subl $12, %esp
123; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
124; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
125; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
126; CHECK-I686-NEXT:    movw %ax, (%esp)
127; CHECK-I686-NEXT:    calll __extendhfsf2
128; CHECK-I686-NEXT:    addl $12, %esp
129; CHECK-I686-NEXT:    retl
  ; Note: despite its name, %val32 below is the double-typed result.
130  %val16 = load half, ptr %addr
131  %val32 = fpext half %val16 to double
132  ret double %val32
133}
134
; Truncates the float argument to half and stores it to %addr. Without F16C
; (x86-64 and i686) the expectation is a call to __truncsfhf2 followed by a
; pextrw/movw store; with F16C it is vcvtps2ph $4 followed by a vpextrw store.
135define void @test_trunc32(float %in, ptr %addr) #0 {
136; CHECK-LIBCALL-LABEL: test_trunc32:
137; CHECK-LIBCALL:       # %bb.0:
138; CHECK-LIBCALL-NEXT:    pushq %rbx
139; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
140; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
141; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
142; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
143; CHECK-LIBCALL-NEXT:    popq %rbx
144; CHECK-LIBCALL-NEXT:    retq
145;
146; BWON-F16C-LABEL: test_trunc32:
147; BWON-F16C:       # %bb.0:
148; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
149; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rdi)
150; BWON-F16C-NEXT:    retq
151;
152; CHECK-I686-LABEL: test_trunc32:
153; CHECK-I686:       # %bb.0:
154; CHECK-I686-NEXT:    pushl %esi
155; CHECK-I686-NEXT:    subl $8, %esp
156; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
157; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
158; CHECK-I686-NEXT:    movd %xmm0, (%esp)
159; CHECK-I686-NEXT:    calll __truncsfhf2
160; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
161; CHECK-I686-NEXT:    movw %ax, (%esi)
162; CHECK-I686-NEXT:    addl $8, %esp
163; CHECK-I686-NEXT:    popl %esi
164; CHECK-I686-NEXT:    retl
165  %val16 = fptrunc float %in to half
166  store half %val16, ptr %addr
167  ret void
168}
169
; Truncates the double argument to half and stores it to %addr. Every run,
; including the F16C one, expects a call to __truncdfhf2; the runs differ only
; in how the result is stored (pextrw/movw versus a direct vpextrw to memory).
170define void @test_trunc64(double %in, ptr %addr) #0 {
171; CHECK-LIBCALL-LABEL: test_trunc64:
172; CHECK-LIBCALL:       # %bb.0:
173; CHECK-LIBCALL-NEXT:    pushq %rbx
174; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
175; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
176; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
177; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
178; CHECK-LIBCALL-NEXT:    popq %rbx
179; CHECK-LIBCALL-NEXT:    retq
180;
181; BWON-F16C-LABEL: test_trunc64:
182; BWON-F16C:       # %bb.0:
183; BWON-F16C-NEXT:    pushq %rbx
184; BWON-F16C-NEXT:    movq %rdi, %rbx
185; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
186; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rbx)
187; BWON-F16C-NEXT:    popq %rbx
188; BWON-F16C-NEXT:    retq
189;
190; CHECK-I686-LABEL: test_trunc64:
191; CHECK-I686:       # %bb.0:
192; CHECK-I686-NEXT:    pushl %esi
193; CHECK-I686-NEXT:    subl $8, %esp
194; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
195; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
196; CHECK-I686-NEXT:    movq %xmm0, (%esp)
197; CHECK-I686-NEXT:    calll __truncdfhf2
198; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
199; CHECK-I686-NEXT:    movw %ax, (%esi)
200; CHECK-I686-NEXT:    addl $8, %esp
201; CHECK-I686-NEXT:    popl %esi
202; CHECK-I686-NEXT:    retl
203  %val16 = fptrunc double %in to half
204  store half %val16, ptr %addr
205  ret void
206}
207
; Loads a half and converts it to a signed i64. The x86-64 expectations extend
; to float (__extendhfsf2 or vcvtph2ps) and then use cvttss2si/vcvttss2si. The
; i686 expectation calls __extendhfsf2 and converts on the x87 stack with
; fistpll, temporarily loading a control word OR-ed with 0xC00 and restoring
; the saved one afterwards; the result is returned in %eax/%edx.
208define i64 @test_fptosi_i64(ptr %p) #0 {
209; CHECK-LIBCALL-LABEL: test_fptosi_i64:
210; CHECK-LIBCALL:       # %bb.0:
211; CHECK-LIBCALL-NEXT:    pushq %rax
212; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
213; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
214; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
215; CHECK-LIBCALL-NEXT:    popq %rcx
216; CHECK-LIBCALL-NEXT:    retq
217;
218; BWON-F16C-LABEL: test_fptosi_i64:
219; BWON-F16C:       # %bb.0:
220; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
221; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
222; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
223; BWON-F16C-NEXT:    retq
224;
225; CHECK-I686-LABEL: test_fptosi_i64:
226; CHECK-I686:       # %bb.0:
227; CHECK-I686-NEXT:    subl $28, %esp
228; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
229; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
230; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
231; CHECK-I686-NEXT:    movw %ax, (%esp)
232; CHECK-I686-NEXT:    calll __extendhfsf2
233; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
234; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
235; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
236; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
237; CHECK-I686-NEXT:    orl $3072, %eax # imm = 0xC00
238; CHECK-I686-NEXT:    movw %ax, {{[0-9]+}}(%esp)
239; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
240; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
241; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
242; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
243; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %edx
244; CHECK-I686-NEXT:    addl $28, %esp
245; CHECK-I686-NEXT:    retl
246  %a = load half, ptr %p, align 2
247  %r = fptosi half %a to i64
248  ret i64 %r
249}
250
; Converts the signed i64 argument to half and stores it to %p. The x86-64
; expectations convert to float with cvtsi2ss/vcvtsi2ss and then truncate
; (__truncsfhf2 without F16C, vcvtps2ph $4 with F16C). The i686 expectation
; produces the float with x87 fildll/fstps before calling __truncsfhf2.
251define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
252; CHECK-LIBCALL-LABEL: test_sitofp_i64:
253; CHECK-LIBCALL:       # %bb.0:
254; CHECK-LIBCALL-NEXT:    pushq %rbx
255; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
256; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
257; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
258; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
259; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
260; CHECK-LIBCALL-NEXT:    popq %rbx
261; CHECK-LIBCALL-NEXT:    retq
262;
263; BWON-F16C-LABEL: test_sitofp_i64:
264; BWON-F16C:       # %bb.0:
265; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
266; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
267; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
268; BWON-F16C-NEXT:    retq
269;
270; CHECK-I686-LABEL: test_sitofp_i64:
271; CHECK-I686:       # %bb.0:
272; CHECK-I686-NEXT:    pushl %esi
273; CHECK-I686-NEXT:    subl $24, %esp
274; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
275; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
276; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
277; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
278; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
279; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
280; CHECK-I686-NEXT:    movd %xmm0, (%esp)
281; CHECK-I686-NEXT:    calll __truncsfhf2
282; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
283; CHECK-I686-NEXT:    movw %ax, (%esi)
284; CHECK-I686-NEXT:    addl $24, %esp
285; CHECK-I686-NEXT:    popl %esi
286; CHECK-I686-NEXT:    retl
287  %r = sitofp i64 %a to half
288  store half %r, ptr %p
289  ret void
290}
291
; Loads a half and converts it to an unsigned i64. After extending to float,
; the x86-64 expectations perform two signed cvttss2si conversions (one on the
; value, one on the value minus a constant-pool operand) and merge them with
; sarq/andq/orq. The i686 expectation compares against 9.22337203E+18,
; subtracts that constant only on the jae (not-below) path, converts with x87
; fistpll, and XORs the comparison result (shifted to bit 31) into the high
; word.
292define i64 @test_fptoui_i64(ptr %p) #0 {
293; CHECK-LIBCALL-LABEL: test_fptoui_i64:
294; CHECK-LIBCALL:       # %bb.0:
295; CHECK-LIBCALL-NEXT:    pushq %rax
296; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
297; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
298; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rcx
299; CHECK-LIBCALL-NEXT:    movq %rcx, %rdx
300; CHECK-LIBCALL-NEXT:    sarq $63, %rdx
301; CHECK-LIBCALL-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
302; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
303; CHECK-LIBCALL-NEXT:    andq %rdx, %rax
304; CHECK-LIBCALL-NEXT:    orq %rcx, %rax
305; CHECK-LIBCALL-NEXT:    popq %rcx
306; CHECK-LIBCALL-NEXT:    retq
307;
308; BWON-F16C-LABEL: test_fptoui_i64:
309; BWON-F16C:       # %bb.0:
310; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
311; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
312; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rcx
313; BWON-F16C-NEXT:    movq %rcx, %rdx
314; BWON-F16C-NEXT:    sarq $63, %rdx
315; BWON-F16C-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
316; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
317; BWON-F16C-NEXT:    andq %rdx, %rax
318; BWON-F16C-NEXT:    orq %rcx, %rax
319; BWON-F16C-NEXT:    retq
320;
321; CHECK-I686-LABEL: test_fptoui_i64:
322; CHECK-I686:       # %bb.0:
323; CHECK-I686-NEXT:    subl $28, %esp
324; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
325; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
326; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
327; CHECK-I686-NEXT:    movw %ax, (%esp)
328; CHECK-I686-NEXT:    calll __extendhfsf2
329; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
330; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
331; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
332; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
333; CHECK-I686-NEXT:    jae .LBB9_2
334; CHECK-I686-NEXT:  # %bb.1:
335; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
336; CHECK-I686-NEXT:  .LBB9_2:
337; CHECK-I686-NEXT:    subss %xmm1, %xmm0
338; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
339; CHECK-I686-NEXT:    setae %al
340; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
341; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
342; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
343; CHECK-I686-NEXT:    orl $3072, %ecx # imm = 0xC00
344; CHECK-I686-NEXT:    movw %cx, {{[0-9]+}}(%esp)
345; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
346; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
347; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
348; CHECK-I686-NEXT:    movzbl %al, %edx
349; CHECK-I686-NEXT:    shll $31, %edx
350; CHECK-I686-NEXT:    xorl {{[0-9]+}}(%esp), %edx
351; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
352; CHECK-I686-NEXT:    addl $28, %esp
353; CHECK-I686-NEXT:    retl
354  %a = load half, ptr %p, align 2
355  %r = fptoui half %a to i64
356  ret i64 %r
357}
358
; Converts the unsigned i64 argument to half and stores it to %p. The x86-64
; expectations branch on the sign bit (testq/js): the non-negative path
; converts directly with cvtsi2ss, while the other path halves the value with
; the low bit OR-ed back in, converts, and doubles the result (addss) before
; the float-to-half step. The i686 expectation uses x87 fildll and then adds a
; constant-pool entry selected by the input's top bit (shrl $31 used as an
; index) before calling __truncsfhf2.
359define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
360; CHECK-LIBCALL-LABEL: test_uitofp_i64:
361; CHECK-LIBCALL:       # %bb.0:
362; CHECK-LIBCALL-NEXT:    pushq %rbx
363; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
364; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
365; CHECK-LIBCALL-NEXT:    js .LBB10_1
366; CHECK-LIBCALL-NEXT:  # %bb.2:
367; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
368; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
369; CHECK-LIBCALL-NEXT:  .LBB10_1:
370; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
371; CHECK-LIBCALL-NEXT:    shrq %rax
372; CHECK-LIBCALL-NEXT:    andl $1, %edi
373; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
374; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
375; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
376; CHECK-LIBCALL-NEXT:  .LBB10_3:
377; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
378; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
379; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
380; CHECK-LIBCALL-NEXT:    popq %rbx
381; CHECK-LIBCALL-NEXT:    retq
382;
383; BWON-F16C-LABEL: test_uitofp_i64:
384; BWON-F16C:       # %bb.0:
385; BWON-F16C-NEXT:    testq %rdi, %rdi
386; BWON-F16C-NEXT:    js .LBB10_1
387; BWON-F16C-NEXT:  # %bb.2:
388; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
389; BWON-F16C-NEXT:    jmp .LBB10_3
390; BWON-F16C-NEXT:  .LBB10_1:
391; BWON-F16C-NEXT:    movq %rdi, %rax
392; BWON-F16C-NEXT:    shrq %rax
393; BWON-F16C-NEXT:    andl $1, %edi
394; BWON-F16C-NEXT:    orq %rax, %rdi
395; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
396; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
397; BWON-F16C-NEXT:  .LBB10_3:
398; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
399; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
400; BWON-F16C-NEXT:    retq
401;
402; CHECK-I686-LABEL: test_uitofp_i64:
403; CHECK-I686:       # %bb.0:
404; CHECK-I686-NEXT:    pushl %esi
405; CHECK-I686-NEXT:    subl $24, %esp
406; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
407; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
408; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
409; CHECK-I686-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
410; CHECK-I686-NEXT:    shrl $31, %eax
411; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
412; CHECK-I686-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
413; CHECK-I686-NEXT:    fstps (%esp)
414; CHECK-I686-NEXT:    calll __truncsfhf2
415; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
416; CHECK-I686-NEXT:    movw %ax, (%esi)
417; CHECK-I686-NEXT:    addl $24, %esp
418; CHECK-I686-NEXT:    popl %esi
419; CHECK-I686-NEXT:    retl
420  %r = uitofp i64 %a to half
421  store half %r, ptr %p
422  ret void
423}
424
; Loads a <4 x half> vector (align 8) and extends it to <4 x float>. With F16C
; the whole conversion is one vcvtph2ps with a memory operand. Without F16C the
; expectation is scalarized: each of the four elements is loaded with pinsrw
; and passed to __extendhfsf2, with intermediate values spilled across the
; calls, and the results are reassembled with unpcklps/unpcklpd on x86-64 or
; unpcklps/movlhps on i686.
425define <4 x float> @test_extend32_vec4(ptr %p) #0 {
426; CHECK-LIBCALL-LABEL: test_extend32_vec4:
427; CHECK-LIBCALL:       # %bb.0:
428; CHECK-LIBCALL-NEXT:    subq $72, %rsp
429; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
430; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
431; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
432; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
433; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
434; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
435; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
436; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
437; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
438; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
439; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
440; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
441; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
442; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
443; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
444; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
445; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
446; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
447; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
448; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
449; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
450; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
451; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
452; CHECK-LIBCALL-NEXT:    addq $72, %rsp
453; CHECK-LIBCALL-NEXT:    retq
454;
455; BWON-F16C-LABEL: test_extend32_vec4:
456; BWON-F16C:       # %bb.0:
457; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
458; BWON-F16C-NEXT:    retq
459;
460; CHECK-I686-LABEL: test_extend32_vec4:
461; CHECK-I686:       # %bb.0:
462; CHECK-I686-NEXT:    pushl %esi
463; CHECK-I686-NEXT:    subl $88, %esp
464; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
465; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
466; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
467; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
468; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
469; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm0
470; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm1
471; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
472; CHECK-I686-NEXT:    movw %ax, (%esp)
473; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
474; CHECK-I686-NEXT:    calll __extendhfsf2
475; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
476; CHECK-I686-NEXT:    movw %si, (%esp)
477; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
478; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
479; CHECK-I686-NEXT:    calll __extendhfsf2
480; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
481; CHECK-I686-NEXT:    movw %si, (%esp)
482; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
483; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
484; CHECK-I686-NEXT:    calll __extendhfsf2
485; CHECK-I686-NEXT:    movw %si, (%esp)
486; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
487; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
488; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
489; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
490; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
491; CHECK-I686-NEXT:    calll __extendhfsf2
492; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
493; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
494; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
495; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
496; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
497; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
498; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
499; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
500; CHECK-I686-NEXT:    addl $88, %esp
501; CHECK-I686-NEXT:    popl %esi
502; CHECK-I686-NEXT:    retl
503  %a = load <4 x half>, ptr %p, align 8
504  %b = fpext <4 x half> %a to <4 x float>
505  ret <4 x float> %b
506}
507
; Loads a <4 x half> vector (align 8) and extends it to <4 x double>. With
; F16C the expectation is vcvtph2ps from memory followed by vcvtps2pd into
; %ymm0. Without F16C on x86-64 each element goes through __extendhfsf2 and
; cvtss2sd, and the four doubles are paired with unpcklpd into %xmm0/%xmm1.
; The i686 expectation makes four __extendhfsf2 calls, stores the x87 results
; as doubles with fstpl, and rebuilds %xmm0/%xmm1 with movsd/movhps.
508define <4 x double> @test_extend64_vec4(ptr %p) #0 {
509; CHECK-LIBCALL-LABEL: test_extend64_vec4:
510; CHECK-LIBCALL:       # %bb.0:
511; CHECK-LIBCALL-NEXT:    subq $72, %rsp
512; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
513; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
514; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
515; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
516; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
517; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
518; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
519; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
520; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
521; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
522; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
523; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
524; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
525; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
526; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
527; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
528; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
529; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
530; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
531; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
532; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
533; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
534; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
535; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
536; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
537; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
538; CHECK-LIBCALL-NEXT:    addq $72, %rsp
539; CHECK-LIBCALL-NEXT:    retq
540;
541; BWON-F16C-LABEL: test_extend64_vec4:
542; BWON-F16C:       # %bb.0:
543; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
544; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
545; BWON-F16C-NEXT:    retq
546;
547; CHECK-I686-LABEL: test_extend64_vec4:
548; CHECK-I686:       # %bb.0:
549; CHECK-I686-NEXT:    pushl %esi
550; CHECK-I686-NEXT:    subl $104, %esp
551; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
552; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
553; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
554; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
555; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
556; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm0
557; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm1
558; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
559; CHECK-I686-NEXT:    movw %ax, (%esp)
560; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
561; CHECK-I686-NEXT:    calll __extendhfsf2
562; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
563; CHECK-I686-NEXT:    movw %si, (%esp)
564; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
565; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
566; CHECK-I686-NEXT:    calll __extendhfsf2
567; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
568; CHECK-I686-NEXT:    movw %si, (%esp)
569; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
570; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
571; CHECK-I686-NEXT:    calll __extendhfsf2
572; CHECK-I686-NEXT:    movw %si, (%esp)
573; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
574; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
575; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
576; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
577; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
578; CHECK-I686-NEXT:    calll __extendhfsf2
579; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
580; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
581; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
582; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
583; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
584; CHECK-I686-NEXT:    addl $104, %esp
585; CHECK-I686-NEXT:    popl %esi
586; CHECK-I686-NEXT:    retl
587  %a = load <4 x half>, ptr %p, align 8
588  %b = fpext <4 x half> %a to <4 x double>
589  ret <4 x double> %b
590}
591
; Truncates a <4 x float> argument to <4 x half> and stores it to %p. With
; F16C the expectation is a single vcvtps2ph $4 writing directly to memory.
; Without F16C (x86-64 and i686) the expectation is scalarized: each lane is
; shuffled into position, passed to __truncsfhf2, and the four results are
; stored individually with pextrw/movw at offsets 0, 2, 4 and 6.
592define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
593; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
594; CHECK-LIBCALL:       # %bb.0:
595; CHECK-LIBCALL-NEXT:    pushq %rbx
596; CHECK-LIBCALL-NEXT:    subq $64, %rsp
597; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
598; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
599; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
600; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
601; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
602; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
603; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
604; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
605; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
606; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
607; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
608; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
609; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
610; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
611; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
612; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
613; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
614; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
615; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
616; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
617; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
618; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
619; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
620; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
621; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
622; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
623; CHECK-LIBCALL-NEXT:    addq $64, %rsp
624; CHECK-LIBCALL-NEXT:    popq %rbx
625; CHECK-LIBCALL-NEXT:    retq
626;
627; BWON-F16C-LABEL: test_trunc32_vec4:
628; BWON-F16C:       # %bb.0:
629; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
630; BWON-F16C-NEXT:    retq
631;
632; CHECK-I686-LABEL: test_trunc32_vec4:
633; CHECK-I686:       # %bb.0:
634; CHECK-I686-NEXT:    pushl %esi
635; CHECK-I686-NEXT:    subl $88, %esp
636; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
637; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
638; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
639; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
640; CHECK-I686-NEXT:    movss %xmm1, (%esp)
641; CHECK-I686-NEXT:    calll __truncsfhf2
642; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
643; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
644; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
645; CHECK-I686-NEXT:    movss %xmm0, (%esp)
646; CHECK-I686-NEXT:    calll __truncsfhf2
647; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
648; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
649; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
650; CHECK-I686-NEXT:    movss %xmm0, (%esp)
651; CHECK-I686-NEXT:    calll __truncsfhf2
652; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
653; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
654; CHECK-I686-NEXT:    movd %xmm0, (%esp)
655; CHECK-I686-NEXT:    calll __truncsfhf2
656; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
657; CHECK-I686-NEXT:    movw %ax, (%esi)
658; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
659; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
660; CHECK-I686-NEXT:    movw %ax, 6(%esi)
661; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
662; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
663; CHECK-I686-NEXT:    movw %ax, 4(%esi)
664; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
665; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
666; CHECK-I686-NEXT:    movw %ax, 2(%esi)
667; CHECK-I686-NEXT:    addl $88, %esp
668; CHECK-I686-NEXT:    popl %esi
669; CHECK-I686-NEXT:    retl
670  %v = fptrunc <4 x float> %a to <4 x half>
671  store <4 x half> %v, ptr %p
672  ret void
673}
674
675define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
676; CHECK-LIBCALL-LABEL: test_trunc64_vec4:
677; CHECK-LIBCALL:       # %bb.0:
678; CHECK-LIBCALL-NEXT:    pushq %rbx
679; CHECK-LIBCALL-NEXT:    subq $64, %rsp
680; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
681; CHECK-LIBCALL-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
682; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
683; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
684; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
685; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
686; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
687; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
688; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
689; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
690; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
691; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
692; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
693; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
694; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
695; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
696; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
697; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
698; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
699; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
700; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
701; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
702; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
703; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
704; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
705; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
706; CHECK-LIBCALL-NEXT:    addq $64, %rsp
707; CHECK-LIBCALL-NEXT:    popq %rbx
708; CHECK-LIBCALL-NEXT:    retq
709;
710; BWON-F16C-LABEL: test_trunc64_vec4:
711; BWON-F16C:       # %bb.0:
712; BWON-F16C-NEXT:    pushq %rbx
713; BWON-F16C-NEXT:    subq $64, %rsp
714; BWON-F16C-NEXT:    movq %rdi, %rbx
715; BWON-F16C-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
716; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
717; BWON-F16C-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
718; BWON-F16C-NEXT:    vzeroupper
719; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
720; BWON-F16C-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
721; BWON-F16C-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
722; BWON-F16C-NEXT:    # xmm0 = mem[1,0]
723; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
724; BWON-F16C-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
725; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
726; BWON-F16C-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
727; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
728; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
729; BWON-F16C-NEXT:    vzeroupper
730; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
731; BWON-F16C-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
732; BWON-F16C-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
733; BWON-F16C-NEXT:    # xmm0 = mem[1,0]
734; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
735; BWON-F16C-NEXT:    vmovdqa (%rsp), %xmm1 # 16-byte Reload
736; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
737; BWON-F16C-NEXT:    vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
738; BWON-F16C-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
739; BWON-F16C-NEXT:    vmovq %xmm0, (%rbx)
740; BWON-F16C-NEXT:    addq $64, %rsp
741; BWON-F16C-NEXT:    popq %rbx
742; BWON-F16C-NEXT:    retq
743;
744; CHECK-I686-LABEL: test_trunc64_vec4:
745; CHECK-I686:       # %bb.0:
746; CHECK-I686-NEXT:    pushl %esi
747; CHECK-I686-NEXT:    subl $88, %esp
748; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
749; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
750; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
751; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
752; CHECK-I686-NEXT:    calll __truncdfhf2
753; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
754; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
755; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
756; CHECK-I686-NEXT:    calll __truncdfhf2
757; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
758; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
759; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
760; CHECK-I686-NEXT:    calll __truncdfhf2
761; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
762; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
763; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
764; CHECK-I686-NEXT:    calll __truncdfhf2
765; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
766; CHECK-I686-NEXT:    movw %ax, 6(%esi)
767; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
768; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
769; CHECK-I686-NEXT:    movw %ax, 4(%esi)
770; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
771; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
772; CHECK-I686-NEXT:    movw %ax, 2(%esi)
773; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
774; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
775; CHECK-I686-NEXT:    movw %ax, (%esi)
776; CHECK-I686-NEXT:    addl $88, %esp
777; CHECK-I686-NEXT:    popl %esi
778; CHECK-I686-NEXT:    retl
779  %v = fptrunc <4 x double> %a to <4 x half>
780  store <4 x half> %v, ptr %p
781  ret void
782}
783
784declare float @test_floatret();
785
786; On i686, if SSE2 is available, the return value from test_floatret is loaded
787; to f80 and then rounded to f32.  The DAG combiner should not combine this
788; fp_round and the subsequent fptrunc from float to half.
; Calls @test_floatret and returns its float result truncated to half.
789define half @test_f80trunc_nodagcombine() #0 {
790; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
791; CHECK-LIBCALL:       # %bb.0:
792; CHECK-LIBCALL-NEXT:    pushq %rax
793; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
794; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
795; CHECK-LIBCALL-NEXT:    popq %rax
796; CHECK-LIBCALL-NEXT:    retq
797;
798; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
799; BWON-F16C:       # %bb.0:
800; BWON-F16C-NEXT:    pushq %rax
801; BWON-F16C-NEXT:    callq test_floatret@PLT
802; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
803; BWON-F16C-NEXT:    vmovd %xmm0, %eax
804; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
805; BWON-F16C-NEXT:    popq %rax
806; BWON-F16C-NEXT:    retq
807;
808; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
809; CHECK-I686:       # %bb.0:
810; CHECK-I686-NEXT:    subl $12, %esp
811; CHECK-I686-NEXT:    calll test_floatret@PLT
812; CHECK-I686-NEXT:    fstps (%esp)
813; CHECK-I686-NEXT:    calll __truncsfhf2
814; CHECK-I686-NEXT:    addl $12, %esp
815; CHECK-I686-NEXT:    retl
; In the expected output above, the x86-64 runs truncate the value returned in
; %xmm0 directly (a __truncsfhf2 call without F16C, vcvtps2ph with it), while
; the i686 run first stores the x87 return value as a 32-bit float with fstps
; and only then calls __truncsfhf2 on that stored value.
816  %1 = call float @test_floatret()
817  %2 = fptrunc float %1 to half
818  ret half %2
819}
820
821
822
823
; Loads a half from %b, converts the i32 %a to half, adds the two in half
; precision and returns the sum extended to float.
824define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
825; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
826; CHECK-LIBCALL:       # %bb.0:
827; CHECK-LIBCALL-NEXT:    subq $40, %rsp
828; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rsi), %xmm0
829; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
830; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
831; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
832; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
833; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
834; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
835; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
836; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
837; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
838; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
839; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
840; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
841; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
842; CHECK-LIBCALL-NEXT:    addq $40, %rsp
843; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
844;
845; BWON-F16C-LABEL: test_sitofp_fadd_i32:
846; BWON-F16C:       # %bb.0:
847; BWON-F16C-NEXT:    vpinsrw $0, (%rsi), %xmm0, %xmm0
848; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm1, %xmm1
849; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
850; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
851; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
852; BWON-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
853; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
854; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
855; BWON-F16C-NEXT:    retq
856;
857; CHECK-I686-LABEL: test_sitofp_fadd_i32:
858; CHECK-I686:       # %bb.0:
859; CHECK-I686-NEXT:    subl $60, %esp
860; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
861; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
862; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
863; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
864; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
865; CHECK-I686-NEXT:    movss %xmm0, (%esp)
866; CHECK-I686-NEXT:    calll __truncsfhf2
867; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
868; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
869; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
870; CHECK-I686-NEXT:    movw %ax, (%esp)
871; CHECK-I686-NEXT:    calll __extendhfsf2
872; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
873; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
874; CHECK-I686-NEXT:    movw %ax, (%esp)
875; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
876; CHECK-I686-NEXT:    calll __extendhfsf2
877; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
878; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
879; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
880; CHECK-I686-NEXT:    movss %xmm0, (%esp)
881; CHECK-I686-NEXT:    calll __truncsfhf2
882; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
883; CHECK-I686-NEXT:    movw %ax, (%esp)
884; CHECK-I686-NEXT:    calll __extendhfsf2
885; CHECK-I686-NEXT:    addl $60, %esp
886; CHECK-I686-NEXT:    retl
; In every configuration above the converted integer is rounded to half and
; extended back to float before the add, and the float sum is rounded to half
; again before the final extension (libcalls without F16C, vcvtps2ph/vcvtph2ps
; with it). On x86-64 without F16C the last extension is a tail call.
887  %tmp0 = load half, ptr %b
888  %tmp1 = sitofp i32 %a to half
889  %tmp2 = fadd half %tmp0, %tmp1
890  %tmp3 = fpext half %tmp2 to float
891  ret float %tmp3
892}
893
; Compares the half argument with 0xH0000 using the "une" predicate (unordered
; or not equal) and returns the i1 result converted to half.
894define half @PR40273(half) #0 {
895; CHECK-LIBCALL-LABEL: PR40273:
896; CHECK-LIBCALL:       # %bb.0:
897; CHECK-LIBCALL-NEXT:    pushq %rax
898; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
899; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
900; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
901; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
902; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
903; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
904; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
905; CHECK-LIBCALL-NEXT:    pinsrw $0, %eax, %xmm0
906; CHECK-LIBCALL-NEXT:    popq %rax
907; CHECK-LIBCALL-NEXT:    retq
908;
909; BWON-F16C-LABEL: PR40273:
910; BWON-F16C:       # %bb.0:
911; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
912; BWON-F16C-NEXT:    xorl %eax, %eax
913; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
914; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
915; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
916; BWON-F16C-NEXT:    cmovnel %ecx, %eax
917; BWON-F16C-NEXT:    cmovpl %ecx, %eax
918; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
919; BWON-F16C-NEXT:    retq
920;
921; CHECK-I686-LABEL: PR40273:
922; CHECK-I686:       # %bb.0:
923; CHECK-I686-NEXT:    subl $12, %esp
924; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
925; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
926; CHECK-I686-NEXT:    movw %ax, (%esp)
927; CHECK-I686-NEXT:    calll __extendhfsf2
928; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
929; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
930; CHECK-I686-NEXT:    xorl %eax, %eax
931; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
932; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
933; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
934; CHECK-I686-NEXT:    cmovnel %ecx, %eax
935; CHECK-I686-NEXT:    cmovpl %ecx, %eax
936; CHECK-I686-NEXT:    pinsrw $0, %eax, %xmm0
937; CHECK-I686-NEXT:    addl $12, %esp
938; CHECK-I686-NEXT:    retl
; The expected code performs the compare in float and builds the half result
; in an integer register: %eax starts at 0 and is replaced by 0x3C00 by the
; cmovne (not equal) or the cmovp (parity set, i.e. unordered) before being
; inserted into %xmm0. No float-to-half conversion is emitted for the result.
939  %2 = fcmp une half %0, 0xH0000
940  %3 = uitofp i1 %2 to half
941  ret half %3
942}
943
; Branches on an ordered-equal compare of the half argument with 0xH0000:
; %if.then returns, %if.end is unreachable.
944define void @brcond(half %0) #0 {
945; CHECK-LIBCALL-LABEL: brcond:
946; CHECK-LIBCALL:       # %bb.0: # %entry
947; CHECK-LIBCALL-NEXT:    pushq %rax
948; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
949; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
950; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
951; CHECK-LIBCALL-NEXT:    setp %al
952; CHECK-LIBCALL-NEXT:    setne %cl
953; CHECK-LIBCALL-NEXT:    orb %al, %cl
954; CHECK-LIBCALL-NEXT:    jne .LBB18_2
955; CHECK-LIBCALL-NEXT:  # %bb.1: # %if.then
956; CHECK-LIBCALL-NEXT:    popq %rax
957; CHECK-LIBCALL-NEXT:    retq
958; CHECK-LIBCALL-NEXT:  .LBB18_2: # %if.end
959;
960; BWON-F16C-LABEL: brcond:
961; BWON-F16C:       # %bb.0: # %entry
962; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
963; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
964; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
965; BWON-F16C-NEXT:    setp %al
966; BWON-F16C-NEXT:    setne %cl
967; BWON-F16C-NEXT:    orb %al, %cl
968; BWON-F16C-NEXT:    jne .LBB18_2
969; BWON-F16C-NEXT:  # %bb.1: # %if.then
970; BWON-F16C-NEXT:    retq
971; BWON-F16C-NEXT:  .LBB18_2: # %if.end
972;
973; CHECK-I686-LABEL: brcond:
974; CHECK-I686:       # %bb.0: # %entry
975; CHECK-I686-NEXT:    subl $12, %esp
976; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
977; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
978; CHECK-I686-NEXT:    movw %ax, (%esp)
979; CHECK-I686-NEXT:    calll __extendhfsf2
980; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
981; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
982; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
983; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
984; CHECK-I686-NEXT:    setp %al
985; CHECK-I686-NEXT:    setne %cl
986; CHECK-I686-NEXT:    orb %al, %cl
987; CHECK-I686-NEXT:    jne .LBB18_2
988; CHECK-I686-NEXT:  # %bb.1: # %if.then
989; CHECK-I686-NEXT:    addl $12, %esp
990; CHECK-I686-NEXT:    retl
991; CHECK-I686-NEXT:  .LBB18_2: # %if.end
; The expected code compares in float and jumps to %if.end when the operands
; are unordered or not equal (setp OR setne), i.e. when the oeq test fails.
; Each expected sequence ends at the %if.end label with no instructions after
; it, since that block is unreachable.
992entry:
993  %cmp = fcmp oeq half 0xH0000, %0
994  br i1 %cmp, label %if.then, label %if.end
995
996if.then:                                          ; preds = %entry
997  ret void
998
999if.end:                                           ; preds = %entry
1000  unreachable
1001}
1002
; Returns the square root of the half argument via the llvm.sqrt.f16 intrinsic.
1003define half @test_sqrt(half %0) #0 {
1004; CHECK-LIBCALL-LABEL: test_sqrt:
1005; CHECK-LIBCALL:       # %bb.0: # %entry
1006; CHECK-LIBCALL-NEXT:    pushq %rax
1007; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1008; CHECK-LIBCALL-NEXT:    sqrtss %xmm0, %xmm0
1009; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1010; CHECK-LIBCALL-NEXT:    popq %rax
1011; CHECK-LIBCALL-NEXT:    retq
1012;
1013; BWON-F16C-LABEL: test_sqrt:
1014; BWON-F16C:       # %bb.0: # %entry
1015; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1016; BWON-F16C-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
1017; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1018; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1019; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
1020; BWON-F16C-NEXT:    retq
1021;
1022; CHECK-I686-LABEL: test_sqrt:
1023; CHECK-I686:       # %bb.0: # %entry
1024; CHECK-I686-NEXT:    subl $12, %esp
1025; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1026; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1027; CHECK-I686-NEXT:    movw %ax, (%esp)
1028; CHECK-I686-NEXT:    calll __extendhfsf2
1029; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1030; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1031; CHECK-I686-NEXT:    sqrtss %xmm0, %xmm0
1032; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1033; CHECK-I686-NEXT:    calll __truncsfhf2
1034; CHECK-I686-NEXT:    addl $12, %esp
1035; CHECK-I686-NEXT:    retl
; In all configurations the expected code extends the operand to float, takes
; a single-precision square root (sqrtss/vsqrtss) and truncates the result
; back to half; no half-precision sqrt libcall appears.
1036entry:
1037  %1 = call half @llvm.sqrt.f16(half %0)
1038  ret half %1
1039}
1040
1041declare half @llvm.sqrt.f16(half)
1042
; Takes fabs of an undef half, compares it "ole" against 0xH4800, selects
; between 0.0 and (undef * 0.0) on that compare, truncates the selected float
; to half and stores it through an undef pointer.
1043define void @main.158() #0 {
1044; CHECK-LIBCALL-LABEL: main.158:
1045; CHECK-LIBCALL:       # %bb.0: # %entry
1046; CHECK-LIBCALL-NEXT:    pushq %rax
1047; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1048; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1049; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1050; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1051; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm1
1052; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1053; CHECK-LIBCALL-NEXT:    jae .LBB20_2
1054; CHECK-LIBCALL-NEXT:  # %bb.1: # %entry
1055; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1056; CHECK-LIBCALL-NEXT:  .LBB20_2: # %entry
1057; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1058; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
1059; CHECK-LIBCALL-NEXT:    movw %ax, (%rax)
1060; CHECK-LIBCALL-NEXT:    popq %rax
1061; CHECK-LIBCALL-NEXT:    retq
1062;
1063; BWON-F16C-LABEL: main.158:
1064; BWON-F16C:       # %bb.0: # %entry
1065; BWON-F16C-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1066; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm1
1067; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
1068; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1069; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm2
1070; BWON-F16C-NEXT:    jae .LBB20_2
1071; BWON-F16C-NEXT:  # %bb.1: # %entry
1072; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1073; BWON-F16C-NEXT:  .LBB20_2: # %entry
1074; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1075; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rax)
1076; BWON-F16C-NEXT:    retq
1077;
1078; CHECK-I686-LABEL: main.158:
1079; CHECK-I686:       # %bb.0: # %entry
1080; CHECK-I686-NEXT:    subl $12, %esp
1081; CHECK-I686-NEXT:    movl $0, (%esp)
1082; CHECK-I686-NEXT:    calll __truncsfhf2
1083; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1084; CHECK-I686-NEXT:    movw %ax, (%esp)
1085; CHECK-I686-NEXT:    calll __extendhfsf2
1086; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1087; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
1088; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1089; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
1090; CHECK-I686-NEXT:    jae .LBB20_2
1091; CHECK-I686-NEXT:  # %bb.1: # %entry
1092; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
1093; CHECK-I686-NEXT:  .LBB20_2: # %entry
1094; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1095; CHECK-I686-NEXT:    calll __truncsfhf2
1096; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1097; CHECK-I686-NEXT:    movw %ax, (%eax)
1098; CHECK-I686-NEXT:    addl $12, %esp
1099; CHECK-I686-NEXT:    retl
; In the expected output the compared operand is a zero that is rounded to
; half and extended back to float, the compare constant appears as 8.0, and
; the select arm taken when the compare fails is a NaN constant.
; The chain %1 -> %multiply.95 -> %add.82 -> %multiply.68 -> %subtract.65 below
; does not feed the stored value, and no code for it appears in the checks.
; NOTE(review): the undef operands look like leftovers of a reduced
; reproducer -- origin is not visible here, confirm before rewriting them.
1100entry:
1101  %0 = tail call half @llvm.fabs.f16(half undef)
1102  %1 = fpext half %0 to float
1103  %compare.2 = fcmp ole half %0, 0xH4800
1104  %multiply.95 = fmul float %1, 5.000000e-01
1105  %add.82 = fadd float %multiply.95, -2.000000e+00
1106  %multiply.68 = fmul float %add.82, 0.000000e+00
1107  %subtract.65 = fsub float %multiply.68, 0.000000e+00
1108  %multiply.57 = fmul float undef, 0.000000e+00
1109  %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
1110  %3 = fptrunc float %2 to half
1111  store half %3, ptr undef, align 2
1112  ret void
1113}
1114
; Loads one half from an undef pointer, splats it both as <4 x half> and (via
; its i16 bit pattern) as <4 x i16>, and stores a <4 x half> in which every
; lane that compares unordered against zero is replaced by 0xH7E00 and every
; other lane keeps the splatted bit pattern.
1115define void @main.45() #0 {
1116; CHECK-LIBCALL-LABEL: main.45:
1117; CHECK-LIBCALL:       # %bb.0: # %entry
1118; CHECK-LIBCALL-NEXT:    pushq %rbp
1119; CHECK-LIBCALL-NEXT:    pushq %r15
1120; CHECK-LIBCALL-NEXT:    pushq %r14
1121; CHECK-LIBCALL-NEXT:    pushq %rbx
1122; CHECK-LIBCALL-NEXT:    pushq %rax
1123; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rax), %xmm0
1124; CHECK-LIBCALL-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1125; CHECK-LIBCALL-NEXT:    movq %xmm1, %rbx
1126; CHECK-LIBCALL-NEXT:    movq %rbx, %r14
1127; CHECK-LIBCALL-NEXT:    shrq $48, %r14
1128; CHECK-LIBCALL-NEXT:    movq %rbx, %r15
1129; CHECK-LIBCALL-NEXT:    shrq $32, %r15
1130; CHECK-LIBCALL-NEXT:    movl %ebx, %ebp
1131; CHECK-LIBCALL-NEXT:    shrl $16, %ebp
1132; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1133; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm0
1134; CHECK-LIBCALL-NEXT:    movl $32256, %eax # imm = 0x7E00
1135; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebp
1136; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r15d
1137; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r14d
1138; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebx
1139; CHECK-LIBCALL-NEXT:    movw %bx, (%rax)
1140; CHECK-LIBCALL-NEXT:    movw %r14w, (%rax)
1141; CHECK-LIBCALL-NEXT:    movw %r15w, (%rax)
1142; CHECK-LIBCALL-NEXT:    movw %bp, (%rax)
1143; CHECK-LIBCALL-NEXT:    addq $8, %rsp
1144; CHECK-LIBCALL-NEXT:    popq %rbx
1145; CHECK-LIBCALL-NEXT:    popq %r14
1146; CHECK-LIBCALL-NEXT:    popq %r15
1147; CHECK-LIBCALL-NEXT:    popq %rbp
1148; CHECK-LIBCALL-NEXT:    retq
1149;
1150; BWON-F16C-LABEL: main.45:
1151; BWON-F16C:       # %bb.0: # %entry
1152; BWON-F16C-NEXT:    vpinsrw $0, (%rax), %xmm0, %xmm0
1153; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1154; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm1
1155; BWON-F16C-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1156; BWON-F16C-NEXT:    vcmpunordps %xmm2, %xmm1, %xmm1
1157; BWON-F16C-NEXT:    vpackssdw %xmm1, %xmm1, %xmm1
1158; BWON-F16C-NEXT:    vpblendvb %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1159; BWON-F16C-NEXT:    vmovq %xmm0, (%rax)
1160; BWON-F16C-NEXT:    retq
1161;
1162; CHECK-I686-LABEL: main.45:
1163; CHECK-I686:       # %bb.0: # %entry
1164; CHECK-I686-NEXT:    pushl %edi
1165; CHECK-I686-NEXT:    pushl %esi
1166; CHECK-I686-NEXT:    subl $20, %esp
1167; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
1168; CHECK-I686-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1169; CHECK-I686-NEXT:    movd %xmm1, %esi
1170; CHECK-I686-NEXT:    movl %esi, %edi
1171; CHECK-I686-NEXT:    shrl $16, %edi
1172; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1173; CHECK-I686-NEXT:    movw %ax, (%esp)
1174; CHECK-I686-NEXT:    calll __extendhfsf2
1175; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1176; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1177; CHECK-I686-NEXT:    ucomiss %xmm0, %xmm0
1178; CHECK-I686-NEXT:    movl $32256, %eax # imm = 0x7E00
1179; CHECK-I686-NEXT:    cmovpl %eax, %esi
1180; CHECK-I686-NEXT:    cmovpl %eax, %edi
1181; CHECK-I686-NEXT:    movw %di, (%eax)
1182; CHECK-I686-NEXT:    movw %si, (%eax)
1183; CHECK-I686-NEXT:    addl $20, %esp
1184; CHECK-I686-NEXT:    popl %esi
1185; CHECK-I686-NEXT:    popl %edi
1186; CHECK-I686-NEXT:    retl
; Without F16C the expected code extends the loaded half once, tests it with
; a ucomiss self-compare, and uses cmovp to overwrite each extracted 16-bit
; lane with 0x7E00. With F16C the same selection is done on the vector with
; vcvtph2ps, vcmpunordps and a byte blend against a constant-pool operand.
; NOTE(review): the undef pointers and the selects on undef conditions look
; like leftovers of a reduced reproducer -- origin is not visible here.
1187entry:
1188  %0 = load half, ptr undef, align 8
1189  %1 = bitcast half %0 to i16
1190  %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
1191  %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
1192  %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
1193  %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
1194  %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
1195  %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
1196  %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
1197  %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
1198  %6 = bitcast <4 x i16> %5 to <4 x half>
1199  %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
1200  store <4 x half> %7, ptr undef, align 16
1201  ret void
1202}
1203
; Returns llvm.copysign.f16(%x, %y): the first operand %x with its sign taken
; from the second operand %y.
1204define half @fcopysign(half %x, half %y) {
1205; CHECK-LIBCALL-LABEL: fcopysign:
1206; CHECK-LIBCALL:       # %bb.0:
1207; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm1, %eax
1208; CHECK-LIBCALL-NEXT:    andl $-32768, %eax # imm = 0x8000
1209; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %ecx
1210; CHECK-LIBCALL-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1211; CHECK-LIBCALL-NEXT:    orl %eax, %ecx
1212; CHECK-LIBCALL-NEXT:    pinsrw $0, %ecx, %xmm0
1213; CHECK-LIBCALL-NEXT:    retq
1214;
1215; BWON-F16C-LABEL: fcopysign:
1216; BWON-F16C:       # %bb.0:
1217; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %eax
1218; BWON-F16C-NEXT:    andl $-32768, %eax # imm = 0x8000
1219; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %ecx
1220; BWON-F16C-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1221; BWON-F16C-NEXT:    orl %eax, %ecx
1222; BWON-F16C-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0
1223; BWON-F16C-NEXT:    retq
1224;
1225; CHECK-I686-LABEL: fcopysign:
1226; CHECK-I686:       # %bb.0:
1227; CHECK-I686-NEXT:    movl $-32768, %eax # imm = 0x8000
1228; CHECK-I686-NEXT:    andl {{[0-9]+}}(%esp), %eax
1229; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
1230; CHECK-I686-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1231; CHECK-I686-NEXT:    orl %eax, %ecx
1232; CHECK-I686-NEXT:    pinsrw $0, %ecx, %xmm0
1233; CHECK-I686-NEXT:    retl
; The expected code is integer-only in every configuration: bit 15 of one
; operand (mask 0x8000) is OR-ed with the low 15 bits of the other (mask
; 0x7FFF) and the result is inserted into %xmm0. No half/float conversion
; and no libcall appears.
1234  %a = call half @llvm.copysign.f16(half %x, half %y)
1235  ret half %a
1236}
1237
1238declare half @llvm.fabs.f16(half)
1239declare half @llvm.copysign.f16(half, half)
1240
; Returns %x when the scalar i1 %c is true and %y otherwise, for <8 x half>
; operands.
1241define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
1242; CHECK-LIBCALL-LABEL: select:
1243; CHECK-LIBCALL:       # %bb.0:
1244; CHECK-LIBCALL-NEXT:    testb $1, %dil
1245; CHECK-LIBCALL-NEXT:    jne .LBB23_2
1246; CHECK-LIBCALL-NEXT:  # %bb.1:
1247; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
1248; CHECK-LIBCALL-NEXT:  .LBB23_2:
1249; CHECK-LIBCALL-NEXT:    retq
1250;
1251; BWON-F16C-LABEL: select:
1252; BWON-F16C:       # %bb.0:
1253; BWON-F16C-NEXT:    testb $1, %dil
1254; BWON-F16C-NEXT:    jne .LBB23_2
1255; BWON-F16C-NEXT:  # %bb.1:
1256; BWON-F16C-NEXT:    vmovaps %xmm1, %xmm0
1257; BWON-F16C-NEXT:  .LBB23_2:
1258; BWON-F16C-NEXT:    retq
1259;
1260; CHECK-I686-LABEL: select:
1261; CHECK-I686:       # %bb.0:
1262; CHECK-I686-NEXT:    testb $1, {{[0-9]+}}(%esp)
1263; CHECK-I686-NEXT:    jne .LBB23_2
1264; CHECK-I686-NEXT:  # %bb.1:
1265; CHECK-I686-NEXT:    movaps %xmm1, %xmm0
1266; CHECK-I686-NEXT:  .LBB23_2:
1267; CHECK-I686-NEXT:    retl
; The expected code tests only bit 0 of the condition and branches; when the
; bit is clear it copies the whole %xmm1 register into %xmm0 as one 128-bit
; move, so the vector is never split into individual half elements.
1268  %s = select i1 %c, <8 x half> %x, <8 x half> %y
1269  ret <8 x half> %s
1270}
1271
; Loads an <8 x half> from %p and returns a vector whose elements 0 and 1 are
; both source element 4; the remaining six result elements are undef.
1272define <8 x half> @shuffle(ptr %p) {
1273; CHECK-LIBCALL-LABEL: shuffle:
1274; CHECK-LIBCALL:       # %bb.0:
1275; CHECK-LIBCALL-NEXT:    movdqu (%rdi), %xmm0
1276; CHECK-LIBCALL-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1277; CHECK-LIBCALL-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1278; CHECK-LIBCALL-NEXT:    retq
1279;
1280; BWON-F16C-LABEL: shuffle:
1281; BWON-F16C:       # %bb.0:
1282; BWON-F16C-NEXT:    vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
1283; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1284; BWON-F16C-NEXT:    retq
1285;
1286; CHECK-I686-LABEL: shuffle:
1287; CHECK-I686:       # %bb.0:
1288; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
1289; CHECK-I686-NEXT:    movdqu (%eax), %xmm0
1290; CHECK-I686-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1291; CHECK-I686-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1292; CHECK-I686-NEXT:    retl
; The expected code is a 16-bit shuffle of the high half (pshufhw) followed by
; a 32-bit shuffle (pshufd); together they place source word 4 in every lane.
; The IR load is only 8-byte aligned; the non-AVX runs use an unaligned movdqu
; and the F16C run folds the load into vpshufhw.
1293  %1 = load <8 x half>, ptr %p, align 8
1294  %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1295  ret <8 x half> %2
1296}
1297
1298declare half @llvm.minnum.f16(half, half)
1299
; Returns llvm.minnum.f16 of the two half arguments; the call carries the
; "fast" fast-math flag.
1300define half @pr61271(half %0, half %1) #0 {
1301; CHECK-LIBCALL-LABEL: pr61271:
1302; CHECK-LIBCALL:       # %bb.0:
1303; CHECK-LIBCALL-NEXT:    pushq %rax
1304; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1305; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
1306; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1307; CHECK-LIBCALL-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1308; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1309; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1310; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1311; CHECK-LIBCALL-NEXT:    minss (%rsp), %xmm0 # 4-byte Folded Reload
1312; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1313; CHECK-LIBCALL-NEXT:    popq %rax
1314; CHECK-LIBCALL-NEXT:    retq
1315;
1316; BWON-F16C-LABEL: pr61271:
1317; BWON-F16C:       # %bb.0:
1318; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
1319; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1320; BWON-F16C-NEXT:    vminss %xmm1, %xmm0, %xmm0
1321; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1322; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1323; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
1324; BWON-F16C-NEXT:    retq
1325;
1326; CHECK-I686-LABEL: pr61271:
1327; CHECK-I686:       # %bb.0:
1328; CHECK-I686-NEXT:    subl $44, %esp
1329; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1330; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1331; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1332; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1333; CHECK-I686-NEXT:    movw %ax, (%esp)
1334; CHECK-I686-NEXT:    calll __extendhfsf2
1335; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1336; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1337; CHECK-I686-NEXT:    movw %ax, (%esp)
1338; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1339; CHECK-I686-NEXT:    calll __extendhfsf2
1340; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1341; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1342; CHECK-I686-NEXT:    minss {{[0-9]+}}(%esp), %xmm0
1343; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1344; CHECK-I686-NEXT:    calll __truncsfhf2
1345; CHECK-I686-NEXT:    addl $44, %esp
1346; CHECK-I686-NEXT:    retl
; In all configurations the expected code extends both operands to float,
; emits a single minss/vminss with no separate NaN-handling compare, and
; truncates the result back to half.
1347  %3 = call fast half @llvm.minnum.f16(half %0, half %1)
1348  ret half %3
1349}
1350
1351declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
1352
1353define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
1354; CHECK-LIBCALL-LABEL: maxnum_v8f16:
1355; CHECK-LIBCALL:       # %bb.0:
1356; CHECK-LIBCALL-NEXT:    subq $184, %rsp
1357; CHECK-LIBCALL-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1358; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1359; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1360; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1361; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1362; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1363; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1364; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1365; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1366; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1367; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1368; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1369; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1370; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1371; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1372; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1373; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1374; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1375; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1376; CHECK-LIBCALL-NEXT:    ja .LBB26_2
1377; CHECK-LIBCALL-NEXT:  # %bb.1:
1378; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1379; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1380; CHECK-LIBCALL-NEXT:  .LBB26_2:
1381; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1382; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1383; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1384; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1385; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1386; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1387; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1388; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1389; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1390; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1391; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1392; CHECK-LIBCALL-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1393; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1394; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1395; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1396; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1397; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1398; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1399; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1400; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1401; CHECK-LIBCALL-NEXT:    ja .LBB26_4
1402; CHECK-LIBCALL-NEXT:  # %bb.3:
1403; CHECK-LIBCALL-NEXT:    movss (%rsp), %xmm0 # 4-byte Reload
1404; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1405; CHECK-LIBCALL-NEXT:  .LBB26_4:
1406; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1407; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1408; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1409; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1410; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1411; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1412; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1413; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1414; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1415; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1416; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1417; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1418; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1419; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1420; CHECK-LIBCALL-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1421; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1422; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1423; CHECK-LIBCALL-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
1424; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1425; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1426; CHECK-LIBCALL-NEXT:    ja .LBB26_6
1427; CHECK-LIBCALL-NEXT:  # %bb.5:
1428; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1429; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1430; CHECK-LIBCALL-NEXT:  .LBB26_6:
1431; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1432; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1433; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1434; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1435; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1436; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1437; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1438; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1439; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1440; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1441; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1442; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1443; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1444; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1445; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1446; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1447; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1448; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1449; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1450; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1451; CHECK-LIBCALL-NEXT:    ja .LBB26_8
1452; CHECK-LIBCALL-NEXT:  # %bb.7:
1453; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1454; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1455; CHECK-LIBCALL-NEXT:  .LBB26_8:
1456; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1457; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1458; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1459; CHECK-LIBCALL-NEXT:    psrlq $48, %xmm0
1460; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1461; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1462; CHECK-LIBCALL-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1463; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1464; CHECK-LIBCALL-NEXT:    psrlq $48, %xmm0
1465; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1466; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1467; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1468; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1469; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1470; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1471; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1472; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1473; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1474; CHECK-LIBCALL-NEXT:    movss (%rsp), %xmm0 # 4-byte Reload
1475; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1476; CHECK-LIBCALL-NEXT:    ja .LBB26_10
1477; CHECK-LIBCALL-NEXT:  # %bb.9:
1478; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1479; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1480; CHECK-LIBCALL-NEXT:  .LBB26_10:
1481; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1482; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1483; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1484; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1485; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1486; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1487; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1488; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1489; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1490; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1491; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1492; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1493; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1494; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1495; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1496; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1497; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1498; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1499; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
1500; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1501; CHECK-LIBCALL-NEXT:    ja .LBB26_12
1502; CHECK-LIBCALL-NEXT:  # %bb.11:
1503; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1504; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
1505; CHECK-LIBCALL-NEXT:  .LBB26_12:
1506; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1507; CHECK-LIBCALL-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1508; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1509; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1510; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1511; CHECK-LIBCALL-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1512; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1513; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1514; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
1515; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1516; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1517; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1518; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1519; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1520; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1521; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1522; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1523; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1524; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1525; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1526; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1527; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1528; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1529; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
1530; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1531; CHECK-LIBCALL-NEXT:    ja .LBB26_14
1532; CHECK-LIBCALL-NEXT:  # %bb.13:
1533; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1534; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
1535; CHECK-LIBCALL-NEXT:  .LBB26_14:
1536; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1537; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1538; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1539; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1540; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1541; CHECK-LIBCALL-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1542; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1543; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1544; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
1545; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1546; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1547; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1548; CHECK-LIBCALL-NEXT:    psrld $16, %xmm0
1549; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1550; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1551; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1552; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1553; CHECK-LIBCALL-NEXT:    psrld $16, %xmm0
1554; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1555; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1556; CHECK-LIBCALL-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1557; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1558; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1559; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1560; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1561; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1562; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1563; CHECK-LIBCALL-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1564; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1565; CHECK-LIBCALL-NEXT:    ja .LBB26_16
1566; CHECK-LIBCALL-NEXT:  # %bb.15:
1567; CHECK-LIBCALL-NEXT:    movd (%rsp), %xmm0 # 4-byte Folded Reload
1568; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
1569; CHECK-LIBCALL-NEXT:  .LBB26_16:
1570; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1571; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1572; CHECK-LIBCALL-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1573; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1574; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
1575; CHECK-LIBCALL-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1576; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
1577; CHECK-LIBCALL-NEXT:    movdqa %xmm1, %xmm0
1578; CHECK-LIBCALL-NEXT:    addq $184, %rsp
1579; CHECK-LIBCALL-NEXT:    retq
1580;
1581; BWON-F16C-LABEL: maxnum_v8f16:
1582; BWON-F16C:       # %bb.0:
1583; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1584; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
1585; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1586; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
1587; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm3
1588; BWON-F16C-NEXT:    ja .LBB26_2
1589; BWON-F16C-NEXT:  # %bb.1:
1590; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm3
1591; BWON-F16C-NEXT:  .LBB26_2:
1592; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm2
1593; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
1594; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
1595; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
1596; BWON-F16C-NEXT:    vcvtph2ps %xmm4, %xmm4
1597; BWON-F16C-NEXT:    vucomiss %xmm3, %xmm4
1598; BWON-F16C-NEXT:    ja .LBB26_4
1599; BWON-F16C-NEXT:  # %bb.3:
1600; BWON-F16C-NEXT:    vmovaps %xmm3, %xmm4
1601; BWON-F16C-NEXT:  .LBB26_4:
1602; BWON-F16C-NEXT:    vmovd %xmm2, %eax
1603; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm4, %xmm2
1604; BWON-F16C-NEXT:    vmovd %xmm2, %ecx
1605; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1606; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm3
1607; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1608; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
1609; BWON-F16C-NEXT:    vucomiss %xmm3, %xmm2
1610; BWON-F16C-NEXT:    ja .LBB26_6
1611; BWON-F16C-NEXT:  # %bb.5:
1612; BWON-F16C-NEXT:    vmovaps %xmm3, %xmm2
1613; BWON-F16C-NEXT:  .LBB26_6:
1614; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
1615; BWON-F16C-NEXT:    vmovd %xmm2, %edx
1616; BWON-F16C-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
1617; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm3
1618; BWON-F16C-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
1619; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
1620; BWON-F16C-NEXT:    vucomiss %xmm3, %xmm2
1621; BWON-F16C-NEXT:    ja .LBB26_8
1622; BWON-F16C-NEXT:  # %bb.7:
1623; BWON-F16C-NEXT:    vmovaps %xmm3, %xmm2
1624; BWON-F16C-NEXT:  .LBB26_8:
1625; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
1626; BWON-F16C-NEXT:    vmovd %xmm2, %esi
1627; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
1628; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
1629; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
1630; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm6
1631; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm6
1632; BWON-F16C-NEXT:    ja .LBB26_10
1633; BWON-F16C-NEXT:  # %bb.9:
1634; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm6
1635; BWON-F16C-NEXT:  .LBB26_10:
1636; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm2
1637; BWON-F16C-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm3
1638; BWON-F16C-NEXT:    vpinsrw $0, %edx, %xmm0, %xmm4
1639; BWON-F16C-NEXT:    vpinsrw $0, %esi, %xmm0, %xmm5
1640; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm6, %xmm6
1641; BWON-F16C-NEXT:    vmovd %xmm6, %eax
1642; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3]
1643; BWON-F16C-NEXT:    vcvtph2ps %xmm6, %xmm7
1644; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
1645; BWON-F16C-NEXT:    vcvtph2ps %xmm6, %xmm6
1646; BWON-F16C-NEXT:    vucomiss %xmm7, %xmm6
1647; BWON-F16C-NEXT:    ja .LBB26_12
1648; BWON-F16C-NEXT:  # %bb.11:
1649; BWON-F16C-NEXT:    vmovaps %xmm7, %xmm6
1650; BWON-F16C-NEXT:  .LBB26_12:
1651; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1652; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
1653; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
1654; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm6, %xmm5
1655; BWON-F16C-NEXT:    vmovd %xmm5, %eax
1656; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm5
1657; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm7
1658; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm6
1659; BWON-F16C-NEXT:    vucomiss %xmm7, %xmm6
1660; BWON-F16C-NEXT:    ja .LBB26_14
1661; BWON-F16C-NEXT:  # %bb.13:
1662; BWON-F16C-NEXT:    vmovaps %xmm7, %xmm6
1663; BWON-F16C-NEXT:  .LBB26_14:
1664; BWON-F16C-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1665; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
1666; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm6, %xmm4
1667; BWON-F16C-NEXT:    vmovd %xmm4, %eax
1668; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
1669; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
1670; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
1671; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1672; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1673; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
1674; BWON-F16C-NEXT:    ja .LBB26_16
1675; BWON-F16C-NEXT:  # %bb.15:
1676; BWON-F16C-NEXT:    vmovaps %xmm1, %xmm0
1677; BWON-F16C-NEXT:  .LBB26_16:
1678; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1679; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1680; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
1681; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
1682; BWON-F16C-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
1683; BWON-F16C-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1684; BWON-F16C-NEXT:    retq
1685;
1686; CHECK-I686-LABEL: maxnum_v8f16:
1687; CHECK-I686:       # %bb.0:
1688; CHECK-I686-NEXT:    pushl %ebx
1689; CHECK-I686-NEXT:    pushl %edi
1690; CHECK-I686-NEXT:    pushl %esi
1691; CHECK-I686-NEXT:    subl $336, %esp # imm = 0x150
1692; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1693; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1694; CHECK-I686-NEXT:    movaps %xmm1, %xmm0
1695; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1]
1696; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1697; CHECK-I686-NEXT:    movw %di, (%esp)
1698; CHECK-I686-NEXT:    calll __extendhfsf2
1699; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1700; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1701; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1702; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
1703; CHECK-I686-NEXT:    movw %si, (%esp)
1704; CHECK-I686-NEXT:    calll __extendhfsf2
1705; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1706; CHECK-I686-NEXT:    movw %di, (%esp)
1707; CHECK-I686-NEXT:    calll __extendhfsf2
1708; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1709; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1710; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1711; CHECK-I686-NEXT:    movw %di, (%esp)
1712; CHECK-I686-NEXT:    calll __extendhfsf2
1713; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1714; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1715; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
1716; CHECK-I686-NEXT:    movw %bx, (%esp)
1717; CHECK-I686-NEXT:    calll __extendhfsf2
1718; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1719; CHECK-I686-NEXT:    movw %di, (%esp)
1720; CHECK-I686-NEXT:    calll __extendhfsf2
1721; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1722; CHECK-I686-NEXT:    movw %bx, (%esp)
1723; CHECK-I686-NEXT:    calll __extendhfsf2
1724; CHECK-I686-NEXT:    movw %si, (%esp)
1725; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1726; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1727; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1728; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1729; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1730; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1731; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1732; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1733; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1734; CHECK-I686-NEXT:    ja .LBB26_1
1735; CHECK-I686-NEXT:  # %bb.2:
1736; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1737; CHECK-I686-NEXT:    jmp .LBB26_3
1738; CHECK-I686-NEXT:  .LBB26_1:
1739; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1740; CHECK-I686-NEXT:  .LBB26_3:
1741; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1742; CHECK-I686-NEXT:    calll __extendhfsf2
1743; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1744; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1745; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1746; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1747; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1748; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1749; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1750; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1751; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1752; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1753; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1754; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1755; CHECK-I686-NEXT:    ja .LBB26_4
1756; CHECK-I686-NEXT:  # %bb.5:
1757; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1758; CHECK-I686-NEXT:    jmp .LBB26_6
1759; CHECK-I686-NEXT:  .LBB26_4:
1760; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1761; CHECK-I686-NEXT:  .LBB26_6:
1762; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1763; CHECK-I686-NEXT:    calll __truncsfhf2
1764; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1765; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1766; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1767; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1768; CHECK-I686-NEXT:    calll __truncsfhf2
1769; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1770; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1771; CHECK-I686-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1772; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1773; CHECK-I686-NEXT:    movw %di, (%esp)
1774; CHECK-I686-NEXT:    calll __extendhfsf2
1775; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1776; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1777; CHECK-I686-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1778; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
1779; CHECK-I686-NEXT:    movw %si, (%esp)
1780; CHECK-I686-NEXT:    calll __extendhfsf2
1781; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1782; CHECK-I686-NEXT:    movw %di, (%esp)
1783; CHECK-I686-NEXT:    calll __extendhfsf2
1784; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1785; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1786; CHECK-I686-NEXT:    psrlq $48, %xmm0
1787; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1788; CHECK-I686-NEXT:    movw %di, (%esp)
1789; CHECK-I686-NEXT:    calll __extendhfsf2
1790; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1791; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1792; CHECK-I686-NEXT:    psrlq $48, %xmm0
1793; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
1794; CHECK-I686-NEXT:    movw %bx, (%esp)
1795; CHECK-I686-NEXT:    calll __extendhfsf2
1796; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1797; CHECK-I686-NEXT:    movw %di, (%esp)
1798; CHECK-I686-NEXT:    calll __extendhfsf2
1799; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1800; CHECK-I686-NEXT:    movw %bx, (%esp)
1801; CHECK-I686-NEXT:    calll __extendhfsf2
1802; CHECK-I686-NEXT:    movw %si, (%esp)
1803; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1804; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1805; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1806; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1807; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1808; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1809; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1810; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1811; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1812; CHECK-I686-NEXT:    ja .LBB26_7
1813; CHECK-I686-NEXT:  # %bb.8:
1814; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1815; CHECK-I686-NEXT:    jmp .LBB26_9
1816; CHECK-I686-NEXT:  .LBB26_7:
1817; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1818; CHECK-I686-NEXT:  .LBB26_9:
1819; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1820; CHECK-I686-NEXT:    calll __extendhfsf2
1821; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1822; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1823; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1824; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1825; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1826; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1827; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1828; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1829; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1830; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1831; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1832; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1833; CHECK-I686-NEXT:    ja .LBB26_10
1834; CHECK-I686-NEXT:  # %bb.11:
1835; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1836; CHECK-I686-NEXT:    jmp .LBB26_12
1837; CHECK-I686-NEXT:  .LBB26_10:
1838; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1839; CHECK-I686-NEXT:  .LBB26_12:
1840; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1841; CHECK-I686-NEXT:    calll __truncsfhf2
1842; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1843; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1844; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1845; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1846; CHECK-I686-NEXT:    calll __truncsfhf2
1847; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1848; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1849; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1850; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1851; CHECK-I686-NEXT:    movw %di, (%esp)
1852; CHECK-I686-NEXT:    calll __extendhfsf2
1853; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1854; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1855; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1856; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
1857; CHECK-I686-NEXT:    movw %si, (%esp)
1858; CHECK-I686-NEXT:    calll __extendhfsf2
1859; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1860; CHECK-I686-NEXT:    movw %di, (%esp)
1861; CHECK-I686-NEXT:    calll __extendhfsf2
1862; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1863; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1864; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1865; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1866; CHECK-I686-NEXT:    movw %di, (%esp)
1867; CHECK-I686-NEXT:    calll __extendhfsf2
1868; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1869; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1870; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1871; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
1872; CHECK-I686-NEXT:    movw %bx, (%esp)
1873; CHECK-I686-NEXT:    calll __extendhfsf2
1874; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1875; CHECK-I686-NEXT:    movw %di, (%esp)
1876; CHECK-I686-NEXT:    calll __extendhfsf2
1877; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1878; CHECK-I686-NEXT:    movw %bx, (%esp)
1879; CHECK-I686-NEXT:    calll __extendhfsf2
1880; CHECK-I686-NEXT:    movw %si, (%esp)
1881; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1882; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1883; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1884; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1885; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1886; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1887; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1888; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1889; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1890; CHECK-I686-NEXT:    ja .LBB26_13
1891; CHECK-I686-NEXT:  # %bb.14:
1892; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1893; CHECK-I686-NEXT:    jmp .LBB26_15
1894; CHECK-I686-NEXT:  .LBB26_13:
1895; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1896; CHECK-I686-NEXT:  .LBB26_15:
1897; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1898; CHECK-I686-NEXT:    calll __extendhfsf2
1899; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1900; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1901; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1902; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1903; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1904; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1905; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1906; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1907; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1908; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1909; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1910; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1911; CHECK-I686-NEXT:    ja .LBB26_16
1912; CHECK-I686-NEXT:  # %bb.17:
1913; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1914; CHECK-I686-NEXT:    jmp .LBB26_18
1915; CHECK-I686-NEXT:  .LBB26_16:
1916; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1917; CHECK-I686-NEXT:  .LBB26_18:
1918; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1919; CHECK-I686-NEXT:    calll __truncsfhf2
1920; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1921; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1922; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1923; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1924; CHECK-I686-NEXT:    calll __truncsfhf2
1925; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
1926; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1927; CHECK-I686-NEXT:    psrld $16, %xmm0
1928; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1929; CHECK-I686-NEXT:    movw %di, (%esp)
1930; CHECK-I686-NEXT:    calll __extendhfsf2
1931; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1932; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1933; CHECK-I686-NEXT:    psrld $16, %xmm0
1934; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
1935; CHECK-I686-NEXT:    movw %si, (%esp)
1936; CHECK-I686-NEXT:    calll __extendhfsf2
1937; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1938; CHECK-I686-NEXT:    movw %di, (%esp)
1939; CHECK-I686-NEXT:    calll __extendhfsf2
1940; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1941; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1942; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1943; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
1944; CHECK-I686-NEXT:    movw %di, (%esp)
1945; CHECK-I686-NEXT:    calll __extendhfsf2
1946; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1947; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
1948; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1949; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
1950; CHECK-I686-NEXT:    movw %bx, (%esp)
1951; CHECK-I686-NEXT:    calll __extendhfsf2
1952; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1953; CHECK-I686-NEXT:    movw %di, (%esp)
1954; CHECK-I686-NEXT:    calll __extendhfsf2
1955; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1956; CHECK-I686-NEXT:    movw %bx, (%esp)
1957; CHECK-I686-NEXT:    calll __extendhfsf2
1958; CHECK-I686-NEXT:    movw %si, (%esp)
1959; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1960; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1961; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1962; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1963; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1964; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1965; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1966; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1967; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1968; CHECK-I686-NEXT:    ja .LBB26_19
1969; CHECK-I686-NEXT:  # %bb.20:
1970; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1971; CHECK-I686-NEXT:    jmp .LBB26_21
1972; CHECK-I686-NEXT:  .LBB26_19:
1973; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1974; CHECK-I686-NEXT:  .LBB26_21:
1975; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1976; CHECK-I686-NEXT:    calll __extendhfsf2
1977; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
1978; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
1979; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1980; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1981; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1982; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1983; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1984; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1985; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1986; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1987; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1988; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1989; CHECK-I686-NEXT:    ja .LBB26_22
1990; CHECK-I686-NEXT:  # %bb.23:
1991; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1992; CHECK-I686-NEXT:    jmp .LBB26_24
1993; CHECK-I686-NEXT:  .LBB26_22:
1994; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1995; CHECK-I686-NEXT:  .LBB26_24:
1996; CHECK-I686-NEXT:    movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1997; CHECK-I686-NEXT:    calll __truncsfhf2
1998; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
1999; CHECK-I686-NEXT:    # xmm1 = mem[0],zero,zero,zero
2000; CHECK-I686-NEXT:    movss %xmm1, (%esp)
2001; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
2002; CHECK-I686-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2003; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
2004; CHECK-I686-NEXT:    punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2005; CHECK-I686-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
2006; CHECK-I686-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2007; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
2008; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
2009; CHECK-I686-NEXT:    punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2010; CHECK-I686-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
2011; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
2012; CHECK-I686-NEXT:    calll __truncsfhf2
2013; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
2014; CHECK-I686-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2015; CHECK-I686-NEXT:    punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2016; CHECK-I686-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
2017; CHECK-I686-NEXT:    punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2018; CHECK-I686-NEXT:    # xmm1 = xmm1[0],mem[0]
2019; CHECK-I686-NEXT:    movdqa %xmm1, %xmm0
2020; CHECK-I686-NEXT:    addl $336, %esp # imm = 0x150
2021; CHECK-I686-NEXT:    popl %esi
2022; CHECK-I686-NEXT:    popl %edi
2023; CHECK-I686-NEXT:    popl %ebx
2024; CHECK-I686-NEXT:    retl
2025  %3 = call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %0, <8 x half> %1)
2026  ret <8 x half> %3
2027}
2028
; Regression test named pr63114 (presumably after the upstream issue with that
; number -- confirm against the bug tracker). It takes no arguments and returns
; nothing: it loads <24 x half> through a poison pointer, keeps every third
; lane starting at lane 2, pairs each kept lane with the half constant 0xH3C00
; (1.0 in IEEE half precision), and stores the resulting <32 x half> through a
; null pointer.
; The per-configuration assertion comments inside the body are autogenerated
; (see the NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2029define void @pr63114() {
2030; CHECK-LIBCALL-LABEL: pr63114:
2031; CHECK-LIBCALL:       # %bb.0:
2032; CHECK-LIBCALL-NEXT:    movdqu (%rax), %xmm4
2033; CHECK-LIBCALL-NEXT:    pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
2034; CHECK-LIBCALL-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2035; CHECK-LIBCALL-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2036; CHECK-LIBCALL-NEXT:    pand %xmm1, %xmm0
2037; CHECK-LIBCALL-NEXT:    movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2038; CHECK-LIBCALL-NEXT:    por %xmm2, %xmm0
2039; CHECK-LIBCALL-NEXT:    movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2040; CHECK-LIBCALL-NEXT:    pand %xmm3, %xmm0
2041; CHECK-LIBCALL-NEXT:    movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2042; CHECK-LIBCALL-NEXT:    por %xmm5, %xmm0
2043; CHECK-LIBCALL-NEXT:    pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
2044; CHECK-LIBCALL-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
2045; CHECK-LIBCALL-NEXT:    pand %xmm1, %xmm6
2046; CHECK-LIBCALL-NEXT:    por %xmm2, %xmm6
2047; CHECK-LIBCALL-NEXT:    pand %xmm3, %xmm6
2048; CHECK-LIBCALL-NEXT:    por %xmm5, %xmm6
2049; CHECK-LIBCALL-NEXT:    pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5]
2050; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,3,0,3]
2051; CHECK-LIBCALL-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5]
2052; CHECK-LIBCALL-NEXT:    pand %xmm1, %xmm4
2053; CHECK-LIBCALL-NEXT:    por %xmm2, %xmm4
2054; CHECK-LIBCALL-NEXT:    pand %xmm3, %xmm4
2055; CHECK-LIBCALL-NEXT:    por %xmm5, %xmm4
2056; CHECK-LIBCALL-NEXT:    pand %xmm1, %xmm7
2057; CHECK-LIBCALL-NEXT:    por %xmm2, %xmm7
2058; CHECK-LIBCALL-NEXT:    pand %xmm3, %xmm7
2059; CHECK-LIBCALL-NEXT:    por %xmm5, %xmm7
2060; CHECK-LIBCALL-NEXT:    movdqu %xmm7, 0
2061; CHECK-LIBCALL-NEXT:    movdqu %xmm4, 32
2062; CHECK-LIBCALL-NEXT:    movdqu %xmm6, 48
2063; CHECK-LIBCALL-NEXT:    movdqu %xmm0, 16
2064; CHECK-LIBCALL-NEXT:    retq
2065;
2066; BWON-F16C-LABEL: pr63114:
2067; BWON-F16C:       # %bb.0:
2068; BWON-F16C-NEXT:    vmovdqu (%rax), %xmm0
2069; BWON-F16C-NEXT:    vpsrld $16, %xmm0, %xmm1
2070; BWON-F16C-NEXT:    vbroadcastss (%rax), %xmm2
2071; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2072; BWON-F16C-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
2073; BWON-F16C-NEXT:    vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
2074; BWON-F16C-NEXT:    vpsllq $48, %xmm3, %xmm4
2075; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2076; BWON-F16C-NEXT:    vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
2077; BWON-F16C-NEXT:    vpor %xmm3, %xmm2, %xmm2
2078; BWON-F16C-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0]
2079; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7]
2080; BWON-F16C-NEXT:    vpor %xmm3, %xmm1, %xmm1
2081; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
2082; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7]
2083; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1]
2084; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7]
2085; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7]
2086; BWON-F16C-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
2087; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7]
2088; BWON-F16C-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7]
2089; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
2090; BWON-F16C-NEXT:    vmovups %ymm0, 0
2091; BWON-F16C-NEXT:    vmovups %ymm1, 32
2092; BWON-F16C-NEXT:    vzeroupper
2093; BWON-F16C-NEXT:    retq
2094;
2095; CHECK-I686-LABEL: pr63114:
2096; CHECK-I686:       # %bb.0:
2097; CHECK-I686-NEXT:    movdqu (%eax), %xmm6
2098; CHECK-I686-NEXT:    pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
2099; CHECK-I686-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2100; CHECK-I686-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2101; CHECK-I686-NEXT:    pand %xmm1, %xmm0
2102; CHECK-I686-NEXT:    movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2103; CHECK-I686-NEXT:    por %xmm2, %xmm0
2104; CHECK-I686-NEXT:    movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2105; CHECK-I686-NEXT:    pand %xmm3, %xmm0
2106; CHECK-I686-NEXT:    movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2107; CHECK-I686-NEXT:    por %xmm4, %xmm0
2108; CHECK-I686-NEXT:    pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
2109; CHECK-I686-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2110; CHECK-I686-NEXT:    pand %xmm1, %xmm5
2111; CHECK-I686-NEXT:    por %xmm2, %xmm5
2112; CHECK-I686-NEXT:    pand %xmm3, %xmm5
2113; CHECK-I686-NEXT:    por %xmm4, %xmm5
2114; CHECK-I686-NEXT:    pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
2115; CHECK-I686-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
2116; CHECK-I686-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
2117; CHECK-I686-NEXT:    pand %xmm1, %xmm6
2118; CHECK-I686-NEXT:    por %xmm2, %xmm6
2119; CHECK-I686-NEXT:    pand %xmm3, %xmm6
2120; CHECK-I686-NEXT:    por %xmm4, %xmm6
2121; CHECK-I686-NEXT:    pand %xmm1, %xmm7
2122; CHECK-I686-NEXT:    por %xmm2, %xmm7
2123; CHECK-I686-NEXT:    pand %xmm3, %xmm7
2124; CHECK-I686-NEXT:    por %xmm4, %xmm7
2125; CHECK-I686-NEXT:    movdqu %xmm7, 0
2126; CHECK-I686-NEXT:    movdqu %xmm6, 32
2127; CHECK-I686-NEXT:    movdqu %xmm5, 48
2128; CHECK-I686-NEXT:    movdqu %xmm0, 16
2129; CHECK-I686-NEXT:    retl
  ; Read 24 half lanes through a poison pointer with 2-byte alignment.
2130  %1 = load <24 x half>, ptr poison, align 2
  ; Keep lanes 2, 5, 8, ..., 23 of the load (every third lane), giving 8 lanes.
2131  %2 = shufflevector <24 x half> %1, <24 x half> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  ; Identity mask 0..15 over both operands: lanes 0-7 are %2, lanes 8-15 are
  ; the constant 0xH3C00.
2132  %3 = shufflevector <8 x half> %2, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Mask indices 0-15 select from the poison first operand and 16-31 from %3,
  ; so result group i (i = 0..7) is (poison, poison, %3[i], %3[i+8]).
2133  %4 = shufflevector <16 x half> poison, <16 x half> %3, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
  ; Write all 32 lanes through a null pointer with 2-byte alignment.
2134  store <32 x half> %4, ptr null, align 2
2135  ret void
2136}
2137
; Attribute group #0: applied to functions defined with a trailing `#0`
; (e.g. @test_load_store at the top of this file); it marks them nounwind.
2138attributes #0 = { nounwind }
2139