; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2  -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c  -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86-FP16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64-FP16

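; Strict (constrained) scalar f16 comparison tests. The first half of the
; file exercises llvm.experimental.constrained.fcmp.f16 (quiet compares,
; lowered to ucomiss/vucomiss/vucomish); the second half exercises the
; signaling variant llvm.experimental.constrained.fcmps.f16, lowered to
; comiss/vcomiss/vcomish, which raise the invalid exception on quiet NaNs
; as well. Without AVX512FP16 the half operands are extended to float
; first: the SSE2 run calls the __extendhfsf2 libcall, while the
; F16C/AVX512F runs use vcvtph2ps. The i686 AVX512FP16 run selects between
; the two in-memory i32 arguments by cmov'ing their addresses
; (leal + cmov + load) rather than their values.
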
define i32 @test_f16_oeq_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oeq_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oeq_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_oeq_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovnel %eax, %ecx
; X86-FP16-NEXT:    cmovpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_oeq_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnel %esi, %eax
; X64-FP16-NEXT:    cmovpl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

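; ucomiss/vucomish set CF/ZF/PF like an unsigned integer compare, with an
; unordered result setting all three flags, so ogt needs only a single
; cmovbe: %b is taken whenever the result is below, equal, or unordered.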
define i32 @test_f16_ogt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ogt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ogt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ogt_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmoval %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ogt_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovbel %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_oge_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oge_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oge_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_oge_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovael %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_oge_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovbl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

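; olt and ole are lowered by swapping the compare operands and reusing the
; "above"/"above-or-equal" flag tests from ogt/oge.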
define i32 @test_f16_olt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_olt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_olt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_olt_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmoval %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_olt_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmovbel %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ole_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ole_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ole_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ole_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovael %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ole_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmovbl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ole",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

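; After a quiet compare both "equal" and "unordered" set ZF, so one (ordered
; and not equal) is exactly ZF=0 and a single cmove selects %b.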
define i32 @test_f16_one_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_one_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_one_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovel %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_one_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovnel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_one_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovel %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"one",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

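; ord and uno reduce to a parity check: PF=1 if and only if the compare was
; unordered.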
define i32 @test_f16_ord_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ord_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ord_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ord_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovnpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ord_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovpl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ord",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

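; The unordered predicates are the flag complements of the ordered ones and
; also fold to a single cmov; ueq (equal or unordered) is exactly ZF=1.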
define i32 @test_f16_ueq_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ueq_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ueq_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ueq_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ueq_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnel %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ueq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

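; ugt and uge compare with the operands swapped, mirroring olt/ole above;
; ult and ule keep the original operand order.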
define i32 @test_f16_ugt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ugt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ugt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ugt_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ugt_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmovael %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uge_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uge_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    ucomiss %xmm0, %xmm1
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uge_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_uge_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_uge_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmoval %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ult_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ult_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ult_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ult_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ult_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovael %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ule_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ule_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ule_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ule_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ule_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmoval %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

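; une is the complement of oeq and likewise needs two cmovs (not-equal or
; parity); note that the roles of %a and %b are swapped relative to
; test_f16_oeq_q, with %b as the starting value.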
define i32 @test_f16_une_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_une_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebp, %ebx
; SSE2-NEXT:    cmovpl %ebp, %ebx
; SSE2-NEXT:    movl %ebx, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_une_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %edi, %eax
; AVX-NEXT:    cmovpl %edi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_une_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovnel %eax, %ecx
; X86-FP16-NEXT:    cmovpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_une_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %esi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnel %edi, %eax
; X64-FP16-NEXT:    cmovpl %edi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uno_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uno_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uno_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnpl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_uno_q:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_uno_q:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnpl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

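; The remaining tests use llvm.experimental.constrained.fcmps.f16, the
; signaling variant, so the ordered-compare instructions comiss/vcomiss/
; vcomish are emitted in place of the quiet ucomiss/vucomiss/vucomish.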
878define i32 @test_f16_oeq_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
879; SSE2-LABEL: test_f16_oeq_s:
880; SSE2:       # %bb.0:
881; SSE2-NEXT:    pushq %rbp
882; SSE2-NEXT:    pushq %rbx
883; SSE2-NEXT:    pushq %rax
884; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
885; SSE2-NEXT:    movl %esi, %ebx
886; SSE2-NEXT:    movl %edi, %ebp
887; SSE2-NEXT:    movaps %xmm1, %xmm0
888; SSE2-NEXT:    callq __extendhfsf2@PLT
889; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
890; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
891; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
892; SSE2-NEXT:    callq __extendhfsf2@PLT
893; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
894; SSE2-NEXT:    cmovnel %ebx, %ebp
895; SSE2-NEXT:    cmovpl %ebx, %ebp
896; SSE2-NEXT:    movl %ebp, %eax
897; SSE2-NEXT:    addq $8, %rsp
898; SSE2-NEXT:    popq %rbx
899; SSE2-NEXT:    popq %rbp
900; SSE2-NEXT:    retq
901;
902; AVX-LABEL: test_f16_oeq_s:
903; AVX:       # %bb.0:
904; AVX-NEXT:    movl %edi, %eax
905; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
906; AVX-NEXT:    vpextrw $0, %xmm1, %edx
907; AVX-NEXT:    movzwl %dx, %edx
908; AVX-NEXT:    vmovd %edx, %xmm0
909; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
910; AVX-NEXT:    movzwl %cx, %ecx
911; AVX-NEXT:    vmovd %ecx, %xmm1
912; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
913; AVX-NEXT:    vcomiss %xmm0, %xmm1
914; AVX-NEXT:    cmovnel %esi, %eax
915; AVX-NEXT:    cmovpl %esi, %eax
916; AVX-NEXT:    retq
917;
918; X86-FP16-LABEL: test_f16_oeq_s:
919; X86-FP16:       # %bb.0:
920; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
921; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
922; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
923; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
924; X86-FP16-NEXT:    cmovnel %eax, %ecx
925; X86-FP16-NEXT:    cmovpl %eax, %ecx
926; X86-FP16-NEXT:    movl (%ecx), %eax
927; X86-FP16-NEXT:    retl
928;
929; X64-FP16-LABEL: test_f16_oeq_s:
930; X64-FP16:       # %bb.0:
931; X64-FP16-NEXT:    movl %edi, %eax
932; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
933; X64-FP16-NEXT:    cmovnel %esi, %eax
934; X64-FP16-NEXT:    cmovpl %esi, %eax
935; X64-FP16-NEXT:    retq
936  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
937                                               half %f1, half %f2, metadata !"oeq",
938                                               metadata !"fpexcept.strict") #0
939  %res = select i1 %cond, i32 %a, i32 %b
940  ret i32 %res
941}
942
943define i32 @test_f16_ogt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
944; SSE2-LABEL: test_f16_ogt_s:
945; SSE2:       # %bb.0:
946; SSE2-NEXT:    pushq %rbp
947; SSE2-NEXT:    pushq %rbx
948; SSE2-NEXT:    pushq %rax
949; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
950; SSE2-NEXT:    movl %esi, %ebx
951; SSE2-NEXT:    movl %edi, %ebp
952; SSE2-NEXT:    movaps %xmm1, %xmm0
953; SSE2-NEXT:    callq __extendhfsf2@PLT
954; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
955; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
956; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
957; SSE2-NEXT:    callq __extendhfsf2@PLT
958; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
959; SSE2-NEXT:    cmovbel %ebx, %ebp
960; SSE2-NEXT:    movl %ebp, %eax
961; SSE2-NEXT:    addq $8, %rsp
962; SSE2-NEXT:    popq %rbx
963; SSE2-NEXT:    popq %rbp
964; SSE2-NEXT:    retq
965;
966; AVX-LABEL: test_f16_ogt_s:
967; AVX:       # %bb.0:
968; AVX-NEXT:    movl %edi, %eax
969; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
970; AVX-NEXT:    vpextrw $0, %xmm1, %edx
971; AVX-NEXT:    movzwl %dx, %edx
972; AVX-NEXT:    vmovd %edx, %xmm0
973; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
974; AVX-NEXT:    movzwl %cx, %ecx
975; AVX-NEXT:    vmovd %ecx, %xmm1
976; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
977; AVX-NEXT:    vcomiss %xmm0, %xmm1
978; AVX-NEXT:    cmovbel %esi, %eax
979; AVX-NEXT:    retq
980;
981; X86-FP16-LABEL: test_f16_ogt_s:
982; X86-FP16:       # %bb.0:
983; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
984; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
985; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
986; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
987; X86-FP16-NEXT:    cmoval %eax, %ecx
988; X86-FP16-NEXT:    movl (%ecx), %eax
989; X86-FP16-NEXT:    retl
990;
991; X64-FP16-LABEL: test_f16_ogt_s:
992; X64-FP16:       # %bb.0:
993; X64-FP16-NEXT:    movl %edi, %eax
994; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
995; X64-FP16-NEXT:    cmovbel %esi, %eax
996; X64-FP16-NEXT:    retq
997  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
998                                               half %f1, half %f2, metadata !"ogt",
999                                               metadata !"fpexcept.strict") #0
1000  %res = select i1 %cond, i32 %a, i32 %b
1001  ret i32 %res
1002}
1003
1004define i32 @test_f16_oge_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1005; SSE2-LABEL: test_f16_oge_s:
1006; SSE2:       # %bb.0:
1007; SSE2-NEXT:    pushq %rbp
1008; SSE2-NEXT:    pushq %rbx
1009; SSE2-NEXT:    pushq %rax
1010; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1011; SSE2-NEXT:    movl %esi, %ebx
1012; SSE2-NEXT:    movl %edi, %ebp
1013; SSE2-NEXT:    movaps %xmm1, %xmm0
1014; SSE2-NEXT:    callq __extendhfsf2@PLT
1015; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1016; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1017; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1018; SSE2-NEXT:    callq __extendhfsf2@PLT
1019; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
1020; SSE2-NEXT:    cmovbl %ebx, %ebp
1021; SSE2-NEXT:    movl %ebp, %eax
1022; SSE2-NEXT:    addq $8, %rsp
1023; SSE2-NEXT:    popq %rbx
1024; SSE2-NEXT:    popq %rbp
1025; SSE2-NEXT:    retq
1026;
1027; AVX-LABEL: test_f16_oge_s:
1028; AVX:       # %bb.0:
1029; AVX-NEXT:    movl %edi, %eax
1030; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
1031; AVX-NEXT:    vpextrw $0, %xmm1, %edx
1032; AVX-NEXT:    movzwl %dx, %edx
1033; AVX-NEXT:    vmovd %edx, %xmm0
1034; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1035; AVX-NEXT:    movzwl %cx, %ecx
1036; AVX-NEXT:    vmovd %ecx, %xmm1
1037; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1038; AVX-NEXT:    vcomiss %xmm0, %xmm1
1039; AVX-NEXT:    cmovbl %esi, %eax
1040; AVX-NEXT:    retq
1041;
1042; X86-FP16-LABEL: test_f16_oge_s:
1043; X86-FP16:       # %bb.0:
1044; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1045; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1046; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1047; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1048; X86-FP16-NEXT:    cmovael %eax, %ecx
1049; X86-FP16-NEXT:    movl (%ecx), %eax
1050; X86-FP16-NEXT:    retl
1051;
1052; X64-FP16-LABEL: test_f16_oge_s:
1053; X64-FP16:       # %bb.0:
1054; X64-FP16-NEXT:    movl %edi, %eax
1055; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
1056; X64-FP16-NEXT:    cmovbl %esi, %eax
1057; X64-FP16-NEXT:    retq
1058  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1059                                               half %f1, half %f2, metadata !"oge",
1060                                               metadata !"fpexcept.strict") #0
1061  %res = select i1 %cond, i32 %a, i32 %b
1062  ret i32 %res
1063}
1064
1065define i32 @test_f16_olt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1066; SSE2-LABEL: test_f16_olt_s:
1067; SSE2:       # %bb.0:
1068; SSE2-NEXT:    pushq %rbp
1069; SSE2-NEXT:    pushq %rbx
1070; SSE2-NEXT:    pushq %rax
1071; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1072; SSE2-NEXT:    movl %esi, %ebx
1073; SSE2-NEXT:    movl %edi, %ebp
1074; SSE2-NEXT:    movaps %xmm1, %xmm0
1075; SSE2-NEXT:    callq __extendhfsf2@PLT
1076; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1077; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1078; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1079; SSE2-NEXT:    callq __extendhfsf2@PLT
1080; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1081; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
1082; SSE2-NEXT:    comiss %xmm0, %xmm1
1083; SSE2-NEXT:    cmovbel %ebx, %ebp
1084; SSE2-NEXT:    movl %ebp, %eax
1085; SSE2-NEXT:    addq $8, %rsp
1086; SSE2-NEXT:    popq %rbx
1087; SSE2-NEXT:    popq %rbp
1088; SSE2-NEXT:    retq
1089;
1090; AVX-LABEL: test_f16_olt_s:
1091; AVX:       # %bb.0:
1092; AVX-NEXT:    movl %edi, %eax
1093; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
1094; AVX-NEXT:    vpextrw $0, %xmm0, %edx
1095; AVX-NEXT:    movzwl %dx, %edx
1096; AVX-NEXT:    vmovd %edx, %xmm0
1097; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1098; AVX-NEXT:    movzwl %cx, %ecx
1099; AVX-NEXT:    vmovd %ecx, %xmm1
1100; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1101; AVX-NEXT:    vcomiss %xmm0, %xmm1
1102; AVX-NEXT:    cmovbel %esi, %eax
1103; AVX-NEXT:    retq
1104;
1105; X86-FP16-LABEL: test_f16_olt_s:
1106; X86-FP16:       # %bb.0:
1107; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1108; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1109; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1110; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1111; X86-FP16-NEXT:    cmoval %eax, %ecx
1112; X86-FP16-NEXT:    movl (%ecx), %eax
1113; X86-FP16-NEXT:    retl
1114;
1115; X64-FP16-LABEL: test_f16_olt_s:
1116; X64-FP16:       # %bb.0:
1117; X64-FP16-NEXT:    movl %edi, %eax
1118; X64-FP16-NEXT:    vcomish %xmm0, %xmm1
1119; X64-FP16-NEXT:    cmovbel %esi, %eax
1120; X64-FP16-NEXT:    retq
1121  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1122                                               half %f1, half %f2, metadata !"olt",
1123                                               metadata !"fpexcept.strict") #0
1124  %res = select i1 %cond, i32 %a, i32 %b
1125  ret i32 %res
1126}
1127
1128define i32 @test_f16_ole_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1129; SSE2-LABEL: test_f16_ole_s:
1130; SSE2:       # %bb.0:
1131; SSE2-NEXT:    pushq %rbp
1132; SSE2-NEXT:    pushq %rbx
1133; SSE2-NEXT:    pushq %rax
1134; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1135; SSE2-NEXT:    movl %esi, %ebx
1136; SSE2-NEXT:    movl %edi, %ebp
1137; SSE2-NEXT:    movaps %xmm1, %xmm0
1138; SSE2-NEXT:    callq __extendhfsf2@PLT
1139; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1140; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1141; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1142; SSE2-NEXT:    callq __extendhfsf2@PLT
1143; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1144; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
1145; SSE2-NEXT:    comiss %xmm0, %xmm1
1146; SSE2-NEXT:    cmovbl %ebx, %ebp
1147; SSE2-NEXT:    movl %ebp, %eax
1148; SSE2-NEXT:    addq $8, %rsp
1149; SSE2-NEXT:    popq %rbx
1150; SSE2-NEXT:    popq %rbp
1151; SSE2-NEXT:    retq
1152;
1153; AVX-LABEL: test_f16_ole_s:
1154; AVX:       # %bb.0:
1155; AVX-NEXT:    movl %edi, %eax
1156; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
1157; AVX-NEXT:    vpextrw $0, %xmm0, %edx
1158; AVX-NEXT:    movzwl %dx, %edx
1159; AVX-NEXT:    vmovd %edx, %xmm0
1160; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1161; AVX-NEXT:    movzwl %cx, %ecx
1162; AVX-NEXT:    vmovd %ecx, %xmm1
1163; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1164; AVX-NEXT:    vcomiss %xmm0, %xmm1
1165; AVX-NEXT:    cmovbl %esi, %eax
1166; AVX-NEXT:    retq
1167;
1168; X86-FP16-LABEL: test_f16_ole_s:
1169; X86-FP16:       # %bb.0:
1170; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1171; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1172; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1173; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1174; X86-FP16-NEXT:    cmovael %eax, %ecx
1175; X86-FP16-NEXT:    movl (%ecx), %eax
1176; X86-FP16-NEXT:    retl
1177;
1178; X64-FP16-LABEL: test_f16_ole_s:
1179; X64-FP16:       # %bb.0:
1180; X64-FP16-NEXT:    movl %edi, %eax
1181; X64-FP16-NEXT:    vcomish %xmm0, %xmm1
1182; X64-FP16-NEXT:    cmovbl %esi, %eax
1183; X64-FP16-NEXT:    retq
1184  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1185                                               half %f1, half %f2, metadata !"ole",
1186                                               metadata !"fpexcept.strict") #0
1187  %res = select i1 %cond, i32 %a, i32 %b
1188  ret i32 %res
1189}
1190
1191define i32 @test_f16_one_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1192; SSE2-LABEL: test_f16_one_s:
1193; SSE2:       # %bb.0:
1194; SSE2-NEXT:    pushq %rbp
1195; SSE2-NEXT:    pushq %rbx
1196; SSE2-NEXT:    pushq %rax
1197; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1198; SSE2-NEXT:    movl %esi, %ebx
1199; SSE2-NEXT:    movl %edi, %ebp
1200; SSE2-NEXT:    movaps %xmm1, %xmm0
1201; SSE2-NEXT:    callq __extendhfsf2@PLT
1202; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1203; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1204; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1205; SSE2-NEXT:    callq __extendhfsf2@PLT
1206; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
1207; SSE2-NEXT:    cmovel %ebx, %ebp
1208; SSE2-NEXT:    movl %ebp, %eax
1209; SSE2-NEXT:    addq $8, %rsp
1210; SSE2-NEXT:    popq %rbx
1211; SSE2-NEXT:    popq %rbp
1212; SSE2-NEXT:    retq
1213;
1214; AVX-LABEL: test_f16_one_s:
1215; AVX:       # %bb.0:
1216; AVX-NEXT:    movl %edi, %eax
1217; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
1218; AVX-NEXT:    vpextrw $0, %xmm1, %edx
1219; AVX-NEXT:    movzwl %dx, %edx
1220; AVX-NEXT:    vmovd %edx, %xmm0
1221; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1222; AVX-NEXT:    movzwl %cx, %ecx
1223; AVX-NEXT:    vmovd %ecx, %xmm1
1224; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1225; AVX-NEXT:    vcomiss %xmm0, %xmm1
1226; AVX-NEXT:    cmovel %esi, %eax
1227; AVX-NEXT:    retq
1228;
1229; X86-FP16-LABEL: test_f16_one_s:
1230; X86-FP16:       # %bb.0:
1231; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1232; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1233; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1234; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1235; X86-FP16-NEXT:    cmovnel %eax, %ecx
1236; X86-FP16-NEXT:    movl (%ecx), %eax
1237; X86-FP16-NEXT:    retl
1238;
1239; X64-FP16-LABEL: test_f16_one_s:
1240; X64-FP16:       # %bb.0:
1241; X64-FP16-NEXT:    movl %edi, %eax
1242; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
1243; X64-FP16-NEXT:    cmovel %esi, %eax
1244; X64-FP16-NEXT:    retq
1245  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1246                                               half %f1, half %f2, metadata !"one",
1247                                               metadata !"fpexcept.strict") #0
1248  %res = select i1 %cond, i32 %a, i32 %b
1249  ret i32 %res
1250}
1251
1252define i32 @test_f16_ord_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1253; SSE2-LABEL: test_f16_ord_s:
1254; SSE2:       # %bb.0:
1255; SSE2-NEXT:    pushq %rbp
1256; SSE2-NEXT:    pushq %rbx
1257; SSE2-NEXT:    pushq %rax
1258; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1259; SSE2-NEXT:    movl %esi, %ebx
1260; SSE2-NEXT:    movl %edi, %ebp
1261; SSE2-NEXT:    movaps %xmm1, %xmm0
1262; SSE2-NEXT:    callq __extendhfsf2@PLT
1263; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1264; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1265; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1266; SSE2-NEXT:    callq __extendhfsf2@PLT
1267; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
1268; SSE2-NEXT:    cmovpl %ebx, %ebp
1269; SSE2-NEXT:    movl %ebp, %eax
1270; SSE2-NEXT:    addq $8, %rsp
1271; SSE2-NEXT:    popq %rbx
1272; SSE2-NEXT:    popq %rbp
1273; SSE2-NEXT:    retq
1274;
1275; AVX-LABEL: test_f16_ord_s:
1276; AVX:       # %bb.0:
1277; AVX-NEXT:    movl %edi, %eax
1278; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
1279; AVX-NEXT:    vpextrw $0, %xmm1, %edx
1280; AVX-NEXT:    movzwl %dx, %edx
1281; AVX-NEXT:    vmovd %edx, %xmm0
1282; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1283; AVX-NEXT:    movzwl %cx, %ecx
1284; AVX-NEXT:    vmovd %ecx, %xmm1
1285; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1286; AVX-NEXT:    vcomiss %xmm0, %xmm1
1287; AVX-NEXT:    cmovpl %esi, %eax
1288; AVX-NEXT:    retq
1289;
1290; X86-FP16-LABEL: test_f16_ord_s:
1291; X86-FP16:       # %bb.0:
1292; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1293; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1294; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1295; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1296; X86-FP16-NEXT:    cmovnpl %eax, %ecx
1297; X86-FP16-NEXT:    movl (%ecx), %eax
1298; X86-FP16-NEXT:    retl
1299;
1300; X64-FP16-LABEL: test_f16_ord_s:
1301; X64-FP16:       # %bb.0:
1302; X64-FP16-NEXT:    movl %edi, %eax
1303; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
1304; X64-FP16-NEXT:    cmovpl %esi, %eax
1305; X64-FP16-NEXT:    retq
1306  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1307                                               half %f1, half %f2, metadata !"ord",
1308                                               metadata !"fpexcept.strict") #0
1309  %res = select i1 %cond, i32 %a, i32 %b
1310  ret i32 %res
1311}
1312
1313define i32 @test_f16_ueq_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
1314; SSE2-LABEL: test_f16_ueq_s:
1315; SSE2:       # %bb.0:
1316; SSE2-NEXT:    pushq %rbp
1317; SSE2-NEXT:    pushq %rbx
1318; SSE2-NEXT:    pushq %rax
1319; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1320; SSE2-NEXT:    movl %esi, %ebx
1321; SSE2-NEXT:    movl %edi, %ebp
1322; SSE2-NEXT:    movaps %xmm1, %xmm0
1323; SSE2-NEXT:    callq __extendhfsf2@PLT
1324; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1325; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1326; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
1327; SSE2-NEXT:    callq __extendhfsf2@PLT
1328; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
1329; SSE2-NEXT:    cmovnel %ebx, %ebp
1330; SSE2-NEXT:    movl %ebp, %eax
1331; SSE2-NEXT:    addq $8, %rsp
1332; SSE2-NEXT:    popq %rbx
1333; SSE2-NEXT:    popq %rbp
1334; SSE2-NEXT:    retq
1335;
1336; AVX-LABEL: test_f16_ueq_s:
1337; AVX:       # %bb.0:
1338; AVX-NEXT:    movl %edi, %eax
1339; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
1340; AVX-NEXT:    vpextrw $0, %xmm1, %edx
1341; AVX-NEXT:    movzwl %dx, %edx
1342; AVX-NEXT:    vmovd %edx, %xmm0
1343; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
1344; AVX-NEXT:    movzwl %cx, %ecx
1345; AVX-NEXT:    vmovd %ecx, %xmm1
1346; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
1347; AVX-NEXT:    vcomiss %xmm0, %xmm1
1348; AVX-NEXT:    cmovnel %esi, %eax
1349; AVX-NEXT:    retq
1350;
1351; X86-FP16-LABEL: test_f16_ueq_s:
1352; X86-FP16:       # %bb.0:
1353; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
1354; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
1355; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
1356; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1357; X86-FP16-NEXT:    cmovel %eax, %ecx
1358; X86-FP16-NEXT:    movl (%ecx), %eax
1359; X86-FP16-NEXT:    retl
1360;
1361; X64-FP16-LABEL: test_f16_ueq_s:
1362; X64-FP16:       # %bb.0:
1363; X64-FP16-NEXT:    movl %edi, %eax
1364; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
1365; X64-FP16-NEXT:    cmovnel %esi, %eax
1366; X64-FP16-NEXT:    retq
1367  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
1368                                               half %f1, half %f2, metadata !"ueq",
1369                                               metadata !"fpexcept.strict") #0
1370  %res = select i1 %cond, i32 %a, i32 %b
1371  ret i32 %res
1372}
1373
define i32 @test_f16_ugt_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ugt_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ugt_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ugt_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ugt_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vcomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmovael %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uge_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uge_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    comiss %xmm0, %xmm1
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uge_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    vpextrw $0, %xmm0, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_uge_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_uge_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vcomish %xmm0, %xmm1
; X64-FP16-NEXT:    cmoval %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ult_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ult_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovael %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ult_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovael %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ult_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ult_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovael %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ule_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ule_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmoval %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ule_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmoval %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_ule_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovbel %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_ule_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmoval %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

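; une is true when ZF=0 (not equal) or PF=1 (unordered), so it takes two
; cmovs; uno maps directly onto PF, so a single parity-based cmov suffices.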
define i32 @test_f16_une_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_une_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebp, %ebx
; SSE2-NEXT:    cmovpl %ebp, %ebx
; SSE2-NEXT:    movl %ebx, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_une_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %esi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %edi, %eax
; AVX-NEXT:    cmovpl %edi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_une_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovnel %eax, %ecx
; X86-FP16-NEXT:    cmovpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_une_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %esi, %eax
; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnel %edi, %eax
; X64-FP16-NEXT:    cmovpl %edi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_uno_s(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_uno_s:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    comiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_uno_s:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vcomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnpl %esi, %eax
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: test_f16_uno_s:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vcomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-FP16-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; X86-FP16-NEXT:    cmovpl %eax, %ecx
; X86-FP16-NEXT:    movl (%ecx), %eax
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: test_f16_uno_s:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    movl %edi, %eax
; X64-FP16-NEXT:    vcomish %xmm1, %xmm0
; X64-FP16-NEXT:    cmovnpl %esi, %eax
; X64-FP16-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmps.f16(
                                               half %f1, half %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

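; foo exercises branch lowering rather than select: the quiet ogt compare
; (fcmp, hence (v)ucomis*) feeds ja/jbe directly, turning the taken path
; into a conditional tail call to bar.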
define void @foo(half %0, half %1) #0 {
; SSE2-LABEL: foo:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    jbe .LBB28_1
; SSE2-NEXT:  # %bb.2:
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    jmp bar@PLT # TAILCALL
; SSE2-NEXT:  .LBB28_1:
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: foo:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    vpextrw $0, %xmm1, %ecx
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    ja bar@PLT # TAILCALL
; AVX-NEXT:  # %bb.1:
; AVX-NEXT:    retq
;
; X86-FP16-LABEL: foo:
; X86-FP16:       # %bb.0:
; X86-FP16-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-FP16-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; X86-FP16-NEXT:    ja bar@PLT # TAILCALL
; X86-FP16-NEXT:  # %bb.1:
; X86-FP16-NEXT:    retl
;
; X64-FP16-LABEL: foo:
; X64-FP16:       # %bb.0:
; X64-FP16-NEXT:    vucomish %xmm1, %xmm0
; X64-FP16-NEXT:    ja bar@PLT # TAILCALL
; X64-FP16-NEXT:  # %bb.1:
; X64-FP16-NEXT:    retq
  %3 = call i1 @llvm.experimental.constrained.fcmp.f16( half %0, half %1, metadata !"ogt", metadata !"fpexcept.strict") #0
  br i1 %3, label %4, label %5

4:                                                ; preds = %2
  tail call void @bar() #0
  br label %5

5:                                                ; preds = %4, %2
  ret void
}
declare void @bar()

attributes #0 = { strictfp nounwind }

declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)
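; fcmp is a quiet comparison (lowered to (v)ucomis*, which raises the
; invalid exception only for signaling NaNs); fcmps is signaling (lowered
; to (v)comis*, which raises invalid for any NaN operand).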