xref: /llvm-project/llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll (revision b53046122fc5b4f087bfeee84ee6089b66fa7390)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
4
; Quiet strict fcmp "oeq" on v8f16: lowers to a single vcmpeqph into %k1 feeding a
; vpblendmw masked select; i686 realigns the stack and reads %f2 from 8(%ebp).
5define <8 x i16> @test_v8f16_oeq_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
6; X86-LABEL: test_v8f16_oeq_q:
7; X86:       # %bb.0:
8; X86-NEXT:    pushl %ebp
9; X86-NEXT:    movl %esp, %ebp
10; X86-NEXT:    andl $-16, %esp
11; X86-NEXT:    subl $16, %esp
12; X86-NEXT:    vcmpeqph 8(%ebp), %xmm2, %k1
13; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
14; X86-NEXT:    movl %ebp, %esp
15; X86-NEXT:    popl %ebp
16; X86-NEXT:    retl
17;
18; X64-LABEL: test_v8f16_oeq_q:
19; X64:       # %bb.0:
20; X64-NEXT:    vcmpeqph %xmm3, %xmm2, %k1
21; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
22; X64-NEXT:    retq
23  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
24                                               <8 x half> %f1, <8 x half> %f2, metadata !"oeq",
25                                               metadata !"fpexcept.strict") #0
26  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
27  ret <8 x i16> %res
28}
29
; Quiet strict fcmp "ogt": X86 uses vcmpgt_oqph against memory; X64 commutes the
; operands and uses vcmplt_oqph (%xmm3 vs %xmm2 swapped), an equivalent predicate.
30define <8 x i16> @test_v8f16_ogt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
31; X86-LABEL: test_v8f16_ogt_q:
32; X86:       # %bb.0:
33; X86-NEXT:    pushl %ebp
34; X86-NEXT:    movl %esp, %ebp
35; X86-NEXT:    andl $-16, %esp
36; X86-NEXT:    subl $16, %esp
37; X86-NEXT:    vcmpgt_oqph 8(%ebp), %xmm2, %k1
38; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
39; X86-NEXT:    movl %ebp, %esp
40; X86-NEXT:    popl %ebp
41; X86-NEXT:    retl
42;
43; X64-LABEL: test_v8f16_ogt_q:
44; X64:       # %bb.0:
45; X64-NEXT:    vcmplt_oqph %xmm2, %xmm3, %k1
46; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
47; X64-NEXT:    retq
48  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
49                                               <8 x half> %f1, <8 x half> %f2, metadata !"ogt",
50                                               metadata !"fpexcept.strict") #0
51  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
52  ret <8 x i16> %res
53}
54
; Quiet strict fcmp "oge": X86 uses vcmpge_oqph against memory; X64 commutes to
; vcmple_oqph with the register operands swapped.
55define <8 x i16> @test_v8f16_oge_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
56; X86-LABEL: test_v8f16_oge_q:
57; X86:       # %bb.0:
58; X86-NEXT:    pushl %ebp
59; X86-NEXT:    movl %esp, %ebp
60; X86-NEXT:    andl $-16, %esp
61; X86-NEXT:    subl $16, %esp
62; X86-NEXT:    vcmpge_oqph 8(%ebp), %xmm2, %k1
63; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
64; X86-NEXT:    movl %ebp, %esp
65; X86-NEXT:    popl %ebp
66; X86-NEXT:    retl
67;
68; X64-LABEL: test_v8f16_oge_q:
69; X64:       # %bb.0:
70; X64-NEXT:    vcmple_oqph %xmm2, %xmm3, %k1
71; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
72; X64-NEXT:    retq
73  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
74                                               <8 x half> %f1, <8 x half> %f2, metadata !"oge",
75                                               metadata !"fpexcept.strict") #0
76  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
77  ret <8 x i16> %res
78}
79
; Quiet strict fcmp "olt": direct vcmplt_oqph on both targets (no commutation needed).
80define <8 x i16> @test_v8f16_olt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
81; X86-LABEL: test_v8f16_olt_q:
82; X86:       # %bb.0:
83; X86-NEXT:    pushl %ebp
84; X86-NEXT:    movl %esp, %ebp
85; X86-NEXT:    andl $-16, %esp
86; X86-NEXT:    subl $16, %esp
87; X86-NEXT:    vcmplt_oqph 8(%ebp), %xmm2, %k1
88; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
89; X86-NEXT:    movl %ebp, %esp
90; X86-NEXT:    popl %ebp
91; X86-NEXT:    retl
92;
93; X64-LABEL: test_v8f16_olt_q:
94; X64:       # %bb.0:
95; X64-NEXT:    vcmplt_oqph %xmm3, %xmm2, %k1
96; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
97; X64-NEXT:    retq
98  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
99                                               <8 x half> %f1, <8 x half> %f2, metadata !"olt",
100                                               metadata !"fpexcept.strict") #0
101  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
102  ret <8 x i16> %res
103}
104
; Quiet strict fcmp "ole": direct vcmple_oqph on both targets.
105define <8 x i16> @test_v8f16_ole_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
106; X86-LABEL: test_v8f16_ole_q:
107; X86:       # %bb.0:
108; X86-NEXT:    pushl %ebp
109; X86-NEXT:    movl %esp, %ebp
110; X86-NEXT:    andl $-16, %esp
111; X86-NEXT:    subl $16, %esp
112; X86-NEXT:    vcmple_oqph 8(%ebp), %xmm2, %k1
113; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
114; X86-NEXT:    movl %ebp, %esp
115; X86-NEXT:    popl %ebp
116; X86-NEXT:    retl
117;
118; X64-LABEL: test_v8f16_ole_q:
119; X64:       # %bb.0:
120; X64-NEXT:    vcmple_oqph %xmm3, %xmm2, %k1
121; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
122; X64-NEXT:    retq
123  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
124                                               <8 x half> %f1, <8 x half> %f2, metadata !"ole",
125                                               metadata !"fpexcept.strict") #0
126  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
127  ret <8 x i16> %res
128}
129
; Quiet strict fcmp "one" (ordered not-equal): vcmpneq_oqph on both targets.
130define <8 x i16> @test_v8f16_one_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
131; X86-LABEL: test_v8f16_one_q:
132; X86:       # %bb.0:
133; X86-NEXT:    pushl %ebp
134; X86-NEXT:    movl %esp, %ebp
135; X86-NEXT:    andl $-16, %esp
136; X86-NEXT:    subl $16, %esp
137; X86-NEXT:    vcmpneq_oqph 8(%ebp), %xmm2, %k1
138; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
139; X86-NEXT:    movl %ebp, %esp
140; X86-NEXT:    popl %ebp
141; X86-NEXT:    retl
142;
143; X64-LABEL: test_v8f16_one_q:
144; X64:       # %bb.0:
145; X64-NEXT:    vcmpneq_oqph %xmm3, %xmm2, %k1
146; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
147; X64-NEXT:    retq
148  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
149                                               <8 x half> %f1, <8 x half> %f2, metadata !"one",
150                                               metadata !"fpexcept.strict") #0
151  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
152  ret <8 x i16> %res
153}
154
; Quiet strict fcmp "ord" (neither operand NaN): vcmpordph on both targets.
155define <8 x i16> @test_v8f16_ord_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
156; X86-LABEL: test_v8f16_ord_q:
157; X86:       # %bb.0:
158; X86-NEXT:    pushl %ebp
159; X86-NEXT:    movl %esp, %ebp
160; X86-NEXT:    andl $-16, %esp
161; X86-NEXT:    subl $16, %esp
162; X86-NEXT:    vcmpordph 8(%ebp), %xmm2, %k1
163; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
164; X86-NEXT:    movl %ebp, %esp
165; X86-NEXT:    popl %ebp
166; X86-NEXT:    retl
167;
168; X64-LABEL: test_v8f16_ord_q:
169; X64:       # %bb.0:
170; X64-NEXT:    vcmpordph %xmm3, %xmm2, %k1
171; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
172; X64-NEXT:    retq
173  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
174                                               <8 x half> %f1, <8 x half> %f2, metadata !"ord",
175                                               metadata !"fpexcept.strict") #0
176  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
177  ret <8 x i16> %res
178}
179
; Quiet strict fcmp "ueq" (unordered-or-equal): vcmpeq_uqph on both targets.
180define <8 x i16> @test_v8f16_ueq_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
181; X86-LABEL: test_v8f16_ueq_q:
182; X86:       # %bb.0:
183; X86-NEXT:    pushl %ebp
184; X86-NEXT:    movl %esp, %ebp
185; X86-NEXT:    andl $-16, %esp
186; X86-NEXT:    subl $16, %esp
187; X86-NEXT:    vcmpeq_uqph 8(%ebp), %xmm2, %k1
188; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
189; X86-NEXT:    movl %ebp, %esp
190; X86-NEXT:    popl %ebp
191; X86-NEXT:    retl
192;
193; X64-LABEL: test_v8f16_ueq_q:
194; X64:       # %bb.0:
195; X64-NEXT:    vcmpeq_uqph %xmm3, %xmm2, %k1
196; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
197; X64-NEXT:    retq
198  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
199                                               <8 x half> %f1, <8 x half> %f2, metadata !"ueq",
200                                               metadata !"fpexcept.strict") #0
201  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
202  ret <8 x i16> %res
203}
204
; Quiet strict fcmp "ugt": expressed as not-less-equal, vcmpnle_uqph on both targets.
205define <8 x i16> @test_v8f16_ugt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
206; X86-LABEL: test_v8f16_ugt_q:
207; X86:       # %bb.0:
208; X86-NEXT:    pushl %ebp
209; X86-NEXT:    movl %esp, %ebp
210; X86-NEXT:    andl $-16, %esp
211; X86-NEXT:    subl $16, %esp
212; X86-NEXT:    vcmpnle_uqph 8(%ebp), %xmm2, %k1
213; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
214; X86-NEXT:    movl %ebp, %esp
215; X86-NEXT:    popl %ebp
216; X86-NEXT:    retl
217;
218; X64-LABEL: test_v8f16_ugt_q:
219; X64:       # %bb.0:
220; X64-NEXT:    vcmpnle_uqph %xmm3, %xmm2, %k1
221; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
222; X64-NEXT:    retq
223  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
224                                               <8 x half> %f1, <8 x half> %f2, metadata !"ugt",
225                                               metadata !"fpexcept.strict") #0
226  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
227  ret <8 x i16> %res
228}
229
; Quiet strict fcmp "uge": expressed as not-less-than, vcmpnlt_uqph on both targets.
230define <8 x i16> @test_v8f16_uge_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
231; X86-LABEL: test_v8f16_uge_q:
232; X86:       # %bb.0:
233; X86-NEXT:    pushl %ebp
234; X86-NEXT:    movl %esp, %ebp
235; X86-NEXT:    andl $-16, %esp
236; X86-NEXT:    subl $16, %esp
237; X86-NEXT:    vcmpnlt_uqph 8(%ebp), %xmm2, %k1
238; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
239; X86-NEXT:    movl %ebp, %esp
240; X86-NEXT:    popl %ebp
241; X86-NEXT:    retl
242;
243; X64-LABEL: test_v8f16_uge_q:
244; X64:       # %bb.0:
245; X64-NEXT:    vcmpnlt_uqph %xmm3, %xmm2, %k1
246; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
247; X64-NEXT:    retq
248  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
249                                               <8 x half> %f1, <8 x half> %f2, metadata !"uge",
250                                               metadata !"fpexcept.strict") #0
251  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
252  ret <8 x i16> %res
253}
254
; Quiet strict fcmp "ult": X86 uses vcmpnge_uqph against memory; X64 commutes to
; vcmpnle_uqph with the register operands swapped.
255define <8 x i16> @test_v8f16_ult_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
256; X86-LABEL: test_v8f16_ult_q:
257; X86:       # %bb.0:
258; X86-NEXT:    pushl %ebp
259; X86-NEXT:    movl %esp, %ebp
260; X86-NEXT:    andl $-16, %esp
261; X86-NEXT:    subl $16, %esp
262; X86-NEXT:    vcmpnge_uqph 8(%ebp), %xmm2, %k1
263; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
264; X86-NEXT:    movl %ebp, %esp
265; X86-NEXT:    popl %ebp
266; X86-NEXT:    retl
267;
268; X64-LABEL: test_v8f16_ult_q:
269; X64:       # %bb.0:
270; X64-NEXT:    vcmpnle_uqph %xmm2, %xmm3, %k1
271; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
272; X64-NEXT:    retq
273  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
274                                               <8 x half> %f1, <8 x half> %f2, metadata !"ult",
275                                               metadata !"fpexcept.strict") #0
276  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
277  ret <8 x i16> %res
278}
279
; Quiet strict fcmp "ule": X86 uses vcmpngt_uqph against memory; X64 commutes to
; vcmpnlt_uqph with the register operands swapped.
280define <8 x i16> @test_v8f16_ule_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
281; X86-LABEL: test_v8f16_ule_q:
282; X86:       # %bb.0:
283; X86-NEXT:    pushl %ebp
284; X86-NEXT:    movl %esp, %ebp
285; X86-NEXT:    andl $-16, %esp
286; X86-NEXT:    subl $16, %esp
287; X86-NEXT:    vcmpngt_uqph 8(%ebp), %xmm2, %k1
288; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
289; X86-NEXT:    movl %ebp, %esp
290; X86-NEXT:    popl %ebp
291; X86-NEXT:    retl
292;
293; X64-LABEL: test_v8f16_ule_q:
294; X64:       # %bb.0:
295; X64-NEXT:    vcmpnlt_uqph %xmm2, %xmm3, %k1
296; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
297; X64-NEXT:    retq
298  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
299                                               <8 x half> %f1, <8 x half> %f2, metadata !"ule",
300                                               metadata !"fpexcept.strict") #0
301  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
302  ret <8 x i16> %res
303}
304
; Quiet strict fcmp "une" (unordered-or-not-equal): vcmpneqph on both targets.
305define <8 x i16> @test_v8f16_une_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
306; X86-LABEL: test_v8f16_une_q:
307; X86:       # %bb.0:
308; X86-NEXT:    pushl %ebp
309; X86-NEXT:    movl %esp, %ebp
310; X86-NEXT:    andl $-16, %esp
311; X86-NEXT:    subl $16, %esp
312; X86-NEXT:    vcmpneqph 8(%ebp), %xmm2, %k1
313; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
314; X86-NEXT:    movl %ebp, %esp
315; X86-NEXT:    popl %ebp
316; X86-NEXT:    retl
317;
318; X64-LABEL: test_v8f16_une_q:
319; X64:       # %bb.0:
320; X64-NEXT:    vcmpneqph %xmm3, %xmm2, %k1
321; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
322; X64-NEXT:    retq
323  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
324                                               <8 x half> %f1, <8 x half> %f2, metadata !"une",
325                                               metadata !"fpexcept.strict") #0
326  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
327  ret <8 x i16> %res
328}
329
; Quiet strict fcmp "uno" (either operand NaN): vcmpunordph on both targets.
330define <8 x i16> @test_v8f16_uno_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
331; X86-LABEL: test_v8f16_uno_q:
332; X86:       # %bb.0:
333; X86-NEXT:    pushl %ebp
334; X86-NEXT:    movl %esp, %ebp
335; X86-NEXT:    andl $-16, %esp
336; X86-NEXT:    subl $16, %esp
337; X86-NEXT:    vcmpunordph 8(%ebp), %xmm2, %k1
338; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
339; X86-NEXT:    movl %ebp, %esp
340; X86-NEXT:    popl %ebp
341; X86-NEXT:    retl
342;
343; X64-LABEL: test_v8f16_uno_q:
344; X64:       # %bb.0:
345; X64-NEXT:    vcmpunordph %xmm3, %xmm2, %k1
346; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
347; X64-NEXT:    retq
348  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
349                                               <8 x half> %f1, <8 x half> %f2, metadata !"uno",
350                                               metadata !"fpexcept.strict") #0
351  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
352  ret <8 x i16> %res
353}
354
; Signaling strict fcmps "oeq": uses the signaling immediate form vcmpeq_osph
; (contrast with vcmpeqph in the quiet test_v8f16_oeq_q).
355define <8 x i16> @test_v8f16_oeq_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
356; X86-LABEL: test_v8f16_oeq_s:
357; X86:       # %bb.0:
358; X86-NEXT:    pushl %ebp
359; X86-NEXT:    movl %esp, %ebp
360; X86-NEXT:    andl $-16, %esp
361; X86-NEXT:    subl $16, %esp
362; X86-NEXT:    vcmpeq_osph 8(%ebp), %xmm2, %k1
363; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
364; X86-NEXT:    movl %ebp, %esp
365; X86-NEXT:    popl %ebp
366; X86-NEXT:    retl
367;
368; X64-LABEL: test_v8f16_oeq_s:
369; X64:       # %bb.0:
370; X64-NEXT:    vcmpeq_osph %xmm3, %xmm2, %k1
371; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
372; X64-NEXT:    retq
373  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
374                                               <8 x half> %f1, <8 x half> %f2, metadata !"oeq",
375                                               metadata !"fpexcept.strict") #0
376  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
377  ret <8 x i16> %res
378}
379
; Signaling strict fcmps "ogt": X86 uses vcmpgtph against memory; X64 commutes to
; vcmpltph with the register operands swapped.
380define <8 x i16> @test_v8f16_ogt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
381; X86-LABEL: test_v8f16_ogt_s:
382; X86:       # %bb.0:
383; X86-NEXT:    pushl %ebp
384; X86-NEXT:    movl %esp, %ebp
385; X86-NEXT:    andl $-16, %esp
386; X86-NEXT:    subl $16, %esp
387; X86-NEXT:    vcmpgtph 8(%ebp), %xmm2, %k1
388; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
389; X86-NEXT:    movl %ebp, %esp
390; X86-NEXT:    popl %ebp
391; X86-NEXT:    retl
392;
393; X64-LABEL: test_v8f16_ogt_s:
394; X64:       # %bb.0:
395; X64-NEXT:    vcmpltph %xmm2, %xmm3, %k1
396; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
397; X64-NEXT:    retq
398  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
399                                               <8 x half> %f1, <8 x half> %f2, metadata !"ogt",
400                                               metadata !"fpexcept.strict") #0
401  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
402  ret <8 x i16> %res
403}
404
; Signaling strict fcmps "oge": X86 uses vcmpgeph against memory; X64 commutes to
; vcmpleph with the register operands swapped.
405define <8 x i16> @test_v8f16_oge_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
406; X86-LABEL: test_v8f16_oge_s:
407; X86:       # %bb.0:
408; X86-NEXT:    pushl %ebp
409; X86-NEXT:    movl %esp, %ebp
410; X86-NEXT:    andl $-16, %esp
411; X86-NEXT:    subl $16, %esp
412; X86-NEXT:    vcmpgeph 8(%ebp), %xmm2, %k1
413; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
414; X86-NEXT:    movl %ebp, %esp
415; X86-NEXT:    popl %ebp
416; X86-NEXT:    retl
417;
418; X64-LABEL: test_v8f16_oge_s:
419; X64:       # %bb.0:
420; X64-NEXT:    vcmpleph %xmm2, %xmm3, %k1
421; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
422; X64-NEXT:    retq
423  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
424                                               <8 x half> %f1, <8 x half> %f2, metadata !"oge",
425                                               metadata !"fpexcept.strict") #0
426  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
427  ret <8 x i16> %res
428}
429
; Signaling strict fcmps "olt": direct vcmpltph on both targets.
430define <8 x i16> @test_v8f16_olt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
431; X86-LABEL: test_v8f16_olt_s:
432; X86:       # %bb.0:
433; X86-NEXT:    pushl %ebp
434; X86-NEXT:    movl %esp, %ebp
435; X86-NEXT:    andl $-16, %esp
436; X86-NEXT:    subl $16, %esp
437; X86-NEXT:    vcmpltph 8(%ebp), %xmm2, %k1
438; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
439; X86-NEXT:    movl %ebp, %esp
440; X86-NEXT:    popl %ebp
441; X86-NEXT:    retl
442;
443; X64-LABEL: test_v8f16_olt_s:
444; X64:       # %bb.0:
445; X64-NEXT:    vcmpltph %xmm3, %xmm2, %k1
446; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
447; X64-NEXT:    retq
448  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
449                                               <8 x half> %f1, <8 x half> %f2, metadata !"olt",
450                                               metadata !"fpexcept.strict") #0
451  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
452  ret <8 x i16> %res
453}
454
; Signaling strict fcmps "ole": direct vcmpleph on both targets.
455define <8 x i16> @test_v8f16_ole_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
456; X86-LABEL: test_v8f16_ole_s:
457; X86:       # %bb.0:
458; X86-NEXT:    pushl %ebp
459; X86-NEXT:    movl %esp, %ebp
460; X86-NEXT:    andl $-16, %esp
461; X86-NEXT:    subl $16, %esp
462; X86-NEXT:    vcmpleph 8(%ebp), %xmm2, %k1
463; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
464; X86-NEXT:    movl %ebp, %esp
465; X86-NEXT:    popl %ebp
466; X86-NEXT:    retl
467;
468; X64-LABEL: test_v8f16_ole_s:
469; X64:       # %bb.0:
470; X64-NEXT:    vcmpleph %xmm3, %xmm2, %k1
471; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
472; X64-NEXT:    retq
473  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
474                                               <8 x half> %f1, <8 x half> %f2, metadata !"ole",
475                                               metadata !"fpexcept.strict") #0
476  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
477  ret <8 x i16> %res
478}
479
; Signaling strict fcmps "one": vcmpneq_osph on both targets.
480define <8 x i16> @test_v8f16_one_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
481; X86-LABEL: test_v8f16_one_s:
482; X86:       # %bb.0:
483; X86-NEXT:    pushl %ebp
484; X86-NEXT:    movl %esp, %ebp
485; X86-NEXT:    andl $-16, %esp
486; X86-NEXT:    subl $16, %esp
487; X86-NEXT:    vcmpneq_osph 8(%ebp), %xmm2, %k1
488; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
489; X86-NEXT:    movl %ebp, %esp
490; X86-NEXT:    popl %ebp
491; X86-NEXT:    retl
492;
493; X64-LABEL: test_v8f16_one_s:
494; X64:       # %bb.0:
495; X64-NEXT:    vcmpneq_osph %xmm3, %xmm2, %k1
496; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
497; X64-NEXT:    retq
498  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
499                                               <8 x half> %f1, <8 x half> %f2, metadata !"one",
500                                               metadata !"fpexcept.strict") #0
501  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
502  ret <8 x i16> %res
503}
504
; Signaling strict fcmps "ord": vcmpord_sph (signaling variant of vcmpordph).
505define <8 x i16> @test_v8f16_ord_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
506; X86-LABEL: test_v8f16_ord_s:
507; X86:       # %bb.0:
508; X86-NEXT:    pushl %ebp
509; X86-NEXT:    movl %esp, %ebp
510; X86-NEXT:    andl $-16, %esp
511; X86-NEXT:    subl $16, %esp
512; X86-NEXT:    vcmpord_sph 8(%ebp), %xmm2, %k1
513; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
514; X86-NEXT:    movl %ebp, %esp
515; X86-NEXT:    popl %ebp
516; X86-NEXT:    retl
517;
518; X64-LABEL: test_v8f16_ord_s:
519; X64:       # %bb.0:
520; X64-NEXT:    vcmpord_sph %xmm3, %xmm2, %k1
521; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
522; X64-NEXT:    retq
523  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
524                                               <8 x half> %f1, <8 x half> %f2, metadata !"ord",
525                                               metadata !"fpexcept.strict") #0
526  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
527  ret <8 x i16> %res
528}
529
; Signaling strict fcmps "ueq": vcmpeq_usph on both targets.
530define <8 x i16> @test_v8f16_ueq_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
531; X86-LABEL: test_v8f16_ueq_s:
532; X86:       # %bb.0:
533; X86-NEXT:    pushl %ebp
534; X86-NEXT:    movl %esp, %ebp
535; X86-NEXT:    andl $-16, %esp
536; X86-NEXT:    subl $16, %esp
537; X86-NEXT:    vcmpeq_usph 8(%ebp), %xmm2, %k1
538; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
539; X86-NEXT:    movl %ebp, %esp
540; X86-NEXT:    popl %ebp
541; X86-NEXT:    retl
542;
543; X64-LABEL: test_v8f16_ueq_s:
544; X64:       # %bb.0:
545; X64-NEXT:    vcmpeq_usph %xmm3, %xmm2, %k1
546; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
547; X64-NEXT:    retq
548  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
549                                               <8 x half> %f1, <8 x half> %f2, metadata !"ueq",
550                                               metadata !"fpexcept.strict") #0
551  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
552  ret <8 x i16> %res
553}
554
; Signaling strict fcmps "ugt": vcmpnleph on both targets.
555define <8 x i16> @test_v8f16_ugt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
556; X86-LABEL: test_v8f16_ugt_s:
557; X86:       # %bb.0:
558; X86-NEXT:    pushl %ebp
559; X86-NEXT:    movl %esp, %ebp
560; X86-NEXT:    andl $-16, %esp
561; X86-NEXT:    subl $16, %esp
562; X86-NEXT:    vcmpnleph 8(%ebp), %xmm2, %k1
563; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
564; X86-NEXT:    movl %ebp, %esp
565; X86-NEXT:    popl %ebp
566; X86-NEXT:    retl
567;
568; X64-LABEL: test_v8f16_ugt_s:
569; X64:       # %bb.0:
570; X64-NEXT:    vcmpnleph %xmm3, %xmm2, %k1
571; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
572; X64-NEXT:    retq
573  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
574                                               <8 x half> %f1, <8 x half> %f2, metadata !"ugt",
575                                               metadata !"fpexcept.strict") #0
576  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
577  ret <8 x i16> %res
578}
579
; Signaling strict fcmps "uge": vcmpnltph on both targets.
580define <8 x i16> @test_v8f16_uge_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
581; X86-LABEL: test_v8f16_uge_s:
582; X86:       # %bb.0:
583; X86-NEXT:    pushl %ebp
584; X86-NEXT:    movl %esp, %ebp
585; X86-NEXT:    andl $-16, %esp
586; X86-NEXT:    subl $16, %esp
587; X86-NEXT:    vcmpnltph 8(%ebp), %xmm2, %k1
588; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
589; X86-NEXT:    movl %ebp, %esp
590; X86-NEXT:    popl %ebp
591; X86-NEXT:    retl
592;
593; X64-LABEL: test_v8f16_uge_s:
594; X64:       # %bb.0:
595; X64-NEXT:    vcmpnltph %xmm3, %xmm2, %k1
596; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
597; X64-NEXT:    retq
598  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
599                                               <8 x half> %f1, <8 x half> %f2, metadata !"uge",
600                                               metadata !"fpexcept.strict") #0
601  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
602  ret <8 x i16> %res
603}
604
; Signaling strict fcmps "ult": X86 uses vcmpngeph against memory; X64 commutes to
; vcmpnleph with the register operands swapped.
605define <8 x i16> @test_v8f16_ult_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
606; X86-LABEL: test_v8f16_ult_s:
607; X86:       # %bb.0:
608; X86-NEXT:    pushl %ebp
609; X86-NEXT:    movl %esp, %ebp
610; X86-NEXT:    andl $-16, %esp
611; X86-NEXT:    subl $16, %esp
612; X86-NEXT:    vcmpngeph 8(%ebp), %xmm2, %k1
613; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
614; X86-NEXT:    movl %ebp, %esp
615; X86-NEXT:    popl %ebp
616; X86-NEXT:    retl
617;
618; X64-LABEL: test_v8f16_ult_s:
619; X64:       # %bb.0:
620; X64-NEXT:    vcmpnleph %xmm2, %xmm3, %k1
621; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
622; X64-NEXT:    retq
623  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
624                                               <8 x half> %f1, <8 x half> %f2, metadata !"ult",
625                                               metadata !"fpexcept.strict") #0
626  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
627  ret <8 x i16> %res
628}
629
; Signaling strict fcmps "ule": X86 uses vcmpngtph against memory; X64 commutes to
; vcmpnltph with the register operands swapped.
630define <8 x i16> @test_v8f16_ule_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
631; X86-LABEL: test_v8f16_ule_s:
632; X86:       # %bb.0:
633; X86-NEXT:    pushl %ebp
634; X86-NEXT:    movl %esp, %ebp
635; X86-NEXT:    andl $-16, %esp
636; X86-NEXT:    subl $16, %esp
637; X86-NEXT:    vcmpngtph 8(%ebp), %xmm2, %k1
638; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
639; X86-NEXT:    movl %ebp, %esp
640; X86-NEXT:    popl %ebp
641; X86-NEXT:    retl
642;
643; X64-LABEL: test_v8f16_ule_s:
644; X64:       # %bb.0:
645; X64-NEXT:    vcmpnltph %xmm2, %xmm3, %k1
646; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
647; X64-NEXT:    retq
648  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
649                                               <8 x half> %f1, <8 x half> %f2, metadata !"ule",
650                                               metadata !"fpexcept.strict") #0
651  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
652  ret <8 x i16> %res
653}
654
; Signaling strict fcmps "une": vcmpneq_usph on both targets.
655define <8 x i16> @test_v8f16_une_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
656; X86-LABEL: test_v8f16_une_s:
657; X86:       # %bb.0:
658; X86-NEXT:    pushl %ebp
659; X86-NEXT:    movl %esp, %ebp
660; X86-NEXT:    andl $-16, %esp
661; X86-NEXT:    subl $16, %esp
662; X86-NEXT:    vcmpneq_usph 8(%ebp), %xmm2, %k1
663; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
664; X86-NEXT:    movl %ebp, %esp
665; X86-NEXT:    popl %ebp
666; X86-NEXT:    retl
667;
668; X64-LABEL: test_v8f16_une_s:
669; X64:       # %bb.0:
670; X64-NEXT:    vcmpneq_usph %xmm3, %xmm2, %k1
671; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
672; X64-NEXT:    retq
673  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
674                                               <8 x half> %f1, <8 x half> %f2, metadata !"une",
675                                               metadata !"fpexcept.strict") #0
676  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
677  ret <8 x i16> %res
678}
679
; Signaling strict fcmps "uno": vcmpunord_sph (signaling variant of vcmpunordph).
680define <8 x i16> @test_v8f16_uno_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
681; X86-LABEL: test_v8f16_uno_s:
682; X86:       # %bb.0:
683; X86-NEXT:    pushl %ebp
684; X86-NEXT:    movl %esp, %ebp
685; X86-NEXT:    andl $-16, %esp
686; X86-NEXT:    subl $16, %esp
687; X86-NEXT:    vcmpunord_sph 8(%ebp), %xmm2, %k1
688; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
689; X86-NEXT:    movl %ebp, %esp
690; X86-NEXT:    popl %ebp
691; X86-NEXT:    retl
692;
693; X64-LABEL: test_v8f16_uno_s:
694; X64:       # %bb.0:
695; X64-NEXT:    vcmpunord_sph %xmm3, %xmm2, %k1
696; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
697; X64-NEXT:    retq
698  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
699                                               <8 x half> %f1, <8 x half> %f2, metadata !"uno",
700                                               metadata !"fpexcept.strict") #0
701  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
702  ret <8 x i16> %res
703}
704
; Non-power-of-128-bit case: v2f16 is scalarized. Each lane uses quiet vucomish;
; ordered-equal needs ZF set AND PF clear (PF signals unordered), hence the
; setnp/sete/testb/setne combine per lane. Lane results are placed into bit 0
; and bit 1 of a k-register via kshiftlb/kshiftrb and merged with korb.
705define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <2 x half> %f2) #0 {
706; X86-LABEL: test_v2f16_oeq_q:
707; X86:       # %bb.0:
708; X86-NEXT:    pushl %ebp
709; X86-NEXT:    movl %esp, %ebp
710; X86-NEXT:    andl $-16, %esp
711; X86-NEXT:    subl $16, %esp
712; X86-NEXT:    vucomish 8(%ebp), %xmm2
713; X86-NEXT:    setnp %al
714; X86-NEXT:    sete %cl
715; X86-NEXT:    testb %al, %cl
716; X86-NEXT:    setne %al
717; X86-NEXT:    kmovd %eax, %k0
718; X86-NEXT:    kshiftlb $7, %k0, %k0
719; X86-NEXT:    kshiftrb $7, %k0, %k0
720; X86-NEXT:    vpsrld $16, %xmm2, %xmm2
721; X86-NEXT:    vucomish 10(%ebp), %xmm2
722; X86-NEXT:    setnp %al
723; X86-NEXT:    sete %cl
724; X86-NEXT:    testb %al, %cl
725; X86-NEXT:    setne %al
726; X86-NEXT:    kmovd %eax, %k1
727; X86-NEXT:    kshiftlb $7, %k1, %k1
728; X86-NEXT:    kshiftrb $6, %k1, %k1
729; X86-NEXT:    korb %k1, %k0, %k1
730; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
731; X86-NEXT:    movl %ebp, %esp
732; X86-NEXT:    popl %ebp
733; X86-NEXT:    retl
734;
735; X64-LABEL: test_v2f16_oeq_q:
736; X64:       # %bb.0:
737; X64-NEXT:    vucomish %xmm3, %xmm2
738; X64-NEXT:    setnp %al
739; X64-NEXT:    sete %cl
740; X64-NEXT:    testb %al, %cl
741; X64-NEXT:    setne %al
742; X64-NEXT:    kmovd %eax, %k0
743; X64-NEXT:    kshiftlb $7, %k0, %k0
744; X64-NEXT:    kshiftrb $7, %k0, %k0
745; X64-NEXT:    vpsrld $16, %xmm3, %xmm3
746; X64-NEXT:    vpsrld $16, %xmm2, %xmm2
747; X64-NEXT:    vucomish %xmm3, %xmm2
748; X64-NEXT:    setnp %al
749; X64-NEXT:    sete %cl
750; X64-NEXT:    testb %al, %cl
751; X64-NEXT:    setne %al
752; X64-NEXT:    kmovd %eax, %k1
753; X64-NEXT:    kshiftlb $7, %k1, %k1
754; X64-NEXT:    kshiftrb $6, %k1, %k1
755; X64-NEXT:    korb %k1, %k0, %k1
756; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
757; X64-NEXT:    retq
758  %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f16(
759                                               <2 x half> %f1, <2 x half> %f2, metadata !"oeq",
760                                               metadata !"fpexcept.strict") #0
761  %res = select <2 x i1> %cond, <2 x i16> %a, <2 x i16> %b
762  ret <2 x i16> %res
763}
764
; Scalarized v2f16, signaling compare: each lane uses vcomish (signaling form,
; unlike vucomish in test_v2f16_oeq_q) with seta for ogt; lane results assembled
; into a 2-bit k-mask via kshiftlb/kshiftrb + korb, then vpblendmw selects.
; Note the IR below calls the signaling intrinsic fcmps despite the _q name.
765define <2 x i16> @test_v2f16_ogt_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <2 x half> %f2) #0 {
766; X86-LABEL: test_v2f16_ogt_q:
767; X86:       # %bb.0:
768; X86-NEXT:    pushl %ebp
769; X86-NEXT:    movl %esp, %ebp
770; X86-NEXT:    andl $-16, %esp
771; X86-NEXT:    subl $16, %esp
772; X86-NEXT:    vcomish 8(%ebp), %xmm2
773; X86-NEXT:    seta %al
774; X86-NEXT:    kmovd %eax, %k0
775; X86-NEXT:    kshiftlb $7, %k0, %k0
776; X86-NEXT:    kshiftrb $7, %k0, %k0
777; X86-NEXT:    vpsrld $16, %xmm2, %xmm2
778; X86-NEXT:    vcomish 10(%ebp), %xmm2
779; X86-NEXT:    seta %al
780; X86-NEXT:    kmovd %eax, %k1
781; X86-NEXT:    kshiftlb $7, %k1, %k1
782; X86-NEXT:    kshiftrb $6, %k1, %k1
783; X86-NEXT:    korb %k1, %k0, %k1
784; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
785; X86-NEXT:    movl %ebp, %esp
786; X86-NEXT:    popl %ebp
787; X86-NEXT:    retl
788;
789; X64-LABEL: test_v2f16_ogt_q:
790; X64:       # %bb.0:
791; X64-NEXT:    vcomish %xmm3, %xmm2
792; X64-NEXT:    seta %al
793; X64-NEXT:    kmovd %eax, %k0
794; X64-NEXT:    kshiftlb $7, %k0, %k0
795; X64-NEXT:    kshiftrb $7, %k0, %k0
796; X64-NEXT:    vpsrld $16, %xmm3, %xmm3
797; X64-NEXT:    vpsrld $16, %xmm2, %xmm2
798; X64-NEXT:    vcomish %xmm3, %xmm2
799; X64-NEXT:    seta %al
800; X64-NEXT:    kmovd %eax, %k1
801; X64-NEXT:    kshiftlb $7, %k1, %k1
802; X64-NEXT:    kshiftrb $6, %k1, %k1
803; X64-NEXT:    korb %k1, %k0, %k1
804; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
805; X64-NEXT:    retq
806  %cond = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f16(
807                                               <2 x half> %f1, <2 x half> %f2, metadata !"ogt",
808                                               metadata !"fpexcept.strict") #0
809  %res = select <2 x i1> %cond, <2 x i16> %a, <2 x i16> %b
810  ret <2 x i16> %res
811}
812
; Strict OGE compare of <4 x half> via the *quiet* constrained intrinsic
; (llvm.experimental.constrained.fcmp), then a select of %a vs %b.
; Expected lowering: four scalar VUCOMISH compares (quiet variant; the
; signaling tests above use VCOMISH), one per lane. Lanes are extracted with
; vpsrld $16 (lane 1), vmovshdup (lane 2) and vpsrlq $48 (lane 3); SETAE
; captures CF==0 (ordered >=). Each bit is shifted into position with
; kshiftlb/kshiftrb and merged via korb; the kandb with $-5 / $-9 masks clear
; lane bits 2 and 3 before their korb. VPBLENDMW does the final masked select.
; NOTE(review): CHECK lines are autogenerated; regenerate with
; update_llc_test_checks.py rather than editing them by hand.
813define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <4 x half> %f2) #0 {
814; X86-LABEL: test_v4f16_oge_q:
815; X86:       # %bb.0:
816; X86-NEXT:    pushl %ebp
817; X86-NEXT:    movl %esp, %ebp
818; X86-NEXT:    andl $-16, %esp
819; X86-NEXT:    subl $16, %esp
820; X86-NEXT:    vucomish 8(%ebp), %xmm2
821; X86-NEXT:    setae %al
822; X86-NEXT:    kmovd %eax, %k0
823; X86-NEXT:    kshiftlb $7, %k0, %k0
824; X86-NEXT:    kshiftrb $7, %k0, %k0
825; X86-NEXT:    vpsrld $16, %xmm2, %xmm3
826; X86-NEXT:    vucomish 10(%ebp), %xmm3
827; X86-NEXT:    setae %al
828; X86-NEXT:    kmovd %eax, %k1
829; X86-NEXT:    kshiftlb $7, %k1, %k1
830; X86-NEXT:    kshiftrb $6, %k1, %k1
831; X86-NEXT:    korb %k1, %k0, %k0
832; X86-NEXT:    movb $-5, %al
833; X86-NEXT:    kmovd %eax, %k1
834; X86-NEXT:    kandb %k1, %k0, %k0
835; X86-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
836; X86-NEXT:    vucomish 12(%ebp), %xmm3
837; X86-NEXT:    setae %al
838; X86-NEXT:    kmovd %eax, %k1
839; X86-NEXT:    kshiftlb $7, %k1, %k1
840; X86-NEXT:    kshiftrb $5, %k1, %k1
841; X86-NEXT:    korb %k1, %k0, %k0
842; X86-NEXT:    movb $-9, %al
843; X86-NEXT:    kmovd %eax, %k1
844; X86-NEXT:    kandb %k1, %k0, %k0
845; X86-NEXT:    vpsrlq $48, %xmm2, %xmm2
846; X86-NEXT:    vucomish 14(%ebp), %xmm2
847; X86-NEXT:    setae %al
848; X86-NEXT:    kmovd %eax, %k1
849; X86-NEXT:    kshiftlb $7, %k1, %k1
850; X86-NEXT:    kshiftrb $4, %k1, %k1
851; X86-NEXT:    korb %k1, %k0, %k1
852; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
853; X86-NEXT:    movl %ebp, %esp
854; X86-NEXT:    popl %ebp
855; X86-NEXT:    retl
856;
857; X64-LABEL: test_v4f16_oge_q:
858; X64:       # %bb.0:
859; X64-NEXT:    vucomish %xmm3, %xmm2
860; X64-NEXT:    setae %al
861; X64-NEXT:    kmovd %eax, %k0
862; X64-NEXT:    kshiftlb $7, %k0, %k0
863; X64-NEXT:    kshiftrb $7, %k0, %k0
864; X64-NEXT:    vpsrld $16, %xmm3, %xmm4
865; X64-NEXT:    vpsrld $16, %xmm2, %xmm5
866; X64-NEXT:    vucomish %xmm4, %xmm5
867; X64-NEXT:    setae %al
868; X64-NEXT:    kmovd %eax, %k1
869; X64-NEXT:    kshiftlb $7, %k1, %k1
870; X64-NEXT:    kshiftrb $6, %k1, %k1
871; X64-NEXT:    korb %k1, %k0, %k0
872; X64-NEXT:    movb $-5, %al
873; X64-NEXT:    kmovd %eax, %k1
874; X64-NEXT:    kandb %k1, %k0, %k0
875; X64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
876; X64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
877; X64-NEXT:    vucomish %xmm4, %xmm5
878; X64-NEXT:    setae %al
879; X64-NEXT:    kmovd %eax, %k1
880; X64-NEXT:    kshiftlb $7, %k1, %k1
881; X64-NEXT:    kshiftrb $5, %k1, %k1
882; X64-NEXT:    korb %k1, %k0, %k0
883; X64-NEXT:    movb $-9, %al
884; X64-NEXT:    kmovd %eax, %k1
885; X64-NEXT:    kandb %k1, %k0, %k0
886; X64-NEXT:    vpsrlq $48, %xmm3, %xmm3
887; X64-NEXT:    vpsrlq $48, %xmm2, %xmm2
888; X64-NEXT:    vucomish %xmm3, %xmm2
889; X64-NEXT:    setae %al
890; X64-NEXT:    kmovd %eax, %k1
891; X64-NEXT:    kshiftlb $7, %k1, %k1
892; X64-NEXT:    kshiftrb $4, %k1, %k1
893; X64-NEXT:    korb %k1, %k0, %k1
894; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
895; X64-NEXT:    retq
896  %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f16(
897                                               <4 x half> %f1, <4 x half> %f2, metadata !"oge",
898                                               metadata !"fpexcept.strict") #0
899  %res = select <4 x i1> %cond, <4 x i16> %a, <4 x i16> %b
900  ret <4 x i16> %res
901}
902
; Strict OLT compare of <4 x half> via the *signaling* constrained intrinsic
; (llvm.experimental.constrained.fcmps), then a select of %a vs %b.
; Expected lowering: four scalar VCOMISH compares, one per lane, with the
; operand order swapped (f2 compared against f1) so that SETA after the
; swapped compare yields f1 < f2; on X86 the %f2 lanes are reloaded from the
; stack with vmovsh. Lane extraction and mask assembly mirror the oge test
; above: vpsrld/vmovshdup/vpsrlq extract lanes, kshiftlb/kshiftrb position
; each bit, kandb with $-5 / $-9 clears the target bit before its korb, and
; VPBLENDMW performs the masked select.
; NOTE(review): CHECK lines are autogenerated; regenerate with
; update_llc_test_checks.py rather than editing them by hand.
903define <4 x i16> @test_v4f16_olt_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <4 x half> %f2) #0 {
904; X86-LABEL: test_v4f16_olt_q:
905; X86:       # %bb.0:
906; X86-NEXT:    pushl %ebp
907; X86-NEXT:    movl %esp, %ebp
908; X86-NEXT:    andl $-16, %esp
909; X86-NEXT:    subl $16, %esp
910; X86-NEXT:    vmovsh 8(%ebp), %xmm3
911; X86-NEXT:    vcomish %xmm2, %xmm3
912; X86-NEXT:    seta %al
913; X86-NEXT:    kmovd %eax, %k0
914; X86-NEXT:    kshiftlb $7, %k0, %k0
915; X86-NEXT:    kshiftrb $7, %k0, %k0
916; X86-NEXT:    vpsrld $16, %xmm2, %xmm3
917; X86-NEXT:    vmovsh 10(%ebp), %xmm4
918; X86-NEXT:    vcomish %xmm3, %xmm4
919; X86-NEXT:    seta %al
920; X86-NEXT:    kmovd %eax, %k1
921; X86-NEXT:    kshiftlb $7, %k1, %k1
922; X86-NEXT:    kshiftrb $6, %k1, %k1
923; X86-NEXT:    korb %k1, %k0, %k0
924; X86-NEXT:    movb $-5, %al
925; X86-NEXT:    kmovd %eax, %k1
926; X86-NEXT:    kandb %k1, %k0, %k0
927; X86-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
928; X86-NEXT:    vmovsh 12(%ebp), %xmm4
929; X86-NEXT:    vcomish %xmm3, %xmm4
930; X86-NEXT:    seta %al
931; X86-NEXT:    kmovd %eax, %k1
932; X86-NEXT:    kshiftlb $7, %k1, %k1
933; X86-NEXT:    kshiftrb $5, %k1, %k1
934; X86-NEXT:    korb %k1, %k0, %k0
935; X86-NEXT:    movb $-9, %al
936; X86-NEXT:    kmovd %eax, %k1
937; X86-NEXT:    kandb %k1, %k0, %k0
938; X86-NEXT:    vpsrlq $48, %xmm2, %xmm2
939; X86-NEXT:    vmovsh 14(%ebp), %xmm3
940; X86-NEXT:    vcomish %xmm2, %xmm3
941; X86-NEXT:    seta %al
942; X86-NEXT:    kmovd %eax, %k1
943; X86-NEXT:    kshiftlb $7, %k1, %k1
944; X86-NEXT:    kshiftrb $4, %k1, %k1
945; X86-NEXT:    korb %k1, %k0, %k1
946; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
947; X86-NEXT:    movl %ebp, %esp
948; X86-NEXT:    popl %ebp
949; X86-NEXT:    retl
950;
951; X64-LABEL: test_v4f16_olt_q:
952; X64:       # %bb.0:
953; X64-NEXT:    vcomish %xmm2, %xmm3
954; X64-NEXT:    seta %al
955; X64-NEXT:    kmovd %eax, %k0
956; X64-NEXT:    kshiftlb $7, %k0, %k0
957; X64-NEXT:    kshiftrb $7, %k0, %k0
958; X64-NEXT:    vpsrld $16, %xmm2, %xmm4
959; X64-NEXT:    vpsrld $16, %xmm3, %xmm5
960; X64-NEXT:    vcomish %xmm4, %xmm5
961; X64-NEXT:    seta %al
962; X64-NEXT:    kmovd %eax, %k1
963; X64-NEXT:    kshiftlb $7, %k1, %k1
964; X64-NEXT:    kshiftrb $6, %k1, %k1
965; X64-NEXT:    korb %k1, %k0, %k0
966; X64-NEXT:    movb $-5, %al
967; X64-NEXT:    kmovd %eax, %k1
968; X64-NEXT:    kandb %k1, %k0, %k0
969; X64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
970; X64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm3[1,1,3,3]
971; X64-NEXT:    vcomish %xmm4, %xmm5
972; X64-NEXT:    seta %al
973; X64-NEXT:    kmovd %eax, %k1
974; X64-NEXT:    kshiftlb $7, %k1, %k1
975; X64-NEXT:    kshiftrb $5, %k1, %k1
976; X64-NEXT:    korb %k1, %k0, %k0
977; X64-NEXT:    movb $-9, %al
978; X64-NEXT:    kmovd %eax, %k1
979; X64-NEXT:    kandb %k1, %k0, %k0
980; X64-NEXT:    vpsrlq $48, %xmm2, %xmm2
981; X64-NEXT:    vpsrlq $48, %xmm3, %xmm3
982; X64-NEXT:    vcomish %xmm2, %xmm3
983; X64-NEXT:    seta %al
984; X64-NEXT:    kmovd %eax, %k1
985; X64-NEXT:    kshiftlb $7, %k1, %k1
986; X64-NEXT:    kshiftrb $4, %k1, %k1
987; X64-NEXT:    korb %k1, %k0, %k1
988; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
989; X64-NEXT:    retq
990  %cond = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f16(
991                                               <4 x half> %f1, <4 x half> %f2, metadata !"olt",
992                                               metadata !"fpexcept.strict") #0
993  %res = select <4 x i1> %cond, <4 x i16> %a, <4 x i16> %b
994  ret <4 x i16> %res
995}
996
997attributes #0 = { strictfp nounwind }
998
999declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f16(<2 x half>, <2 x half>, metadata, metadata)
1000declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f16(<2 x half>, <2 x half>, metadata, metadata)
1001declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f16(<4 x half>, <4 x half>, metadata, metadata)
1002declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f16(<4 x half>, <4 x half>, metadata, metadata)
1003declare <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(<8 x half>, <8 x half>, metadata, metadata)
1004declare <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(<8 x half>, <8 x half>, metadata, metadata)
1005