; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512FP16
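; These reductions all carry the 'nnan' fast-math flag, so the backend is free
; to lower llvm.vector.reduce.fmin.* with plain minss/minps/minsd shuffle
; sequences and does not need any explicit NaN handling.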

;
; vXf32
;

define float @test_v1f32(<1 x float> %a0) {
; ALL-LABEL: test_v1f32:
; ALL:       # %bb.0:
; ALL-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
  ret float %1
}

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v3f32(<3 x float> %a0) {
; SSE2-LABEL: test_v3f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    minss %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v3f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE41-NEXT:    minss %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v3f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v3f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
  ret float %1
}
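
; Note: the 128-bit f32 reductions above halve the problem each step: the high
; elements are moved down (unpckhpd/shufpd, then shufps/movshdup), combined
; with a vector min, and the final pair is resolved with a scalar minss.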

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
  ret float %1
}
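
; For 256-bit inputs the AVX/AVX512 prefixes first fold the upper 128-bit lane
; down with vextractf128 (and issue vzeroupper before returning), while the
; SSE prefixes already receive the vector split across two xmm registers and
; simply combine the halves with minps.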

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
  ret float %1
}
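
; 512-bit inputs follow the same pattern one level up: AVX512 folds the upper
; 256 bits with vextractf64x4 before narrowing further, while AVX receives the
; value in two ymm registers and combines them with a single vminps first.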

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
  ret double %1
}
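
;
; vXf16
;

; Half precision is not natively supported before AVX512FP16: SSE/AVX extend
; both elements to float via the __extendhfsf2 libcall and select with
; ucomiss/cmov, AVX512F/AVX512VL convert with vcvtph2ps and blend through a
; mask register, and AVX512FP16 compares the halves directly with
; vcmpltph/vmovsh.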

define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    subq $40, %rsp
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pextrw $0, %xmm0, %ebx
; SSE-NEXT:    pextrw $0, %xmm1, %ebp
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE-NEXT:    cmovbl %ebp, %ebx
; SSE-NEXT:    pinsrw $0, %ebx, %xmm0
; SSE-NEXT:    addq $40, %rsp
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f16:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    vmovdqa %xmm0, %xmm1
; AVX-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX-NEXT:    vpextrw $0, %xmm0, %ebx
; AVX-NEXT:    vpextrw $0, %xmm1, %ebp
; AVX-NEXT:    callq __extendhfsf2@PLT
; AVX-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT:    callq __extendhfsf2@PLT
; AVX-NEXT:    vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; AVX-NEXT:    cmovbl %ebp, %ebx
; AVX-NEXT:    vpinsrw $0, %ebx, %xmm0, %xmm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_v2f16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm2
; AVX512F-NEXT:    vcvtph2ps %xmm1, %xmm3
; AVX512F-NEXT:    xorl %eax, %eax
; AVX512F-NEXT:    vucomiss %xmm3, %xmm2
; AVX512F-NEXT:    sbbl %eax, %eax
; AVX512F-NEXT:    kmovd %eax, %k1
; AVX512F-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT:    vmovdqa %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_v2f16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT:    vcvtph2ps %xmm0, %ymm2
; AVX512VL-NEXT:    vcvtph2ps %xmm1, %ymm3
; AVX512VL-NEXT:    vcmpltps %ymm3, %ymm2, %k1
; AVX512VL-NEXT:    vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512FP16-LABEL: test_v2f16:
; AVX512FP16:       # %bb.0:
; AVX512FP16-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512FP16-NEXT:    vcmpltph %xmm1, %xmm0, %k1
; AVX512FP16-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
; AVX512FP16-NEXT:    retq
  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
  ret half %1
}

declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)