xref: /llvm-project/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll (revision 2379949aadcee8d4028dec0508f88bda290636bc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
4
; Declarations of the constrained fptosi/fptoui intrinsics exercised in this
; file: sources are <2|4|8 x half>, results are integer vectors of i64, i32,
; i16, i8 and i1 elements. The trailing metadata operand is where each call
; site passes its exception-behavior string (!"fpexcept.strict" in this file).
5declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half>, metadata)
6declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half>, metadata)
7declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half>, metadata)
8declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half>, metadata)
9declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half>, metadata)
10declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half>, metadata)
11declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half>, metadata)
12declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half>, metadata)
13declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half>, metadata)
14declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half>, metadata)
15declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half>, metadata)
16declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half>, metadata)
17declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)
18declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half>, metadata)
19declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half>, metadata)
20declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half>, metadata)
21declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half>, metadata)
22declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half>, metadata)
23declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half>, metadata)
24declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half>, metadata)
25declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half>, metadata)
26declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half>, metadata)
27declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half>, metadata)
28declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half>, metadata)
29
; Strict (fpexcept.strict) signed conversion: <2 x half> -> <2 x i64>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2qq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
; Both RUN configurations share the checks; the ret pattern accepts retl or retq.
30define <2 x i64> @strict_vector_fptosi_v2f16_to_v2i64(<2 x half> %a) #0 {
31; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i64:
32; CHECK:       # %bb.0:
33; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
34; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
35; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
36; CHECK-NEXT:    ret{{[l|q]}}
37  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half> %a,
38                                              metadata !"fpexcept.strict") #0
39  ret <2 x i64> %ret
40}
41
; Strict (fpexcept.strict) unsigned conversion: <2 x half> -> <2 x i64>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2uqq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
42define <2 x i64> @strict_vector_fptoui_v2f16_to_v2i64(<2 x half> %a) #0 {
43; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i64:
44; CHECK:       # %bb.0:
45; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
46; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
47; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
48; CHECK-NEXT:    ret{{[l|q]}}
49  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half> %a,
50                                              metadata !"fpexcept.strict") #0
51  ret <2 x i64> %ret
52}
53
; Strict (fpexcept.strict) signed conversion: <2 x half> -> <2 x i32>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2dq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
54define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
55; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
56; CHECK:       # %bb.0:
57; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
58; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
59; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
60; CHECK-NEXT:    ret{{[l|q]}}
61  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
62                                              metadata !"fpexcept.strict") #0
63  ret <2 x i32> %ret
64}
65
; Strict (fpexcept.strict) unsigned conversion: <2 x half> -> <2 x i32>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2udq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
66define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
67; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
68; CHECK:       # %bb.0:
69; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
70; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
71; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
72; CHECK-NEXT:    ret{{[l|q]}}
73  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
74                                              metadata !"fpexcept.strict") #0
75  ret <2 x i32> %ret
76}
77
; Strict (fpexcept.strict) signed conversion: <2 x half> -> <2 x i16>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2w converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
78define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
79; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
82; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
83; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
84; CHECK-NEXT:    ret{{[l|q]}}
85  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
86                                              metadata !"fpexcept.strict") #0
87  ret <2 x i16> %ret
88}
89
; Strict (fpexcept.strict) unsigned conversion: <2 x half> -> <2 x i16>.
; Expected code: vblendps keeps lane 0 of xmm0 (32 bits, i.e. both half
; elements) and takes lanes 1-3 from the zeroed xmm1, then vcvttph2uw converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
90define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
91; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
92; CHECK:       # %bb.0:
93; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
94; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
95; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
96; CHECK-NEXT:    ret{{[l|q]}}
97  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
98                                              metadata !"fpexcept.strict") #0
99  ret <2 x i16> %ret
100}
101
; Strict (fpexcept.strict) signed conversion: <2 x half> -> <2 x i8>.
; Expected code: vblendps keeps lane 0 of xmm0 (both half elements) and takes
; lanes 1-3 from the zeroed xmm1; vcvttph2w converts to 16-bit words and
; vpmovwb then narrows the words to bytes.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
102define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
103; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
104; CHECK:       # %bb.0:
105; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
106; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
107; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
108; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
109; CHECK-NEXT:    ret{{[l|q]}}
110  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
111                                              metadata !"fpexcept.strict") #0
112  ret <2 x i8> %ret
113}
114
; Strict (fpexcept.strict) unsigned conversion: <2 x half> -> <2 x i8>.
; Expected code: vblendps keeps lane 0 of xmm0 (both half elements) and takes
; lanes 1-3 from the zeroed xmm1; vcvttph2uw converts to 16-bit words and
; vpmovwb then narrows the words to bytes.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
115define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
116; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
117; CHECK:       # %bb.0:
118; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
119; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
120; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
121; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
122; CHECK-NEXT:    ret{{[l|q]}}
123  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
124                                              metadata !"fpexcept.strict") #0
125  ret <2 x i8> %ret
126}
127
; Strict (fpexcept.strict) signed conversion: <2 x half> -> <2 x i1>.
; Expected code: vblendps keeps lane 0 of xmm0 (both half elements) and takes
; lanes 1-3 from the zeroed xmm1; vcvttph2w converts to words; vpsllw $15
; shifts bit 0 of each word up to bit 15; vpmovw2m moves the words into mask
; k0; vpmovm2q expands the mask into xmm0 as the returned <2 x i1>.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
128define <2 x i1> @strict_vector_fptosi_v2f16_to_v2i1(<2 x half> %a) #0 {
129; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i1:
130; CHECK:       # %bb.0:
131; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
132; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
133; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
134; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
135; CHECK-NEXT:    vpmovw2m %xmm0, %k0
136; CHECK-NEXT:    vpmovm2q %k0, %xmm0
137; CHECK-NEXT:    ret{{[l|q]}}
138  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %a,
139                                              metadata !"fpexcept.strict") #0
140  ret <2 x i1> %ret
141}
142
; Strict (fpexcept.strict) unsigned conversion: <2 x half> -> <2 x i1>.
; Expected code: vblendps keeps lane 0 of xmm0 (both half elements) and takes
; lanes 1-3 from the zeroed xmm1; vcvttph2uw converts to words; vpsllw $15
; shifts bit 0 of each word up to bit 15; vpmovw2m moves the words into mask
; k0; vpmovm2q expands the mask into xmm0 as the returned <2 x i1>.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
143define <2 x i1> @strict_vector_fptoui_v2f16_to_v2i1(<2 x half> %a) #0 {
144; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i1:
145; CHECK:       # %bb.0:
146; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
147; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
148; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
149; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
150; CHECK-NEXT:    vpmovw2m %xmm0, %k0
151; CHECK-NEXT:    vpmovm2q %k0, %xmm0
152; CHECK-NEXT:    ret{{[l|q]}}
153  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %a,
154                                              metadata !"fpexcept.strict") #0
155  ret <2 x i1> %ret
156}
157
; Strict (fpexcept.strict) signed conversion: <4 x half> -> <4 x i32>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest, then vcvttph2dq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
158define <4 x i32> @strict_vector_fptosi_v4f16_to_v4i32(<4 x half> %a) #0 {
159; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i32:
160; CHECK:       # %bb.0:
161; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
162; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
163; CHECK-NEXT:    ret{{[l|q]}}
164  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half> %a,
165                                              metadata !"fpexcept.strict") #0
166  ret <4 x i32> %ret
167}
168
; Strict (fpexcept.strict) unsigned conversion: <4 x half> -> <4 x i32>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest, then vcvttph2udq converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
169define <4 x i32> @strict_vector_fptoui_v4f16_to_v4i32(<4 x half> %a) #0 {
170; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i32:
171; CHECK:       # %bb.0:
172; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
173; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
174; CHECK-NEXT:    ret{{[l|q]}}
175  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half> %a,
176                                              metadata !"fpexcept.strict") #0
177  ret <4 x i32> %ret
178}
179
; Strict (fpexcept.strict) signed conversion: <4 x half> -> <4 x i16>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest, then vcvttph2w converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
180define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
181; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
182; CHECK:       # %bb.0:
183; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
184; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
185; CHECK-NEXT:    ret{{[l|q]}}
186  %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
187                                              metadata !"fpexcept.strict") #0
188  ret <4 x i16> %ret
189}
190
; Strict (fpexcept.strict) unsigned conversion: <4 x half> -> <4 x i16>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest, then vcvttph2uw converts.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
191define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
192; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
195; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
196; CHECK-NEXT:    ret{{[l|q]}}
197  %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
198                                              metadata !"fpexcept.strict") #0
199  ret <4 x i16> %ret
200}
201
; Strict (fpexcept.strict) signed conversion: <4 x half> -> <4 x i8>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest; vcvttph2w converts to 16-bit words and vpmovwb then
; narrows the words to bytes.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
202define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
203; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
204; CHECK:       # %bb.0:
205; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
206; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
207; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
208; CHECK-NEXT:    ret{{[l|q]}}
209  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
210                                              metadata !"fpexcept.strict") #0
211  ret <4 x i8> %ret
212}
213
; Strict (fpexcept.strict) unsigned conversion: <4 x half> -> <4 x i8>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest; vcvttph2uw converts to 16-bit words and vpmovwb then
; narrows the words to bytes.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
214define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
215; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
216; CHECK:       # %bb.0:
217; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
218; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
219; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
220; CHECK-NEXT:    ret{{[l|q]}}
221  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
222                                              metadata !"fpexcept.strict") #0
223  ret <4 x i8> %ret
224}
225
; Strict (fpexcept.strict) signed conversion: <4 x half> -> <4 x i1>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest; vcvttph2w converts to words; vpsllw $15 shifts bit 0 of
; each word up to bit 15; vpmovw2m moves the words into mask k0; vpmovm2d
; expands the mask into xmm0 as the returned <4 x i1>.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
226define <4 x i1> @strict_vector_fptosi_v4f16_to_v4i1(<4 x half> %a) #0 {
227; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i1:
228; CHECK:       # %bb.0:
229; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
230; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
231; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
232; CHECK-NEXT:    vpmovw2m %xmm0, %k0
233; CHECK-NEXT:    vpmovm2d %k0, %xmm0
234; CHECK-NEXT:    ret{{[l|q]}}
235  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %a,
236                                              metadata !"fpexcept.strict") #0
237  ret <4 x i1> %ret
238}
239
; Strict (fpexcept.strict) unsigned conversion: <4 x half> -> <4 x i1>.
; Expected code: vmovq keeps the low 64 bits of xmm0 (the four half elements)
; and zeroes the rest; vcvttph2uw converts to words; vpsllw $15 shifts bit 0 of
; each word up to bit 15; vpmovw2m moves the words into mask k0; vpmovm2d
; expands the mask into xmm0 as the returned <4 x i1>.
; NOTE(review): the zeroing presumably stops unused lanes from raising FP
; exceptions - confirm against the X86 lowering.
240define <4 x i1> @strict_vector_fptoui_v4f16_to_v4i1(<4 x half> %a) #0 {
241; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i1:
242; CHECK:       # %bb.0:
243; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
244; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
245; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
246; CHECK-NEXT:    vpmovw2m %xmm0, %k0
247; CHECK-NEXT:    vpmovm2d %k0, %xmm0
248; CHECK-NEXT:    ret{{[l|q]}}
249  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %a,
250                                              metadata !"fpexcept.strict") #0
251  ret <4 x i1> %ret
252}
253
; Strict (fpexcept.strict) signed conversion: <8 x half> -> <8 x i16>.
; The source fills the whole xmm register, so the expected code is a single
; vcvttph2w with no preceding zeroing step.
254define <8 x i16> @strict_vector_fptosi_v8f16_to_v8i16(<8 x half> %a) #0 {
255; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i16:
256; CHECK:       # %bb.0:
257; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
258; CHECK-NEXT:    ret{{[l|q]}}
259  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half> %a,
260                                              metadata !"fpexcept.strict") #0
261  ret <8 x i16> %ret
262}
263
; Strict (fpexcept.strict) unsigned conversion: <8 x half> -> <8 x i16>.
; The source fills the whole xmm register, so the expected code is a single
; vcvttph2uw with no preceding zeroing step.
264define <8 x i16> @strict_vector_fptoui_v8f16_to_v8i16(<8 x half> %a) #0 {
265; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i16:
266; CHECK:       # %bb.0:
267; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
268; CHECK-NEXT:    ret{{[l|q]}}
269  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half> %a,
270                                              metadata !"fpexcept.strict") #0
271  ret <8 x i16> %ret
272}
273
; Strict (fpexcept.strict) signed conversion: <8 x half> -> <8 x i8>.
; Expected code: vcvttph2w converts all eight elements to 16-bit words, then
; vpmovwb narrows the words to bytes. No zeroing step is expected.
274define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
275; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i8:
276; CHECK:       # %bb.0:
277; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
278; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
279; CHECK-NEXT:    ret{{[l|q]}}
280  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
281                                              metadata !"fpexcept.strict") #0
282  ret <8 x i8> %ret
283}
284
; Strict (fpexcept.strict) unsigned conversion: <8 x half> -> <8 x i8>.
; Expected code: vcvttph2uw converts all eight elements to 16-bit words, then
; vpmovwb narrows the words to bytes. No zeroing step is expected.
285define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
286; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i8:
287; CHECK:       # %bb.0:
288; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
289; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
290; CHECK-NEXT:    ret{{[l|q]}}
291  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,
292                                              metadata !"fpexcept.strict") #0
293  ret <8 x i8> %ret
294}
295
; Strict (fpexcept.strict) signed conversion: <8 x half> -> <8 x i1>.
; Expected code: vcvttph2dq widens xmm0 into 32-bit elements in ymm0,
; vpmovd2m moves them into mask k0, vpmovm2w expands the mask into xmm0, and
; vzeroupper follows because a ymm register was written.
; Unlike the narrower i1 tests in this file, no shift precedes the
; vector-to-mask move here.
296define <8 x i1> @strict_vector_fptosi_v8f16_to_v8i1(<8 x half> %a) #0 {
297; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i1:
298; CHECK:       # %bb.0:
299; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
300; CHECK-NEXT:    vpmovd2m %ymm0, %k0
301; CHECK-NEXT:    vpmovm2w %k0, %xmm0
302; CHECK-NEXT:    vzeroupper
303; CHECK-NEXT:    ret{{[l|q]}}
304  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %a,
305                                              metadata !"fpexcept.strict") #0
306  ret <8 x i1> %ret
307}
308
; Strict (fpexcept.strict) unsigned conversion: <8 x half> -> <8 x i1>.
; Expected code: vcvttph2dq widens xmm0 into 32-bit elements in ymm0,
; vpslld $31 shifts bit 0 of each element up to bit 31, vpmovd2m moves the
; elements into mask k0, vpmovm2w expands the mask into xmm0, and vzeroupper
; follows because a ymm register was written.
; NOTE(review): the checks record the signed vcvttph2dq for this unsigned
; conversion. They are autogenerated, so this reflects llc output at the time
; of generation - regenerate with the update script, do not hand-edit.
309define <8 x i1> @strict_vector_fptoui_v8f16_to_v8i1(<8 x half> %a) #0 {
310; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i1:
311; CHECK:       # %bb.0:
312; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
313; CHECK-NEXT:    vpslld $31, %ymm0, %ymm0
314; CHECK-NEXT:    vpmovd2m %ymm0, %k0
315; CHECK-NEXT:    vpmovm2w %k0, %xmm0
316; CHECK-NEXT:    vzeroupper
317; CHECK-NEXT:    ret{{[l|q]}}
318  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %a,
319                                              metadata !"fpexcept.strict") #0
320  ret <8 x i1> %ret
321}
322
; Attribute group #0 is attached to every test function and to every
; constrained-intrinsic call site in this file, marking them strictfp.
323attributes #0 = { strictfp }
324