xref: /llvm-project/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll (revision cda2b01df708cc4b5448fa1bdb63ca5e15251545)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
3
; VCVTW2PH, 256-bit: sitofp <16 x i16> -> <16 x half> selects vcvtw2ph.
; Each group below covers five forms: merge-masked (select with passthru),
; broadcast memory operand ({1to16}), unmasked, zero-masked ({z}), and a
; full vector load folded into the instruction's memory operand.
4define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
5; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    kmovd %edi, %k1
8; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm1 {%k1}
9; CHECK-NEXT:    vmovaps %ymm1, %ymm0
10; CHECK-NEXT:    retq
11  %msk = bitcast i16 %mask to <16 x i1>
12  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
13  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
14  ret <16 x half> %res
15}
16
17define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
18; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
19; CHECK:       # %bb.0:
20; CHECK-NEXT:    kmovd %esi, %k1
21; CHECK-NEXT:    vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
22; CHECK-NEXT:    retq
23  %msk = bitcast i16 %mask to <16 x i1>
24  %scalar = load i16, ptr %arg0
25  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
26  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
27  %res0 = sitofp <16 x i16> %val to <16 x half>
28  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
29  ret <16 x half> %res
30}
31
32define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
33; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
34; CHECK:       # %bb.0:
35; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
36; CHECK-NEXT:    retq
37  %res = sitofp <16 x i16> %arg0 to <16 x half>
38  ret <16 x half> %res
39}
40
41define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
42; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
43; CHECK:       # %bb.0:
44; CHECK-NEXT:    kmovd %edi, %k1
45; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0 {%k1} {z}
46; CHECK-NEXT:    retq
47  %msk = bitcast i16 %mask to <16 x i1>
48  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
49  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
50  ret <16 x half> %res
51}
52
53define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
54; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
55; CHECK:       # %bb.0:
56; CHECK-NEXT:    kmovd %esi, %k1
57; CHECK-NEXT:    vcvtw2ph (%rdi), %ymm0 {%k1}
58; CHECK-NEXT:    retq
59  %msk = bitcast i16 %mask to <16 x i1>
60  %val = load <16 x i16>, ptr %arg0
61  %res0 = sitofp <16 x i16> %val to <16 x half>
62  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
63  ret <16 x half> %res
64}
65
; VCVTPH2W, 256-bit: fp16 -> signed i16 via the masked target intrinsic
; (passthru and mask are carried by the intrinsic itself, not by a select).
66declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)
67
68define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
69; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
70; CHECK:       # %bb.0:
71; CHECK-NEXT:    kmovd %edi, %k1
72; CHECK-NEXT:    vcvtph2w %ymm0, %ymm1 {%k1}
73; CHECK-NEXT:    vmovaps %ymm1, %ymm0
74; CHECK-NEXT:    retq
75  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
76  ret <16 x i16> %res
77}
78
79define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
80; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
81; CHECK:       # %bb.0:
82; CHECK-NEXT:    kmovd %esi, %k1
83; CHECK-NEXT:    vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
84; CHECK-NEXT:    retq
85  %scalar = load half, ptr %arg0
86  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
87  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
88  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
89  ret <16 x i16> %res
90}
91
92define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
93; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
94; CHECK:       # %bb.0:
95; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0
96; CHECK-NEXT:    retq
97  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
98  ret <16 x i16> %res
99}
100
101define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
102; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    kmovd %edi, %k1
105; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0 {%k1} {z}
106; CHECK-NEXT:    retq
107  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
108  ret <16 x i16> %res
109}
110
111define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
112; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
113; CHECK:       # %bb.0:
114; CHECK-NEXT:    kmovd %esi, %k1
115; CHECK-NEXT:    vcvtph2w (%rdi), %ymm0 {%k1}
116; CHECK-NEXT:    retq
117  %val = load <16 x half>, ptr %arg0
118  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
119  ret <16 x i16> %res
120}
121
; VCVTUW2PH, 256-bit: uitofp <16 x i16> -> <16 x half> selects vcvtuw2ph.
122define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
123; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
124; CHECK:       # %bb.0:
125; CHECK-NEXT:    kmovd %edi, %k1
126; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm1 {%k1}
127; CHECK-NEXT:    vmovaps %ymm1, %ymm0
128; CHECK-NEXT:    retq
129  %msk = bitcast i16 %mask to <16 x i1>
130  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
131  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
132  ret <16 x half> %res
133}
134
135define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
136; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
137; CHECK:       # %bb.0:
138; CHECK-NEXT:    kmovd %esi, %k1
139; CHECK-NEXT:    vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
140; CHECK-NEXT:    retq
141  %msk = bitcast i16 %mask to <16 x i1>
142  %scalar = load i16, ptr %arg0
143  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
144  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
145  %res0 = uitofp <16 x i16> %val to <16 x half>
146  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
147  ret <16 x half> %res
148}
149
150define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
151; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
152; CHECK:       # %bb.0:
153; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
154; CHECK-NEXT:    retq
155  %res = uitofp <16 x i16> %arg0 to <16 x half>
156  ret <16 x half> %res
157}
158
159define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
160; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    kmovd %edi, %k1
163; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
164; CHECK-NEXT:    retq
165  %msk = bitcast i16 %mask to <16 x i1>
166  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
167  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
168  ret <16 x half> %res
169}
170
171define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
172; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
173; CHECK:       # %bb.0:
174; CHECK-NEXT:    kmovd %esi, %k1
175; CHECK-NEXT:    vcvtuw2ph (%rdi), %ymm0 {%k1}
176; CHECK-NEXT:    retq
177  %msk = bitcast i16 %mask to <16 x i1>
178  %val = load <16 x i16>, ptr %arg0
179  %res0 = uitofp <16 x i16> %val to <16 x half>
180  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
181  ret <16 x half> %res
182}
183
; VCVTPH2UW, 256-bit: fp16 -> unsigned i16 via the masked target intrinsic.
184declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)
185
186define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
187; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
188; CHECK:       # %bb.0:
189; CHECK-NEXT:    kmovd %edi, %k1
190; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm1 {%k1}
191; CHECK-NEXT:    vmovaps %ymm1, %ymm0
192; CHECK-NEXT:    retq
193  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
194  ret <16 x i16> %res
195}
196
197define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
198; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
199; CHECK:       # %bb.0:
200; CHECK-NEXT:    kmovd %esi, %k1
201; CHECK-NEXT:    vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
202; CHECK-NEXT:    retq
203  %scalar = load half, ptr %arg0
204  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
205  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
206  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
207  ret <16 x i16> %res
208}
209
210define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
211; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
212; CHECK:       # %bb.0:
213; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0
214; CHECK-NEXT:    retq
215  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
216  ret <16 x i16> %res
217}
218
219define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
220; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
221; CHECK:       # %bb.0:
222; CHECK-NEXT:    kmovd %edi, %k1
223; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0 {%k1} {z}
224; CHECK-NEXT:    retq
225  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
226  ret <16 x i16> %res
227}
228
229define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
230; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
231; CHECK:       # %bb.0:
232; CHECK-NEXT:    kmovd %esi, %k1
233; CHECK-NEXT:    vcvtph2uw (%rdi), %ymm0 {%k1}
234; CHECK-NEXT:    retq
235  %val = load <16 x half>, ptr %arg0
236  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
237  ret <16 x i16> %res
238}
239
; VCVTTPH2W, 256-bit: truncating fp16 -> signed i16 via the masked intrinsic.
240declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)
241
242define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
243; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
244; CHECK:       # %bb.0:
245; CHECK-NEXT:    kmovd %edi, %k1
246; CHECK-NEXT:    vcvttph2w %ymm0, %ymm1 {%k1}
247; CHECK-NEXT:    vmovaps %ymm1, %ymm0
248; CHECK-NEXT:    retq
249  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
250  ret <16 x i16> %res
251}
252
253define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
254; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    kmovd %esi, %k1
257; CHECK-NEXT:    vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
258; CHECK-NEXT:    retq
259  %scalar = load half, ptr %arg0
260  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
261  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
262  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
263  ret <16 x i16> %res
264}
265
266define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
267; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
268; CHECK:       # %bb.0:
269; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
270; CHECK-NEXT:    retq
271  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
272  ret <16 x i16> %res
273}
274
275define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
276; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
277; CHECK:       # %bb.0:
278; CHECK-NEXT:    kmovd %edi, %k1
279; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0 {%k1} {z}
280; CHECK-NEXT:    retq
281  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
282  ret <16 x i16> %res
283}
284
285define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
286; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
287; CHECK:       # %bb.0:
288; CHECK-NEXT:    kmovd %esi, %k1
289; CHECK-NEXT:    vcvttph2w (%rdi), %ymm0 {%k1}
290; CHECK-NEXT:    retq
291  %val = load <16 x half>, ptr %arg0
292  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
293  ret <16 x i16> %res
294}
295
; VCVTTPH2UW, 256-bit: truncating fp16 -> unsigned i16 via the masked intrinsic.
296declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)
297
298define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
299; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
300; CHECK:       # %bb.0:
301; CHECK-NEXT:    kmovd %edi, %k1
302; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm1 {%k1}
303; CHECK-NEXT:    vmovaps %ymm1, %ymm0
304; CHECK-NEXT:    retq
305  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
306  ret <16 x i16> %res
307}
308
309define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
310; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
311; CHECK:       # %bb.0:
312; CHECK-NEXT:    kmovd %esi, %k1
313; CHECK-NEXT:    vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
314; CHECK-NEXT:    retq
315  %scalar = load half, ptr %arg0
316  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
317  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
318  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
319  ret <16 x i16> %res
320}
321
322define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
323; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
324; CHECK:       # %bb.0:
325; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
326; CHECK-NEXT:    retq
327  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
328  ret <16 x i16> %res
329}
330
331define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
332; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
333; CHECK:       # %bb.0:
334; CHECK-NEXT:    kmovd %edi, %k1
335; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0 {%k1} {z}
336; CHECK-NEXT:    retq
337  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
338  ret <16 x i16> %res
339}
340
341define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
342; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
343; CHECK:       # %bb.0:
344; CHECK-NEXT:    kmovd %esi, %k1
345; CHECK-NEXT:    vcvttph2uw (%rdi), %ymm0 {%k1}
346; CHECK-NEXT:    retq
347  %val = load <16 x half>, ptr %arg0
348  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
349  ret <16 x i16> %res
350}
351
; VCVTW2PH, 128-bit: sitofp <8 x i16> -> <8 x half> selects vcvtw2ph (xmm).
352define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
353; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
354; CHECK:       # %bb.0:
355; CHECK-NEXT:    kmovd %edi, %k1
356; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm1 {%k1}
357; CHECK-NEXT:    vmovaps %xmm1, %xmm0
358; CHECK-NEXT:    retq
359  %msk = bitcast i8 %mask to <8 x i1>
360  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
361  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
362  ret <8 x half> %res
363}
364
365define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
366; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
367; CHECK:       # %bb.0:
368; CHECK-NEXT:    kmovd %esi, %k1
369; CHECK-NEXT:    vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
370; CHECK-NEXT:    retq
371  %msk = bitcast i8 %mask to <8 x i1>
372  %scalar = load i16, ptr %arg0
373  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
374  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
375  %res0 = sitofp <8 x i16> %val to <8 x half>
376  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
377  ret <8 x half> %res
378}
379
380define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
381; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
382; CHECK:       # %bb.0:
383; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
384; CHECK-NEXT:    retq
385  %res = sitofp <8 x i16> %arg0 to <8 x half>
386  ret <8 x half> %res
387}
388
389define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
390; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
391; CHECK:       # %bb.0:
392; CHECK-NEXT:    kmovd %edi, %k1
393; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0 {%k1} {z}
394; CHECK-NEXT:    retq
395  %msk = bitcast i8 %mask to <8 x i1>
396  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
397  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
398  ret <8 x half> %res
399}
400
401define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
402; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
403; CHECK:       # %bb.0:
404; CHECK-NEXT:    kmovd %esi, %k1
405; CHECK-NEXT:    vcvtw2ph (%rdi), %xmm0 {%k1}
406; CHECK-NEXT:    retq
407  %msk = bitcast i8 %mask to <8 x i1>
408  %val = load <8 x i16>, ptr %arg0
409  %res0 = sitofp <8 x i16> %val to <8 x half>
410  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
411  ret <8 x half> %res
412}
413
; VCVTPH2W, 128-bit: fp16 -> signed i16 via the masked target intrinsic (xmm).
414declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)
415
416define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
417; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
418; CHECK:       # %bb.0:
419; CHECK-NEXT:    kmovd %edi, %k1
420; CHECK-NEXT:    vcvtph2w %xmm0, %xmm1 {%k1}
421; CHECK-NEXT:    vmovaps %xmm1, %xmm0
422; CHECK-NEXT:    retq
423  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
424  ret <8 x i16> %res
425}
426
427define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
428; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
429; CHECK:       # %bb.0:
430; CHECK-NEXT:    kmovd %esi, %k1
431; CHECK-NEXT:    vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
432; CHECK-NEXT:    retq
433  %scalar = load half, ptr %arg0
434  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
435  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
436  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
437  ret <8 x i16> %res
438}
439
440define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
441; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
442; CHECK:       # %bb.0:
443; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0
444; CHECK-NEXT:    retq
445  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
446  ret <8 x i16> %res
447}
448
449define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
450; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    kmovd %edi, %k1
453; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0 {%k1} {z}
454; CHECK-NEXT:    retq
455  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
456  ret <8 x i16> %res
457}
458
459define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
460; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
461; CHECK:       # %bb.0:
462; CHECK-NEXT:    kmovd %esi, %k1
463; CHECK-NEXT:    vcvtph2w (%rdi), %xmm0 {%k1}
464; CHECK-NEXT:    retq
465  %val = load <8 x half>, ptr %arg0
466  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
467  ret <8 x i16> %res
468}
469
470
; VCVTUW2PH, 128-bit: uitofp <8 x i16> -> <8 x half> selects vcvtuw2ph (xmm).
471define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
472; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
473; CHECK:       # %bb.0:
474; CHECK-NEXT:    kmovd %edi, %k1
475; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm1 {%k1}
476; CHECK-NEXT:    vmovaps %xmm1, %xmm0
477; CHECK-NEXT:    retq
478  %msk = bitcast i8 %mask to <8 x i1>
479  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
480  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
481  ret <8 x half> %res
482}
483
484define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
485; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
486; CHECK:       # %bb.0:
487; CHECK-NEXT:    kmovd %esi, %k1
488; CHECK-NEXT:    vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
489; CHECK-NEXT:    retq
490  %msk = bitcast i8 %mask to <8 x i1>
491  %scalar = load i16, ptr %arg0
492  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
493  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
494  %res0 = uitofp <8 x i16> %val to <8 x half>
495  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
496  ret <8 x half> %res
497}
498
499define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
500; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
501; CHECK:       # %bb.0:
502; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
503; CHECK-NEXT:    retq
504  %res = uitofp <8 x i16> %arg0 to <8 x half>
505  ret <8 x half> %res
506}
507
508define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
509; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
510; CHECK:       # %bb.0:
511; CHECK-NEXT:    kmovd %edi, %k1
512; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
513; CHECK-NEXT:    retq
514  %msk = bitcast i8 %mask to <8 x i1>
515  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
516  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
517  ret <8 x half> %res
518}
519
520define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
521; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
522; CHECK:       # %bb.0:
523; CHECK-NEXT:    kmovd %esi, %k1
524; CHECK-NEXT:    vcvtuw2ph (%rdi), %xmm0 {%k1}
525; CHECK-NEXT:    retq
526  %msk = bitcast i8 %mask to <8 x i1>
527  %val = load <8 x i16>, ptr %arg0
528  %res0 = uitofp <8 x i16> %val to <8 x half>
529  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
530  ret <8 x half> %res
531}
532
; VCVTPH2UW, 128-bit: fp16 -> unsigned i16 via the masked target intrinsic (xmm).
533declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)
534
535define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
536; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
537; CHECK:       # %bb.0:
538; CHECK-NEXT:    kmovd %edi, %k1
539; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm1 {%k1}
540; CHECK-NEXT:    vmovaps %xmm1, %xmm0
541; CHECK-NEXT:    retq
542  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
543  ret <8 x i16> %res
544}
545
546define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
547; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
548; CHECK:       # %bb.0:
549; CHECK-NEXT:    kmovd %esi, %k1
550; CHECK-NEXT:    vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
551; CHECK-NEXT:    retq
552  %scalar = load half, ptr %arg0
553  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
554  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
555  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
556  ret <8 x i16> %res
557}
558
559define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
560; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0
563; CHECK-NEXT:    retq
564  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
565  ret <8 x i16> %res
566}
567
568define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
569; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
570; CHECK:       # %bb.0:
571; CHECK-NEXT:    kmovd %edi, %k1
572; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0 {%k1} {z}
573; CHECK-NEXT:    retq
574  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
575  ret <8 x i16> %res
576}
577
578define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
579; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
580; CHECK:       # %bb.0:
581; CHECK-NEXT:    kmovd %esi, %k1
582; CHECK-NEXT:    vcvtph2uw (%rdi), %xmm0 {%k1}
583; CHECK-NEXT:    retq
584  %val = load <8 x half>, ptr %arg0
585  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
586  ret <8 x i16> %res
587}
588
; VCVTTPH2W, 128-bit: truncating fp16 -> signed i16 via the masked intrinsic (xmm).
589declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)
590
591define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
592; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
593; CHECK:       # %bb.0:
594; CHECK-NEXT:    kmovd %edi, %k1
595; CHECK-NEXT:    vcvttph2w %xmm0, %xmm1 {%k1}
596; CHECK-NEXT:    vmovaps %xmm1, %xmm0
597; CHECK-NEXT:    retq
598  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
599  ret <8 x i16> %res
600}
601
602define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
603; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
604; CHECK:       # %bb.0:
605; CHECK-NEXT:    kmovd %esi, %k1
606; CHECK-NEXT:    vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
607; CHECK-NEXT:    retq
608  %scalar = load half, ptr %arg0
609  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
610  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
611  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
612  ret <8 x i16> %res
613}
614
615define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
616; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
617; CHECK:       # %bb.0:
618; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
619; CHECK-NEXT:    retq
620  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
621  ret <8 x i16> %res
622}
623
624define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
625; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
626; CHECK:       # %bb.0:
627; CHECK-NEXT:    kmovd %edi, %k1
628; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0 {%k1} {z}
629; CHECK-NEXT:    retq
630  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
631  ret <8 x i16> %res
632}
633
634define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
635; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    kmovd %esi, %k1
638; CHECK-NEXT:    vcvttph2w (%rdi), %xmm0 {%k1}
639; CHECK-NEXT:    retq
640  %val = load <8 x half>, ptr %arg0
641  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
642  ret <8 x i16> %res
643}
644
; VCVTTPH2UW, 128-bit: truncating fp16 -> unsigned i16 via the masked intrinsic (xmm).
645declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)
646
647define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
648; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
649; CHECK:       # %bb.0:
650; CHECK-NEXT:    kmovd %edi, %k1
651; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm1 {%k1}
652; CHECK-NEXT:    vmovaps %xmm1, %xmm0
653; CHECK-NEXT:    retq
654  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
655  ret <8 x i16> %res
656}
657
658define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
659; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
660; CHECK:       # %bb.0:
661; CHECK-NEXT:    kmovd %esi, %k1
662; CHECK-NEXT:    vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
663; CHECK-NEXT:    retq
664  %scalar = load half, ptr %arg0
665  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
666  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
667  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
668  ret <8 x i16> %res
669}
670
671define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
672; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
673; CHECK:       # %bb.0:
674; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
675; CHECK-NEXT:    retq
676  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
677  ret <8 x i16> %res
678}
679
680define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
681; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
682; CHECK:       # %bb.0:
683; CHECK-NEXT:    kmovd %edi, %k1
684; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0 {%k1} {z}
685; CHECK-NEXT:    retq
686  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
687  ret <8 x i16> %res
688}
689
690define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
691; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
692; CHECK:       # %bb.0:
693; CHECK-NEXT:    kmovd %esi, %k1
694; CHECK-NEXT:    vcvttph2uw (%rdi), %xmm0 {%k1}
695; CHECK-NEXT:    retq
696  %val = load <8 x half>, ptr %arg0
697  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
698  ret <8 x i16> %res
699}
700
; Odd/narrow element types: sub-128-bit vectors and non-power-of-two integer
; widths (i1, i8, i17, i33) must be legalized (extend/mask) before the fp16
; convert instruction; also fptosi/fptoui of fp16 -> i16 at 256 bits.
701define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
702; CHECK-LABEL: test_u16tofp4:
703; CHECK:       # %bb.0:
704; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
705; CHECK-NEXT:    retq
706  %res = uitofp <4 x i16> %arg0 to <4 x half>
707  ret <4 x half> %res
708}
709
710define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
711; CHECK-LABEL: test_s16tofp2:
712; CHECK:       # %bb.0:
713; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
714; CHECK-NEXT:    retq
715  %res = sitofp <2 x i16> %arg0 to <2 x half>
716  ret <2 x half> %res
717}
718
719define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
720; CHECK-LABEL: test_u8tofp4:
721; CHECK:       # %bb.0:
722; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
723; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
724; CHECK-NEXT:    retq
725  %res = uitofp <4 x i8> %arg0 to <4 x half>
726  ret <4 x half> %res
727}
728
729define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
730; CHECK-LABEL: test_s8tofp2:
731; CHECK:       # %bb.0:
732; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
733; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
734; CHECK-NEXT:    retq
735  %res = sitofp <2 x i8> %arg0 to <2 x half>
736  ret <2 x half> %res
737}
738
739define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
740; CHECK-LABEL: test_u1tofp2:
741; CHECK:       # %bb.0:
742; CHECK-NEXT:    vpmovqw %xmm0, %xmm0
743; CHECK-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
744; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
745; CHECK-NEXT:    retq
746  %res = uitofp <2 x i1> %arg0 to <2 x half>
747  ret <2 x half> %res
748}
749
750define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
751; CHECK-LABEL: test_s17tofp4:
752; CHECK:       # %bb.0:
753; CHECK-NEXT:    vpslld $15, %xmm0, %xmm0
754; CHECK-NEXT:    vpsrad $15, %xmm0, %xmm0
755; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
756; CHECK-NEXT:    retq
757  %res = sitofp <4 x i17> %arg0 to <4 x half>
758  ret <4 x half> %res
759}
760
761define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
762; CHECK-LABEL: test_u33tofp2:
763; CHECK:       # %bb.0:
764; CHECK-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
765; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
766; CHECK-NEXT:    retq
767  %res = uitofp <2 x i33> %arg0 to <2 x half>
768  ret <2 x half> %res
769}
770
771define <16 x i16> @test_s16tof16(<16 x half> %a) {
772; CHECK-LABEL: test_s16tof16:
773; CHECK:       # %bb.0:
774; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
775; CHECK-NEXT:    retq
776  %res = fptosi <16 x half> %a to <16 x i16>
777  ret <16 x i16> %res
778}
779
780define <16 x i16> @test_u16tof16(<16 x half> %a) {
781; CHECK-LABEL: test_u16tof16:
782; CHECK:       # %bb.0:
783; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
784; CHECK-NEXT:    retq
785  %res = fptoui <16 x half> %a to <16 x i16>
786  ret <16 x i16> %res
787}
788