xref: /llvm-project/llvm/test/CodeGen/X86/avx512-cvttp2i.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl,avx512dq | FileCheck %s --check-prefixes=CHECK
3
; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine these conversion pairs into 'round' instructions because the behavior is different for out-of-range values.
6
; Declarations of the x86 AVX-512 masked truncating-conversion intrinsics
; called by the test functions in this file.

; Packed float -> 32-bit integer lanes.
declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)

; Packed double -> 32-bit integer lanes.
declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

; Packed float -> 64-bit integer lanes.
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

; Packed double -> 64-bit integer lanes.
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
24
; Loads <16 x float> from %p, truncates it to <16 x i32> through the
; cvttps2dq.512 intrinsic (i16 mask -1, trailing i32 4) and converts the
; integers back with sitofp. The CHECK lines expect two separate conversion
; instructions, the first one reading its operand straight from memory.
define <16 x float> @float_to_sint_to_float_mem_v16f32(ptr %p) {
; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <16 x float>, ptr %p
  %trunc = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %src, <16 x i32> undef, i16 -1, i32 4)
  %result = sitofp <16 x i32> %trunc to <16 x float>
  ret <16 x float> %result
}
36
; Truncates the <16 x float> argument to <16 x i32> through the cvttps2dq.512
; intrinsic (i16 mask -1, trailing i32 4) and converts the integers back with
; sitofp. The CHECK lines expect two separate register-to-register
; conversion instructions.
define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %result = sitofp <16 x i32> %trunc to <16 x float>
  ret <16 x float> %result
}
47
; Loads <16 x float> from %p, truncates it to <16 x i32> through the
; cvttps2udq.512 intrinsic (i16 mask -1, trailing i32 4) and converts the
; integers back with uitofp. The CHECK lines expect two separate conversion
; instructions, the first one reading its operand straight from memory.
define <16 x float> @float_to_uint_to_float_mem_v16f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <16 x float>, ptr %p
  %trunc = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %src, <16 x i32> undef, i16 -1, i32 4)
  %result = uitofp <16 x i32> %trunc to <16 x float>
  ret <16 x float> %result
}
59
; Truncates the <16 x float> argument to <16 x i32> through the cvttps2udq.512
; intrinsic (i16 mask -1, trailing i32 4) and converts the integers back with
; uitofp. The CHECK lines expect two separate register-to-register
; conversion instructions.
define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %result = uitofp <16 x i32> %trunc to <16 x float>
  ret <16 x float> %result
}
70
; Loads <4 x float> from %p, truncates it to <4 x i32> through the
; cvttps2udq.128 intrinsic (i8 mask -1) and converts the integers back with
; uitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <4 x float> @float_to_uint_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %src = load <4 x float>, ptr %p
  %trunc = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %src, <4 x i32> undef, i8 -1)
  %result = uitofp <4 x i32> %trunc to <4 x float>
  ret <4 x float> %result
}
82
; Truncates the <4 x float> argument to <4 x i32> through the cvttps2udq.128
; intrinsic (i8 mask -1) and converts the integers back with uitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
  %result = uitofp <4 x i32> %trunc to <4 x float>
  ret <4 x float> %result
}
93
; Loads <8 x float> from %p, truncates it to <8 x i32> through the
; cvttps2udq.256 intrinsic (i8 mask -1) and converts the integers back with
; uitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <8 x float> @float_to_uint_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <8 x float>, ptr %p
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %src, <8 x i32> undef, i8 -1)
  %result = uitofp <8 x i32> %trunc to <8 x float>
  ret <8 x float> %result
}
105
; Truncates the <8 x float> argument to <8 x i32> through the cvttps2udq.256
; intrinsic (i8 mask -1) and converts the integers back with uitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
  %result = uitofp <8 x i32> %trunc to <8 x float>
  ret <8 x float> %result
}
116
; Loads <4 x double> from %p, truncates it to <4 x i32> through the
; cvttpd2udq.256 intrinsic (i8 mask -1) and widens the integers back to
; <4 x double> with uitofp. The CHECK lines expect two separate conversion
; instructions, the first one reading its operand straight from memory.
define <4 x double> @double_to_uint_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udqy (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <4 x double>, ptr %p
  %trunc = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %src, <4 x i32> undef, i8 -1)
  %result = uitofp <4 x i32> %trunc to <4 x double>
  ret <4 x double> %result
}
128
; Truncates the <4 x double> argument to <4 x i32> through the cvttpd2udq.256
; intrinsic (i8 mask -1) and widens the integers back to <4 x double> with
; uitofp. The CHECK lines expect two separate register-to-register
; conversion instructions.
define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
  %result = uitofp <4 x i32> %trunc to <4 x double>
  ret <4 x double> %result
}
139
; Loads <8 x double> from %p, truncates it to <8 x i32> through the
; cvttpd2dq.512 intrinsic (i8 mask -1, trailing i32 4) and widens the integers
; back to <8 x double> with sitofp. The CHECK lines expect two separate
; conversion instructions, the first one reading its operand from memory.
define <8 x double> @double_to_sint_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_sint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq (%rdi), %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <8 x double>, ptr %p
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %src, <8 x i32> undef, i8 -1, i32 4)
  %result = sitofp <8 x i32> %trunc to <8 x double>
  ret <8 x double> %result
}
151
; Truncates the <8 x double> argument to <8 x i32> through the cvttpd2dq.512
; intrinsic (i8 mask -1, trailing i32 4) and widens the integers back to
; <8 x double> with sitofp. The CHECK lines expect two separate
; register-to-register conversion instructions.
define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %result = sitofp <8 x i32> %trunc to <8 x double>
  ret <8 x double> %result
}
162
; Loads <8 x double> from %p, truncates it to <8 x i32> through the
; cvttpd2udq.512 intrinsic (i8 mask -1, trailing i32 4) and widens the
; integers back to <8 x double> with uitofp. The CHECK lines expect two
; separate conversion instructions, the first one reading from memory.
define <8 x double> @double_to_uint_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <8 x double>, ptr %p
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %src, <8 x i32> undef, i8 -1, i32 4)
  %result = uitofp <8 x i32> %trunc to <8 x double>
  ret <8 x double> %result
}
174
; Truncates the <8 x double> argument to <8 x i32> through the cvttpd2udq.512
; intrinsic (i8 mask -1, trailing i32 4) and widens the integers back to
; <8 x double> with uitofp. The CHECK lines expect two separate
; register-to-register conversion instructions.
define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %result = uitofp <8 x i32> %trunc to <8 x double>
  ret <8 x double> %result
}
185
; Loads <4 x float> from %p, truncates it to <4 x i64> through the
; cvttps2qq.256 intrinsic (i8 mask -1) and narrows the integers back to
; <4 x float> with sitofp. The CHECK lines expect two separate conversion
; instructions (the first reading from memory) followed by vzeroupper.
define <4 x float> @float_to_sint64_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %src = load <4 x float>, ptr %p
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %src, <4 x i64> undef, i8 -1)
  %result = sitofp <4 x i64> %trunc to <4 x float>
  ret <4 x float> %result
}
198
; Truncates the <4 x float> argument to <4 x i64> through the cvttps2qq.256
; intrinsic (i8 mask -1) and narrows the integers back to <4 x float> with
; sitofp. The CHECK lines expect two separate register-to-register
; conversion instructions followed by vzeroupper.
define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %result = sitofp <4 x i64> %trunc to <4 x float>
  ret <4 x float> %result
}
210
; Loads <4 x float> from %p, truncates it to <4 x i64> through the
; cvttps2uqq.256 intrinsic (i8 mask -1) and narrows the integers back to
; <4 x float> with uitofp. The CHECK lines expect two separate conversion
; instructions (the first reading from memory) followed by vzeroupper.
define <4 x float> @float_to_uint64_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %src = load <4 x float>, ptr %p
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %src, <4 x i64> undef, i8 -1)
  %result = uitofp <4 x i64> %trunc to <4 x float>
  ret <4 x float> %result
}
223
; Truncates the <4 x float> argument to <4 x i64> through the cvttps2uqq.256
; intrinsic (i8 mask -1) and narrows the integers back to <4 x float> with
; uitofp. The CHECK lines expect two separate register-to-register
; conversion instructions followed by vzeroupper.
define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %result = uitofp <4 x i64> %trunc to <4 x float>
  ret <4 x float> %result
}
235
; Loads <8 x float> from %p, truncates it to <8 x i64> through the
; cvttps2qq.512 intrinsic (i8 mask -1, trailing i32 4) and narrows the
; integers back to <8 x float> with sitofp. The CHECK lines expect two
; separate conversion instructions, the first one reading from memory.
define <8 x float> @float_to_sint64_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <8 x float>, ptr %p
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %src, <8 x i64> undef, i8 -1, i32 4)
  %result = sitofp <8 x i64> %trunc to <8 x float>
  ret <8 x float> %result
}
247
; Truncates the <8 x float> argument to <8 x i64> through the cvttps2qq.512
; intrinsic (i8 mask -1, trailing i32 4) and narrows the integers back to
; <8 x float> with sitofp. The CHECK lines expect two separate
; register-to-register conversion instructions.
define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %result = sitofp <8 x i64> %trunc to <8 x float>
  ret <8 x float> %result
}
258
; Loads <8 x float> from %p, truncates it to <8 x i64> through the
; cvttps2uqq.512 intrinsic (i8 mask -1, trailing i32 4) and narrows the
; integers back to <8 x float> with uitofp. The CHECK lines expect two
; separate conversion instructions, the first one reading from memory.
define <8 x float> @float_to_uint64_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <8 x float>, ptr %p
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %src, <8 x i64> undef, i8 -1, i32 4)
  %result = uitofp <8 x i64> %trunc to <8 x float>
  ret <8 x float> %result
}
270
; Truncates the <8 x float> argument to <8 x i64> through the cvttps2uqq.512
; intrinsic (i8 mask -1, trailing i32 4) and narrows the integers back to
; <8 x float> with uitofp. The CHECK lines expect two separate
; register-to-register conversion instructions.
define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %result = uitofp <8 x i64> %trunc to <8 x float>
  ret <8 x float> %result
}
281
; Loads <2 x double> from %p, truncates it to <2 x i64> through the
; cvttpd2qq.128 intrinsic (i8 mask -1) and converts the integers back with
; sitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <2 x double> @double_to_sint64_to_double_mem_v2f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %src = load <2 x double>, ptr %p
  %trunc = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %src, <2 x i64> undef, i8 -1)
  %result = sitofp <2 x i64> %trunc to <2 x double>
  ret <2 x double> %result
}
293
; Truncates the <2 x double> argument to <2 x i64> through the cvttpd2qq.128
; intrinsic (i8 mask -1) and converts the integers back with sitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %trunc = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %result = sitofp <2 x i64> %trunc to <2 x double>
  ret <2 x double> %result
}
304
; Loads <2 x double> from %p, truncates it to <2 x i64> through the
; cvttpd2uqq.128 intrinsic (i8 mask -1) and converts the integers back with
; uitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <2 x double> @double_to_uint64_to_double_mem_v2f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %src = load <2 x double>, ptr %p
  %trunc = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %src, <2 x i64> undef, i8 -1)
  %result = uitofp <2 x i64> %trunc to <2 x double>
  ret <2 x double> %result
}
316
; Truncates the <2 x double> argument to <2 x i64> through the cvttpd2uqq.128
; intrinsic (i8 mask -1) and converts the integers back with uitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %trunc = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %result = uitofp <2 x i64> %trunc to <2 x double>
  ret <2 x double> %result
}
327
; Loads <4 x double> from %p, truncates it to <4 x i64> through the
; cvttpd2qq.256 intrinsic (i8 mask -1) and converts the integers back with
; sitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <4 x double> @double_to_sint64_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <4 x double>, ptr %p
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %src, <4 x i64> undef, i8 -1)
  %result = sitofp <4 x i64> %trunc to <4 x double>
  ret <4 x double> %result
}
339
; Truncates the <4 x double> argument to <4 x i64> through the cvttpd2qq.256
; intrinsic (i8 mask -1) and converts the integers back with sitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %result = sitofp <4 x i64> %trunc to <4 x double>
  ret <4 x double> %result
}
350
; Loads <4 x double> from %p, truncates it to <4 x i64> through the
; cvttpd2uqq.256 intrinsic (i8 mask -1) and converts the integers back with
; uitofp. The CHECK lines expect two separate conversion instructions, the
; first one reading its operand straight from memory.
define <4 x double> @double_to_uint64_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %src = load <4 x double>, ptr %p
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %src, <4 x i64> undef, i8 -1)
  %result = uitofp <4 x i64> %trunc to <4 x double>
  ret <4 x double> %result
}
362
; Truncates the <4 x double> argument to <4 x i64> through the cvttpd2uqq.256
; intrinsic (i8 mask -1) and converts the integers back with uitofp. The CHECK
; lines expect two separate register-to-register conversion instructions.
define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %trunc = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %result = uitofp <4 x i64> %trunc to <4 x double>
  ret <4 x double> %result
}
373
; Loads <8 x double> from %p, truncates it to <8 x i64> through the
; cvttpd2qq.512 intrinsic (i8 mask -1, trailing i32 4) and converts the
; integers back with sitofp. The CHECK lines expect two separate conversion
; instructions, the first one reading its operand straight from memory.
define <8 x double> @double_to_sint64_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <8 x double>, ptr %p
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %src, <8 x i64> undef, i8 -1, i32 4)
  %result = sitofp <8 x i64> %trunc to <8 x double>
  ret <8 x double> %result
}
385
; Truncates the <8 x double> argument to <8 x i64> through the cvttpd2qq.512
; intrinsic (i8 mask -1, trailing i32 4) and converts the integers back with
; sitofp. The CHECK lines expect two separate register-to-register
; conversion instructions.
define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %result = sitofp <8 x i64> %trunc to <8 x double>
  ret <8 x double> %result
}
396
; Loads <8 x double> from %p, truncates it to <8 x i64> through the
; cvttpd2uqq.512 intrinsic (i8 mask -1, trailing i32 4) and converts the
; integers back with uitofp. The CHECK lines expect two separate conversion
; instructions, the first one reading its operand straight from memory.
define <8 x double> @double_to_uint64_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %src = load <8 x double>, ptr %p
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %src, <8 x i64> undef, i8 -1, i32 4)
  %result = uitofp <8 x i64> %trunc to <8 x double>
  ret <8 x double> %result
}
408
; Truncates the <8 x double> argument to <8 x i64> through the cvttpd2uqq.512
; intrinsic (i8 mask -1, trailing i32 4) and converts the integers back with
; uitofp. The CHECK lines expect two separate register-to-register
; conversion instructions.
define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %trunc = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %result = uitofp <8 x i64> %trunc to <8 x double>
  ret <8 x double> %result
}
419