; xref: /llvm-project/llvm/test/CodeGen/X86/vector-lrint.ll (revision fd3e7e3a1e661482f46cd0347d0fa62adef30177)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX1-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX1-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64

; lrint of <1 x float> to <1 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; X86-SSE2-LABEL: lrint_v1f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f32:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)

; lrint of <2 x float> to <2 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; X86-SSE2-LABEL: lrint_v2f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)

; lrint of <4 x float> to <4 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; X86-SSE2-LABEL: lrint_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)

; lrint of <8 x float> to <8 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; X86-SSE2-LABEL: lrint_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    cvtps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v8f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %ymm0, %ymm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; X64-AVX1-i64-NEXT:    vmovaps %ymm2, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %ymm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)

; lrint of <16 x float> to <16 x iXLen>. NOTE(review): no CHECK lines were
; autogenerated for this function — presumably update_llc_test_checks.py could
; not produce common output across the RUN configurations; confirm before adding.
define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
  ret <16 x iXLen> %a
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)

; lrint of <1 x double> to <1 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; X86-SSE2-LABEL: lrint_v1f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f64:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)

; lrint of <2 x double> to <2 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
; X86-AVX-NEXT:    vmovd %ecx, %xmm0
; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm0
; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)

; lrint of <4 x double> to <4 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX-i32-NEXT:    vzeroupper
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %ymm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)

; lrint of <8 x double> to <8 x iXLen>; iXLen is rewritten to i32/i64 by sed in the RUN lines.
define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-LABEL: lrint_v8f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movapd %xmm0, %xmm3
; X86-SSE2-NEXT:    movapd 8(%ebp), %xmm4
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm5
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm3
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: lrint_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX1-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; AVX512-i32-LABEL: lrint_v8f64:
; AVX512-i32:       # %bb.0:
; AVX512-i32-NEXT:    vcvtpd2dq %zmm0, %ymm0
; AVX512-i32-NEXT:    ret{{[l|q]}}
;
; X64-AVX1-i32-LABEL: lrint_v8f64:
; X64-AVX1-i32:       # %bb.0:
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X64-AVX1-i32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %zmm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
