; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 -mattr=+prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 -mattr=-prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 -mattr=+prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 -mattr=-prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC256,VEC256-AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 -mattr=+prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC256,VEC256-AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 -mattr=-prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC512
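; Check SLP vectorization of rint/lrint/llrint calls at scalar, 128-bit,
; 256-bit and 512-bit preferred vector widths.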

@f64 = common global [16 x double] zeroinitializer, align 64
@f32 = common global [16 x float] zeroinitializer, align 64
@r64 = common global [16 x i64] zeroinitializer, align 64
@r32 = common global [16 x i32] zeroinitializer, align 64

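; 8 x rint(float) vectorizes fully: two <4 x float> calls on 128-bit targets and
; a single <8 x float> call on 256-bit/512-bit targets; the scalar baseline keeps
; eight scalar calls.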
define void @rint_v8f32_v8f32() {
; SCALAR-LABEL: @rint_v8f32_v8f32(
; SCALAR-NEXT:    [[A0:%.*]] = load float, ptr @f32, align 8
; SCALAR-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
; SCALAR-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
; SCALAR-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
; SCALAR-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
; SCALAR-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
; SCALAR-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
; SCALAR-NEXT:    [[R0:%.*]] = call float @llvm.rint.f32(float [[A0]])
; SCALAR-NEXT:    [[R1:%.*]] = call float @llvm.rint.f32(float [[A1]])
; SCALAR-NEXT:    [[R2:%.*]] = call float @llvm.rint.f32(float [[A2]])
; SCALAR-NEXT:    [[R3:%.*]] = call float @llvm.rint.f32(float [[A3]])
; SCALAR-NEXT:    [[R4:%.*]] = call float @llvm.rint.f32(float [[A4]])
; SCALAR-NEXT:    [[R5:%.*]] = call float @llvm.rint.f32(float [[A5]])
; SCALAR-NEXT:    [[R6:%.*]] = call float @llvm.rint.f32(float [[A6]])
; SCALAR-NEXT:    [[R7:%.*]] = call float @llvm.rint.f32(float [[A7]])
; SCALAR-NEXT:    store float [[R0]], ptr @f32, align 8
; SCALAR-NEXT:    store float [[R1]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
; SCALAR-NEXT:    store float [[R2]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
; SCALAR-NEXT:    store float [[R3]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
; SCALAR-NEXT:    store float [[R4]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; SCALAR-NEXT:    store float [[R5]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
; SCALAR-NEXT:    store float [[R6]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
; SCALAR-NEXT:    store float [[R7]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @rint_v8f32_v8f32(
; VEC128-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
; VEC128-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
; VEC128-NEXT:    store <4 x float> [[TMP2]], ptr @f32, align 8
; VEC128-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC128-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP3]])
; VEC128-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC128-NEXT:    ret void
;
; VEC256-LABEL: @rint_v8f32_v8f32(
; VEC256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
; VEC256-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
; VEC256-NEXT:    store <8 x float> [[TMP2]], ptr @f32, align 8
; VEC256-NEXT:    ret void
;
; VEC512-LABEL: @rint_v8f32_v8f32(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
; VEC512-NEXT:    store <8 x float> [[TMP2]], ptr @f32, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load float, ptr @f32, align 8
  %a1 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
  %a2 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
  %a3 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
  %a4 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
  %a5 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
  %a6 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
  %a7 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
  %r0 = call float @llvm.rint.f32(float %a0)
  %r1 = call float @llvm.rint.f32(float %a1)
  %r2 = call float @llvm.rint.f32(float %a2)
  %r3 = call float @llvm.rint.f32(float %a3)
  %r4 = call float @llvm.rint.f32(float %a4)
  %r5 = call float @llvm.rint.f32(float %a5)
  %r6 = call float @llvm.rint.f32(float %a6)
  %r7 = call float @llvm.rint.f32(float %a7)
  store float %r0, ptr @f32, align 8
  store float %r1, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
  store float %r2, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
  store float %r3, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
  store float %r4, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
  store float %r5, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
  store float %r6, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
  store float %r7, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
  ret void
}

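; 8 x rint(double) vectorizes fully: four <2 x double> calls on 128-bit targets,
; two <4 x double> calls on 256-bit targets and a single <8 x double> call on
; 512-bit targets.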
define void @rint_v8f64_v8f64() {
; SCALAR-LABEL: @rint_v8f64_v8f64(
; SCALAR-NEXT:    [[A0:%.*]] = load double, ptr @f64, align 8
; SCALAR-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
; SCALAR-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; SCALAR-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
; SCALAR-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
; SCALAR-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; SCALAR-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
; SCALAR-NEXT:    [[R0:%.*]] = call double @llvm.rint.f64(double [[A0]])
; SCALAR-NEXT:    [[R1:%.*]] = call double @llvm.rint.f64(double [[A1]])
; SCALAR-NEXT:    [[R2:%.*]] = call double @llvm.rint.f64(double [[A2]])
; SCALAR-NEXT:    [[R3:%.*]] = call double @llvm.rint.f64(double [[A3]])
; SCALAR-NEXT:    [[R4:%.*]] = call double @llvm.rint.f64(double [[A4]])
; SCALAR-NEXT:    [[R5:%.*]] = call double @llvm.rint.f64(double [[A5]])
; SCALAR-NEXT:    [[R6:%.*]] = call double @llvm.rint.f64(double [[A6]])
; SCALAR-NEXT:    [[R7:%.*]] = call double @llvm.rint.f64(double [[A7]])
; SCALAR-NEXT:    store double [[R0]], ptr @f64, align 8
; SCALAR-NEXT:    store double [[R1]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
; SCALAR-NEXT:    store double [[R2]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; SCALAR-NEXT:    store double [[R3]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
; SCALAR-NEXT:    store double [[R4]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; SCALAR-NEXT:    store double [[R5]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
; SCALAR-NEXT:    store double [[R6]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; SCALAR-NEXT:    store double [[R7]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @rint_v8f64_v8f64(
; VEC128-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @f64, align 8
; VEC128-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
; VEC128-NEXT:    store <2 x double> [[TMP2]], ptr @f64, align 8
; VEC128-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; VEC128-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP3]])
; VEC128-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; VEC128-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC128-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP5]])
; VEC128-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC128-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; VEC128-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP7]])
; VEC128-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; VEC128-NEXT:    ret void
;
; VEC256-LABEL: @rint_v8f64_v8f64(
; VEC256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
; VEC256-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
; VEC256-NEXT:    store <4 x double> [[TMP2]], ptr @f64, align 8
; VEC256-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC256-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP3]])
; VEC256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC256-NEXT:    ret void
;
; VEC512-LABEL: @rint_v8f64_v8f64(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.rint.v8f64(<8 x double> [[TMP1]])
; VEC512-NEXT:    store <8 x double> [[TMP2]], ptr @f64, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load double, ptr @f64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
  %r0 = call double @llvm.rint.f64(double %a0)
  %r1 = call double @llvm.rint.f64(double %a1)
  %r2 = call double @llvm.rint.f64(double %a2)
  %r3 = call double @llvm.rint.f64(double %a3)
  %r4 = call double @llvm.rint.f64(double %a4)
  %r5 = call double @llvm.rint.f64(double %a5)
  %r6 = call double @llvm.rint.f64(double %a6)
  %r7 = call double @llvm.rint.f64(double %a7)
  store double %r0, ptr @f64, align 8
  store double %r1, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
  store double %r2, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
  store double %r3, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
  store double %r4, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
  store double %r5, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
  store double %r6, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
  store double %r7, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
  ret void
}

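; 8 x lrint(float) -> i32 vectorizes on every configuration, including the scalar
; baseline: two <4 x i32> results, or a single <8 x i32> on 256-bit/512-bit targets.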
define void @lrint_v8f32_v8i32() {
; SCALAR-LABEL: @lrint_v8f32_v8i32(
; SCALAR-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
; SCALAR-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP1]])
; SCALAR-NEXT:    store <4 x i32> [[TMP2]], ptr @r32, align 8
; SCALAR-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP3]])
; SCALAR-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @lrint_v8f32_v8i32(
; VEC128-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
; VEC128-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP1]])
; VEC128-NEXT:    store <4 x i32> [[TMP2]], ptr @r32, align 8
; VEC128-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC128-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP3]])
; VEC128-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
; VEC128-NEXT:    ret void
;
; VEC256-LABEL: @lrint_v8f32_v8i32(
; VEC256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
; VEC256-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP1]])
; VEC256-NEXT:    store <8 x i32> [[TMP2]], ptr @r32, align 8
; VEC256-NEXT:    ret void
;
; VEC512-LABEL: @lrint_v8f32_v8i32(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP1]])
; VEC512-NEXT:    store <8 x i32> [[TMP2]], ptr @r32, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load float, ptr @f32, align 8
  %a1 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
  %a2 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
  %a3 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
  %a4 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
  %a5 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
  %a6 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
  %a7 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
  %r0 = call i32 @llvm.lrint.i32.f32(float %a0)
  %r1 = call i32 @llvm.lrint.i32.f32(float %a1)
  %r2 = call i32 @llvm.lrint.i32.f32(float %a2)
  %r3 = call i32 @llvm.lrint.i32.f32(float %a3)
  %r4 = call i32 @llvm.lrint.i32.f32(float %a4)
  %r5 = call i32 @llvm.lrint.i32.f32(float %a5)
  %r6 = call i32 @llvm.lrint.i32.f32(float %a6)
  %r7 = call i32 @llvm.lrint.i32.f32(float %a7)
  store i32 %r0, ptr @r32, align 8
  store i32 %r1, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 1), align 8
  store i32 %r2, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 2), align 8
  store i32 %r3, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 3), align 8
  store i32 %r4, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
  store i32 %r5, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 5), align 8
  store i32 %r6, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 6), align 8
  store i32 %r7, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 7), align 8
  ret void
}

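; 8 x lrint(double) -> i32 vectorizes on every configuration: two <4 x double>
; sources on the scalar/128-bit configurations, a single <8 x double> source on
; 256-bit/512-bit targets.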
define void @lrint_v8f64_v8i32() {
; SCALAR-LABEL: @lrint_v8f64_v8i32(
; SCALAR-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
; SCALAR-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP1]])
; SCALAR-NEXT:    store <4 x i32> [[TMP2]], ptr @r32, align 8
; SCALAR-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP3]])
; SCALAR-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @lrint_v8f64_v8i32(
; VEC128-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
; VEC128-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP1]])
; VEC128-NEXT:    store <4 x i32> [[TMP2]], ptr @r32, align 8
; VEC128-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC128-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP3]])
; VEC128-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
; VEC128-NEXT:    ret void
;
; VEC256-LABEL: @lrint_v8f64_v8i32(
; VEC256-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
; VEC256-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> [[TMP1]])
; VEC256-NEXT:    store <8 x i32> [[TMP2]], ptr @r32, align 8
; VEC256-NEXT:    ret void
;
; VEC512-LABEL: @lrint_v8f64_v8i32(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> [[TMP1]])
; VEC512-NEXT:    store <8 x i32> [[TMP2]], ptr @r32, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load double, ptr @f64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
  %r0 = call i32 @llvm.lrint.i32.f64(double %a0)
  %r1 = call i32 @llvm.lrint.i32.f64(double %a1)
  %r2 = call i32 @llvm.lrint.i32.f64(double %a2)
  %r3 = call i32 @llvm.lrint.i32.f64(double %a3)
  %r4 = call i32 @llvm.lrint.i32.f64(double %a4)
  %r5 = call i32 @llvm.lrint.i32.f64(double %a5)
  %r6 = call i32 @llvm.lrint.i32.f64(double %a6)
  %r7 = call i32 @llvm.lrint.i32.f64(double %a7)
  store i32 %r0, ptr @r32, align 8
  store i32 %r1, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 1), align 8
  store i32 %r2, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 2), align 8
  store i32 %r3, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 3), align 8
  store i32 %r4, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
  store i32 %r5, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 5), align 8
  store i32 %r6, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 6), align 8
  store i32 %r7, ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 7), align 8
  ret void
}

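; 8 x llrint(float) -> i64 only vectorizes on the AVX512-capable configurations
; (VEC256-AVX512 and VEC512); the remaining targets keep eight scalar calls.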
define void @llrint_v8f32_v8i64() {
; SCALAR-LABEL: @llrint_v8f32_v8i64(
; SCALAR-NEXT:    [[A0:%.*]] = load float, ptr @f32, align 8
; SCALAR-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
; SCALAR-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
; SCALAR-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
; SCALAR-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
; SCALAR-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
; SCALAR-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
; SCALAR-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
; SCALAR-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
; SCALAR-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
; SCALAR-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
; SCALAR-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
; SCALAR-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
; SCALAR-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
; SCALAR-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
; SCALAR-NEXT:    store i64 [[R0]], ptr @r64, align 8
; SCALAR-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; SCALAR-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; SCALAR-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; SCALAR-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; SCALAR-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; SCALAR-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; SCALAR-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @llrint_v8f32_v8i64(
; VEC128-NEXT:    [[A0:%.*]] = load float, ptr @f32, align 8
; VEC128-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
; VEC128-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
; VEC128-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
; VEC128-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC128-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
; VEC128-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
; VEC128-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
; VEC128-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
; VEC128-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
; VEC128-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
; VEC128-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
; VEC128-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
; VEC128-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
; VEC128-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
; VEC128-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
; VEC128-NEXT:    store i64 [[R0]], ptr @r64, align 8
; VEC128-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; VEC128-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; VEC128-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; VEC128-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC128-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; VEC128-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; VEC128-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; VEC128-NEXT:    ret void
;
; VEC256-AVX2-LABEL: @llrint_v8f32_v8i64(
; VEC256-AVX2-NEXT:    [[A0:%.*]] = load float, ptr @f32, align 8
; VEC256-AVX2-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
; VEC256-AVX2-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
; VEC256-AVX2-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
; VEC256-AVX2-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC256-AVX2-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
; VEC256-AVX2-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
; VEC256-AVX2-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
; VEC256-AVX2-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
; VEC256-AVX2-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
; VEC256-AVX2-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
; VEC256-AVX2-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
; VEC256-AVX2-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
; VEC256-AVX2-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
; VEC256-AVX2-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
; VEC256-AVX2-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
; VEC256-AVX2-NEXT:    store i64 [[R0]], ptr @r64, align 8
; VEC256-AVX2-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; VEC256-AVX2-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; VEC256-AVX2-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; VEC256-AVX2-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC256-AVX2-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; VEC256-AVX2-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; VEC256-AVX2-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; VEC256-AVX2-NEXT:    ret void
;
; VEC256-AVX512-LABEL: @llrint_v8f32_v8i64(
; VEC256-AVX512-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
; VEC256-AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP1]])
; VEC256-AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @r64, align 8
; VEC256-AVX512-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
; VEC256-AVX512-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP3]])
; VEC256-AVX512-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC256-AVX512-NEXT:    ret void
;
; VEC512-LABEL: @llrint_v8f32_v8i64(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> [[TMP1]])
; VEC512-NEXT:    store <8 x i64> [[TMP2]], ptr @r64, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load float, ptr @f32, align 8
  %a1 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
  %a2 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
  %a3 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
  %a4 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
  %a5 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
  %a6 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
  %a7 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
  %r0 = call i64 @llvm.llrint.i64.f32(float %a0)
  %r1 = call i64 @llvm.llrint.i64.f32(float %a1)
  %r2 = call i64 @llvm.llrint.i64.f32(float %a2)
  %r3 = call i64 @llvm.llrint.i64.f32(float %a3)
  %r4 = call i64 @llvm.llrint.i64.f32(float %a4)
  %r5 = call i64 @llvm.llrint.i64.f32(float %a5)
  %r6 = call i64 @llvm.llrint.i64.f32(float %a6)
  %r7 = call i64 @llvm.llrint.i64.f32(float %a7)
  store i64 %r0, ptr @r64, align 8
  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
  ret void
}

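; 8 x llrint(double) -> i64 vectorizes on the AVX512-capable configurations
; (VEC256-AVX512 and VEC512) and stays scalar elsewhere.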
define void @llrint_v8f64_v8i64() {
; SCALAR-LABEL: @llrint_v8f64_v8i64(
; SCALAR-NEXT:    [[A0:%.*]] = load double, ptr @f64, align 8
; SCALAR-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
; SCALAR-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; SCALAR-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
; SCALAR-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; SCALAR-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
; SCALAR-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; SCALAR-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
; SCALAR-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
; SCALAR-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
; SCALAR-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
; SCALAR-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
; SCALAR-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
; SCALAR-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
; SCALAR-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
; SCALAR-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
; SCALAR-NEXT:    store i64 [[R0]], ptr @r64, align 8
; SCALAR-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; SCALAR-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; SCALAR-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; SCALAR-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; SCALAR-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; SCALAR-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; SCALAR-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; SCALAR-NEXT:    ret void
;
; VEC128-LABEL: @llrint_v8f64_v8i64(
; VEC128-NEXT:    [[A0:%.*]] = load double, ptr @f64, align 8
; VEC128-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
; VEC128-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; VEC128-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
; VEC128-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC128-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
; VEC128-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; VEC128-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
; VEC128-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
; VEC128-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
; VEC128-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
; VEC128-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
; VEC128-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
; VEC128-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
; VEC128-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
; VEC128-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
; VEC128-NEXT:    store i64 [[R0]], ptr @r64, align 8
; VEC128-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; VEC128-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; VEC128-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; VEC128-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC128-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; VEC128-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; VEC128-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; VEC128-NEXT:    ret void
;
; VEC256-AVX2-LABEL: @llrint_v8f64_v8i64(
; VEC256-AVX2-NEXT:    [[A0:%.*]] = load double, ptr @f64, align 8
; VEC256-AVX2-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
; VEC256-AVX2-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
; VEC256-AVX2-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
; VEC256-AVX2-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC256-AVX2-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
; VEC256-AVX2-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
; VEC256-AVX2-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
; VEC256-AVX2-NEXT:    [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
; VEC256-AVX2-NEXT:    [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
; VEC256-AVX2-NEXT:    [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
; VEC256-AVX2-NEXT:    [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
; VEC256-AVX2-NEXT:    [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
; VEC256-AVX2-NEXT:    [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
; VEC256-AVX2-NEXT:    [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
; VEC256-AVX2-NEXT:    [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
; VEC256-AVX2-NEXT:    store i64 [[R0]], ptr @r64, align 8
; VEC256-AVX2-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
; VEC256-AVX2-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
; VEC256-AVX2-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
; VEC256-AVX2-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC256-AVX2-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
; VEC256-AVX2-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
; VEC256-AVX2-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
; VEC256-AVX2-NEXT:    ret void
;
; VEC256-AVX512-LABEL: @llrint_v8f64_v8i64(
; VEC256-AVX512-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
; VEC256-AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> [[TMP1]])
; VEC256-AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @r64, align 8
; VEC256-AVX512-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
; VEC256-AVX512-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> [[TMP3]])
; VEC256-AVX512-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
; VEC256-AVX512-NEXT:    ret void
;
; VEC512-LABEL: @llrint_v8f64_v8i64(
; VEC512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
; VEC512-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> [[TMP1]])
; VEC512-NEXT:    store <8 x i64> [[TMP2]], ptr @r64, align 8
; VEC512-NEXT:    ret void
;
  %a0 = load double, ptr @f64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
  %r0 = call i64 @llvm.llrint.i64.f64(double %a0)
  %r1 = call i64 @llvm.llrint.i64.f64(double %a1)
  %r2 = call i64 @llvm.llrint.i64.f64(double %a2)
  %r3 = call i64 @llvm.llrint.i64.f64(double %a3)
  %r4 = call i64 @llvm.llrint.i64.f64(double %a4)
  %r5 = call i64 @llvm.llrint.i64.f64(double %a5)
  %r6 = call i64 @llvm.llrint.i64.f64(double %a6)
  %r7 = call i64 @llvm.llrint.i64.f64(double %a7)
  store i64 %r0, ptr @r64, align 8
  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}