xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256DQ

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Shared source and destination buffers used by every test function below.
@src64 = common global [8 x double] zeroinitializer, align 64
@src32 = common global [16 x float] zeroinitializer, align 64
@dst64 = common global [8 x i64] zeroinitializer, align 64
@dst32 = common global [16 x i32] zeroinitializer, align 64
@dst16 = common global [32 x i16] zeroinitializer, align 64
@dst8 = common global [64 x i8] zeroinitializer, align 64

;
; FPTOUI vXf64
;

; 8 x fptoui double->i64: stays scalar on SSE and AVX without DQ; vectorized as
; one <8 x double> op with AVX512F and as two <4 x double> halves with AVX256DQ.
define void @fptoui_8f64_8i64() #0 {
; SSE-LABEL: @fptoui_8f64_8i64(
; SSE-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
; SSE-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
; SSE-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
; SSE-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
; SSE-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
; SSE-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
; SSE-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
; SSE-NEXT:    [[CVT0:%.*]] = fptoui double [[A0]] to i64
; SSE-NEXT:    [[CVT1:%.*]] = fptoui double [[A1]] to i64
; SSE-NEXT:    [[CVT2:%.*]] = fptoui double [[A2]] to i64
; SSE-NEXT:    [[CVT3:%.*]] = fptoui double [[A3]] to i64
; SSE-NEXT:    [[CVT4:%.*]] = fptoui double [[A4]] to i64
; SSE-NEXT:    [[CVT5:%.*]] = fptoui double [[A5]] to i64
; SSE-NEXT:    [[CVT6:%.*]] = fptoui double [[A6]] to i64
; SSE-NEXT:    [[CVT7:%.*]] = fptoui double [[A7]] to i64
; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @fptoui_8f64_8i64(
; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptoui double [[A0]] to i64
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptoui double [[A1]] to i64
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptoui double [[A2]] to i64
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = fptoui double [[A3]] to i64
; AVX256NODQ-NEXT:    [[CVT4:%.*]] = fptoui double [[A4]] to i64
; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptoui double [[A5]] to i64
; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptoui double [[A6]] to i64
; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptoui double [[A7]] to i64
; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512F-LABEL: @fptoui_8f64_8i64(
; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
; AVX512F-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i64>
; AVX512F-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512F-NEXT:    ret void
;
; AVX256DQ-LABEL: @fptoui_8f64_8i64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i64>
; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptoui <4 x double> [[TMP3]] to <4 x i64>
; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; AVX256DQ-NEXT:    ret void
;
  %a0 = load double, ptr @src64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = fptoui double %a0 to i64
  %cvt1 = fptoui double %a1 to i64
  %cvt2 = fptoui double %a2 to i64
  %cvt3 = fptoui double %a3 to i64
  %cvt4 = fptoui double %a4 to i64
  %cvt5 = fptoui double %a5 to i64
  %cvt6 = fptoui double %a6 to i64
  %cvt7 = fptoui double %a7 to i64
  store i64 %cvt0, ptr @dst64, align 8
  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

; 8 x fptoui double->i32: two <4 x double> halves on SSE, one <8 x double> op on AVX.
define void @fptoui_8f64_8i32() #0 {
; SSE-LABEL: @fptoui_8f64_8i32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
; SSE-NEXT:    [[TMP2:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i32>
; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = fptoui <4 x double> [[TMP3]] to <4 x i32>
; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: @fptoui_8f64_8i32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32>
; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
; AVX-NEXT:    ret void
;
  %a0 = load double, ptr @src64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = fptoui double %a0 to i32
  %cvt1 = fptoui double %a1 to i32
  %cvt2 = fptoui double %a2 to i32
  %cvt3 = fptoui double %a3 to i32
  %cvt4 = fptoui double %a4 to i32
  %cvt5 = fptoui double %a5 to i32
  %cvt6 = fptoui double %a6 to i32
  %cvt7 = fptoui double %a7 to i32
  store i32 %cvt0, ptr @dst32, align 4
  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

; 8 x fptoui double->i16: vectorized to a single <8 x double> op on all targets.
define void @fptoui_8f64_8i16() #0 {
; CHECK-LABEL: @fptoui_8f64_8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr @src64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = fptoui double %a0 to i16
  %cvt1 = fptoui double %a1 to i16
  %cvt2 = fptoui double %a2 to i16
  %cvt3 = fptoui double %a3 to i16
  %cvt4 = fptoui double %a4 to i16
  %cvt5 = fptoui double %a5 to i16
  %cvt6 = fptoui double %a6 to i16
  %cvt7 = fptoui double %a7 to i16
  store i16 %cvt0, ptr @dst16, align 2
  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
  ret void
}

; 8 x fptoui double->i8: vectorized to a single <8 x double> op on all targets.
define void @fptoui_8f64_8i8() #0 {
; CHECK-LABEL: @fptoui_8f64_8i8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i8>
; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr @src64, align 8
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = fptoui double %a0 to i8
  %cvt1 = fptoui double %a1 to i8
  %cvt2 = fptoui double %a2 to i8
  %cvt3 = fptoui double %a3 to i8
  %cvt4 = fptoui double %a4 to i8
  %cvt5 = fptoui double %a5 to i8
  %cvt6 = fptoui double %a6 to i8
  %cvt7 = fptoui double %a7 to i8
  store i8 %cvt0, ptr @dst8, align 1
  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
  ret void
}

;
; FPTOUI vXf32
;

; 8 x fptoui float->i64: stays scalar on SSE and AVX without DQ; vectorized as
; one <8 x float> op with AVX512F and as two <4 x float> halves with AVX256DQ.
define void @fptoui_8f32_8i64() #0 {
; SSE-LABEL: @fptoui_8f32_8i64(
; SSE-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
; SSE-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
; SSE-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
; SSE-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
; SSE-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
; SSE-NEXT:    [[CVT0:%.*]] = fptoui float [[A0]] to i64
; SSE-NEXT:    [[CVT1:%.*]] = fptoui float [[A1]] to i64
; SSE-NEXT:    [[CVT2:%.*]] = fptoui float [[A2]] to i64
; SSE-NEXT:    [[CVT3:%.*]] = fptoui float [[A3]] to i64
; SSE-NEXT:    [[CVT4:%.*]] = fptoui float [[A4]] to i64
; SSE-NEXT:    [[CVT5:%.*]] = fptoui float [[A5]] to i64
; SSE-NEXT:    [[CVT6:%.*]] = fptoui float [[A6]] to i64
; SSE-NEXT:    [[CVT7:%.*]] = fptoui float [[A7]] to i64
; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @fptoui_8f32_8i64(
; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptoui float [[A0]] to i64
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptoui float [[A1]] to i64
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptoui float [[A2]] to i64
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = fptoui float [[A3]] to i64
; AVX256NODQ-NEXT:    [[CVT4:%.*]] = fptoui float [[A4]] to i64
; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptoui float [[A5]] to i64
; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptoui float [[A6]] to i64
; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptoui float [[A7]] to i64
; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512F-LABEL: @fptoui_8f32_8i64(
; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
; AVX512F-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i64>
; AVX512F-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512F-NEXT:    ret void
;
; AVX256DQ-LABEL: @fptoui_8f32_8i64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i64>
; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i64>
; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
; AVX256DQ-NEXT:    ret void
;
  %a0 = load float, ptr @src32, align 4
  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = fptoui float %a0 to i64
  %cvt1 = fptoui float %a1 to i64
  %cvt2 = fptoui float %a2 to i64
  %cvt3 = fptoui float %a3 to i64
  %cvt4 = fptoui float %a4 to i64
  %cvt5 = fptoui float %a5 to i64
  %cvt6 = fptoui float %a6 to i64
  %cvt7 = fptoui float %a7 to i64
  store i64 %cvt0, ptr @dst64, align 8
  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

; 8 x fptoui float->i32: two <4 x float> halves on SSE, one <8 x float> op on AVX.
define void @fptoui_8f32_8i32() #0 {
; SSE-LABEL: @fptoui_8f32_8i32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
; SSE-NEXT:    [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: @fptoui_8f32_8i32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i32>
; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
; AVX-NEXT:    ret void
;
  %a0 = load float, ptr @src32, align 4
  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = fptoui float %a0 to i32
  %cvt1 = fptoui float %a1 to i32
  %cvt2 = fptoui float %a2 to i32
  %cvt3 = fptoui float %a3 to i32
  %cvt4 = fptoui float %a4 to i32
  %cvt5 = fptoui float %a5 to i32
  %cvt6 = fptoui float %a6 to i32
  %cvt7 = fptoui float %a7 to i32
  store i32 %cvt0, ptr @dst32, align 4
  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

; 8 x fptoui float->i16: vectorized to a single <8 x float> op on all targets.
define void @fptoui_8f32_8i16() #0 {
; CHECK-LABEL: @fptoui_8f32_8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT:    ret void
;
  %a0 = load float, ptr @src32, align 4
  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = fptoui float %a0 to i16
  %cvt1 = fptoui float %a1 to i16
  %cvt2 = fptoui float %a2 to i16
  %cvt3 = fptoui float %a3 to i16
  %cvt4 = fptoui float %a4 to i16
  %cvt5 = fptoui float %a5 to i16
  %cvt6 = fptoui float %a6 to i16
  %cvt7 = fptoui float %a7 to i16
  store i16 %cvt0, ptr @dst16, align 2
  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
  ret void
}

; 8 x fptoui float->i8: vectorized to a single <8 x float> op on all targets.
define void @fptoui_8f32_8i8() #0 {
; CHECK-LABEL: @fptoui_8f32_8i8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i8>
; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT:    ret void
;
  %a0 = load float, ptr @src32, align 4
  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = fptoui float %a0 to i8
  %cvt1 = fptoui float %a1 to i8
  %cvt2 = fptoui float %a2 to i8
  %cvt3 = fptoui float %a3 to i8
  %cvt4 = fptoui float %a4 to i8
  %cvt5 = fptoui float %a5 to i8
  %cvt6 = fptoui float %a6 to i8
  %cvt7 = fptoui float %a7 to i8
  store i8 %cvt0, ptr @dst8, align 1
  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
  ret void
}

attributes #0 = { nounwind }