1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX2
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
8
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10
11@src64 = common global [8 x i64] zeroinitializer, align 64
12@src32 = common global [16 x i32] zeroinitializer, align 64
13@src16 = common global [32 x i16] zeroinitializer, align 64
14@src8  = common global [64 x i8] zeroinitializer, align 64
15
16@dst64 = common global [8 x double] zeroinitializer, align 64
17@dst32 = common global [16 x float] zeroinitializer, align 64
18
19;
20; UITOFP to vXf64
21;
22
23define void @uitofp_2i64_2f64() #0 {
24; CHECK-LABEL: @uitofp_2i64_2f64(
25; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
26; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
27; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
28; CHECK-NEXT:    ret void
29;
30  %ld0 = load i64, ptr @src64, align 64
31  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
32  %cvt0 = uitofp i64 %ld0 to double
33  %cvt1 = uitofp i64 %ld1 to double
34  store double %cvt0, ptr @dst64, align 64
35  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
36  ret void
37}
38
39define void @uitofp_4i64_4f64() #0 {
40; SSE-LABEL: @uitofp_4i64_4f64(
41; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
42; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
43; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
44; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
45; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
46; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
47; SSE-NEXT:    ret void
48;
49; AVX-LABEL: @uitofp_4i64_4f64(
50; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
51; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
52; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
53; AVX-NEXT:    ret void
54;
55  %ld0 = load i64, ptr @src64, align 64
56  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
57  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
58  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
59  %cvt0 = uitofp i64 %ld0 to double
60  %cvt1 = uitofp i64 %ld1 to double
61  %cvt2 = uitofp i64 %ld2 to double
62  %cvt3 = uitofp i64 %ld3 to double
63  store double %cvt0, ptr @dst64, align 64
64  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
65  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
66  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
67  ret void
68}
69
70define void @uitofp_8i64_8f64() #0 {
71; SSE-LABEL: @uitofp_8i64_8f64(
72; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
73; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
74; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
75; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
76; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
77; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
78; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
79; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i64> [[TMP5]] to <2 x double>
80; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
81; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
82; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i64> [[TMP7]] to <2 x double>
83; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
84; SSE-NEXT:    ret void
85;
86; AVX256-LABEL: @uitofp_8i64_8f64(
87; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
88; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
89; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
90; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
91; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x double>
92; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
93; AVX256-NEXT:    ret void
94;
95; AVX512-LABEL: @uitofp_8i64_8f64(
96; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
97; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
98; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
99; AVX512-NEXT:    ret void
100;
101  %ld0 = load i64, ptr @src64, align 64
102  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
103  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
104  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
105  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
106  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
107  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
108  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
109  %cvt0 = uitofp i64 %ld0 to double
110  %cvt1 = uitofp i64 %ld1 to double
111  %cvt2 = uitofp i64 %ld2 to double
112  %cvt3 = uitofp i64 %ld3 to double
113  %cvt4 = uitofp i64 %ld4 to double
114  %cvt5 = uitofp i64 %ld5 to double
115  %cvt6 = uitofp i64 %ld6 to double
116  %cvt7 = uitofp i64 %ld7 to double
117  store double %cvt0, ptr @dst64, align 64
118  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
119  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
120  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
121  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
122  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
123  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
124  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
125  ret void
126}
127
128define void @uitofp_2i32_2f64() #0 {
129; SSE-LABEL: @uitofp_2i32_2f64(
130; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
131; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
132; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
133; SSE-NEXT:    ret void
134;
135; AVX1-LABEL: @uitofp_2i32_2f64(
136; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 64
137; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
138; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
139; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
140; AVX1-NEXT:    store double [[CVT0]], ptr @dst64, align 64
141; AVX1-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
142; AVX1-NEXT:    ret void
143;
144; AVX2-LABEL: @uitofp_2i32_2f64(
145; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
146; AVX2-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
147; AVX2-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
148; AVX2-NEXT:    ret void
149;
150; AVX512-LABEL: @uitofp_2i32_2f64(
151; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
152; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
153; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
154; AVX512-NEXT:    ret void
155;
156; AVX256DQ-LABEL: @uitofp_2i32_2f64(
157; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
158; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
159; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
160; AVX256DQ-NEXT:    ret void
161;
162  %ld0 = load i32, ptr @src32, align 64
163  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
164  %cvt0 = uitofp i32 %ld0 to double
165  %cvt1 = uitofp i32 %ld1 to double
166  store double %cvt0, ptr @dst64, align 64
167  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
168  ret void
169}
170
171define void @uitofp_4i32_4f64() #0 {
172; SSE-LABEL: @uitofp_4i32_4f64(
173; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
174; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
175; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
176; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
177; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
178; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
179; SSE-NEXT:    ret void
180;
181; AVX-LABEL: @uitofp_4i32_4f64(
182; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
183; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
184; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
185; AVX-NEXT:    ret void
186;
187  %ld0 = load i32, ptr @src32, align 64
188  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
189  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
190  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
191  %cvt0 = uitofp i32 %ld0 to double
192  %cvt1 = uitofp i32 %ld1 to double
193  %cvt2 = uitofp i32 %ld2 to double
194  %cvt3 = uitofp i32 %ld3 to double
195  store double %cvt0, ptr @dst64, align 64
196  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
197  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
198  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
199  ret void
200}
201
202define void @uitofp_8i32_8f64() #0 {
203; SSE-LABEL: @uitofp_8i32_8f64(
204; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
205; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
206; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
207; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
208; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
209; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
210; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
211; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i32> [[TMP5]] to <2 x double>
212; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
213; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
214; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i32> [[TMP7]] to <2 x double>
215; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
216; SSE-NEXT:    ret void
217;
218; AVX256-LABEL: @uitofp_8i32_8f64(
219; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
220; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
221; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
222; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
223; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x double>
224; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
225; AVX256-NEXT:    ret void
226;
227; AVX512-LABEL: @uitofp_8i32_8f64(
228; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
229; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
230; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
231; AVX512-NEXT:    ret void
232;
233  %ld0 = load i32, ptr @src32, align 64
234  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
235  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
236  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
237  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
238  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
239  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
240  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
241  %cvt0 = uitofp i32 %ld0 to double
242  %cvt1 = uitofp i32 %ld1 to double
243  %cvt2 = uitofp i32 %ld2 to double
244  %cvt3 = uitofp i32 %ld3 to double
245  %cvt4 = uitofp i32 %ld4 to double
246  %cvt5 = uitofp i32 %ld5 to double
247  %cvt6 = uitofp i32 %ld6 to double
248  %cvt7 = uitofp i32 %ld7 to double
249  store double %cvt0, ptr @dst64, align 64
250  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
251  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
252  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
253  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
254  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
255  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
256  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
257  ret void
258}
259
260define void @uitofp_2i16_2f64() #0 {
261; CHECK-LABEL: @uitofp_2i16_2f64(
262; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
263; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
264; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
265; CHECK-NEXT:    ret void
266;
267  %ld0 = load i16, ptr @src16, align 64
268  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
269  %cvt0 = uitofp i16 %ld0 to double
270  %cvt1 = uitofp i16 %ld1 to double
271  store double %cvt0, ptr @dst64, align 64
272  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
273  ret void
274}
275
276define void @uitofp_4i16_4f64() #0 {
277; SSE-LABEL: @uitofp_4i16_4f64(
278; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
279; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
280; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
281; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
282; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
283; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
284; SSE-NEXT:    ret void
285;
286; AVX-LABEL: @uitofp_4i16_4f64(
287; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
288; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
289; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
290; AVX-NEXT:    ret void
291;
292  %ld0 = load i16, ptr @src16, align 64
293  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
294  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
295  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
296  %cvt0 = uitofp i16 %ld0 to double
297  %cvt1 = uitofp i16 %ld1 to double
298  %cvt2 = uitofp i16 %ld2 to double
299  %cvt3 = uitofp i16 %ld3 to double
300  store double %cvt0, ptr @dst64, align 64
301  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
302  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
303  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
304  ret void
305}
306
307define void @uitofp_8i16_8f64() #0 {
308; SSE-LABEL: @uitofp_8i16_8f64(
309; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
310; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
311; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
312; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
313; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
314; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
315; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
316; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i16> [[TMP5]] to <2 x double>
317; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
318; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
319; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i16> [[TMP7]] to <2 x double>
320; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
321; SSE-NEXT:    ret void
322;
323; AVX256-LABEL: @uitofp_8i16_8f64(
324; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
325; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
326; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
327; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
328; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x double>
329; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
330; AVX256-NEXT:    ret void
331;
332; AVX512-LABEL: @uitofp_8i16_8f64(
333; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
334; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
335; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
336; AVX512-NEXT:    ret void
337;
338  %ld0 = load i16, ptr @src16, align 64
339  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
340  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
341  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
342  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
343  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
344  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
345  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
346  %cvt0 = uitofp i16 %ld0 to double
347  %cvt1 = uitofp i16 %ld1 to double
348  %cvt2 = uitofp i16 %ld2 to double
349  %cvt3 = uitofp i16 %ld3 to double
350  %cvt4 = uitofp i16 %ld4 to double
351  %cvt5 = uitofp i16 %ld5 to double
352  %cvt6 = uitofp i16 %ld6 to double
353  %cvt7 = uitofp i16 %ld7 to double
354  store double %cvt0, ptr @dst64, align 64
355  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
356  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
357  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
358  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
359  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
360  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
361  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
362  ret void
363}
364
365define void @uitofp_2i8_2f64() #0 {
366; CHECK-LABEL: @uitofp_2i8_2f64(
367; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
368; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
369; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
370; CHECK-NEXT:    ret void
371;
372  %ld0 = load i8, ptr @src8, align 64
373  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
374  %cvt0 = uitofp i8 %ld0 to double
375  %cvt1 = uitofp i8 %ld1 to double
376  store double %cvt0, ptr @dst64, align 64
377  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
378  ret void
379}
380
381define void @uitofp_4i8_4f64() #0 {
382; SSE-LABEL: @uitofp_4i8_4f64(
383; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
384; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
385; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
386; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
387; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
388; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
389; SSE-NEXT:    ret void
390;
391; AVX-LABEL: @uitofp_4i8_4f64(
392; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
393; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
394; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
395; AVX-NEXT:    ret void
396;
397  %ld0 = load i8, ptr @src8, align 64
398  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
399  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
400  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
401  %cvt0 = uitofp i8 %ld0 to double
402  %cvt1 = uitofp i8 %ld1 to double
403  %cvt2 = uitofp i8 %ld2 to double
404  %cvt3 = uitofp i8 %ld3 to double
405  store double %cvt0, ptr @dst64, align 64
406  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
407  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
408  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
409  ret void
410}
411
412define void @uitofp_8i8_8f64() #0 {
413; SSE-LABEL: @uitofp_8i8_8f64(
414; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
415; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
416; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
417; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
418; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
419; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
420; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
421; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i8> [[TMP5]] to <2 x double>
422; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
423; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
424; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i8> [[TMP7]] to <2 x double>
425; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
426; SSE-NEXT:    ret void
427;
428; AVX256-LABEL: @uitofp_8i8_8f64(
429; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
430; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
431; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
432; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
433; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x double>
434; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
435; AVX256-NEXT:    ret void
436;
437; AVX512-LABEL: @uitofp_8i8_8f64(
438; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
439; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
440; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
441; AVX512-NEXT:    ret void
442;
443  %ld0 = load i8, ptr @src8, align 64
444  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
445  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
446  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
447  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
448  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
449  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
450  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
451  %cvt0 = uitofp i8 %ld0 to double
452  %cvt1 = uitofp i8 %ld1 to double
453  %cvt2 = uitofp i8 %ld2 to double
454  %cvt3 = uitofp i8 %ld3 to double
455  %cvt4 = uitofp i8 %ld4 to double
456  %cvt5 = uitofp i8 %ld5 to double
457  %cvt6 = uitofp i8 %ld6 to double
458  %cvt7 = uitofp i8 %ld7 to double
459  store double %cvt0, ptr @dst64, align 64
460  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
461  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
462  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
463  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
464  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
465  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
466  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
467  ret void
468}
469
470;
471; UITOFP to vXf32
472;
473
474define void @uitofp_2i64_2f32() #0 {
475; SSE-LABEL: @uitofp_2i64_2f32(
476; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
477; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
478; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
479; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
480; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
481; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
482; SSE-NEXT:    ret void
483;
484; AVX1-LABEL: @uitofp_2i64_2f32(
485; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
486; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
487; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
488; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
489; AVX1-NEXT:    store float [[CVT0]], ptr @dst32, align 64
490; AVX1-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
491; AVX1-NEXT:    ret void
492;
493; AVX2-LABEL: @uitofp_2i64_2f32(
494; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
495; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
496; AVX2-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
497; AVX2-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
498; AVX2-NEXT:    store float [[CVT0]], ptr @dst32, align 64
499; AVX2-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
500; AVX2-NEXT:    ret void
501;
502; AVX512-LABEL: @uitofp_2i64_2f32(
503; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
504; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
505; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
506; AVX512-NEXT:    ret void
507;
508; AVX256DQ-LABEL: @uitofp_2i64_2f32(
509; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
510; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
511; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
512; AVX256DQ-NEXT:    ret void
513;
514  %ld0 = load i64, ptr @src64, align 64
515  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
516  %cvt0 = uitofp i64 %ld0 to float
517  %cvt1 = uitofp i64 %ld1 to float
518  store float %cvt0, ptr @dst32, align 64
519  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
520  ret void
521}
522
523define void @uitofp_4i64_4f32() #0 {
524; CHECK-LABEL: @uitofp_4i64_4f32(
525; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
526; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
527; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
528; CHECK-NEXT:    ret void
529;
530  %ld0 = load i64, ptr @src64, align 64
531  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
532  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
533  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
534  %cvt0 = uitofp i64 %ld0 to float
535  %cvt1 = uitofp i64 %ld1 to float
536  %cvt2 = uitofp i64 %ld2 to float
537  %cvt3 = uitofp i64 %ld3 to float
538  store float %cvt0, ptr @dst32, align 64
539  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
540  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
541  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
542  ret void
543}
544
545define void @uitofp_8i64_8f32() #0 {
546; SSE-LABEL: @uitofp_8i64_8f32(
547; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
548; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
549; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
550; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
551; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x float>
552; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
553; SSE-NEXT:    ret void
554;
555; AVX-LABEL: @uitofp_8i64_8f32(
556; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
557; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
558; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
559; AVX-NEXT:    ret void
560;
561  %ld0 = load i64, ptr @src64, align 64
562  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
563  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
564  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
565  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
566  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
567  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
568  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
569  %cvt0 = uitofp i64 %ld0 to float
570  %cvt1 = uitofp i64 %ld1 to float
571  %cvt2 = uitofp i64 %ld2 to float
572  %cvt3 = uitofp i64 %ld3 to float
573  %cvt4 = uitofp i64 %ld4 to float
574  %cvt5 = uitofp i64 %ld5 to float
575  %cvt6 = uitofp i64 %ld6 to float
576  %cvt7 = uitofp i64 %ld7 to float
577  store float %cvt0, ptr @dst32, align 64
578  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
579  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
580  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
581  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
582  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
583  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
584  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
585  ret void
586}
587
588define void @uitofp_4i32_4f32() #0 {
589; CHECK-LABEL: @uitofp_4i32_4f32(
590; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
591; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
592; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
593; CHECK-NEXT:    ret void
594;
595  %ld0 = load i32, ptr @src32, align 64
596  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
597  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
598  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
599  %cvt0 = uitofp i32 %ld0 to float
600  %cvt1 = uitofp i32 %ld1 to float
601  %cvt2 = uitofp i32 %ld2 to float
602  %cvt3 = uitofp i32 %ld3 to float
603  store float %cvt0, ptr @dst32, align 64
604  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
605  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
606  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
607  ret void
608}
609
610define void @uitofp_8i32_8f32() #0 {
611; SSE-LABEL: @uitofp_8i32_8f32(
612; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
613; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
614; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
615; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
616; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
617; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
618; SSE-NEXT:    ret void
619;
620; AVX-LABEL: @uitofp_8i32_8f32(
621; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
622; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
623; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
624; AVX-NEXT:    ret void
625;
626  %ld0 = load i32, ptr @src32, align 64
627  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
628  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
629  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
630  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
631  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
632  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
633  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
634  %cvt0 = uitofp i32 %ld0 to float
635  %cvt1 = uitofp i32 %ld1 to float
636  %cvt2 = uitofp i32 %ld2 to float
637  %cvt3 = uitofp i32 %ld3 to float
638  %cvt4 = uitofp i32 %ld4 to float
639  %cvt5 = uitofp i32 %ld5 to float
640  %cvt6 = uitofp i32 %ld6 to float
641  %cvt7 = uitofp i32 %ld7 to float
642  store float %cvt0, ptr @dst32, align 64
643  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
644  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
645  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
646  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
647  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
648  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
649  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
650  ret void
651}
652
653define void @uitofp_16i32_16f32() #0 {
654; SSE-LABEL: @uitofp_16i32_16f32(
655; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
656; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
657; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
658; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
659; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
660; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
661; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
662; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i32> [[TMP5]] to <4 x float>
663; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
664; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
665; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i32> [[TMP7]] to <4 x float>
666; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
667; SSE-NEXT:    ret void
668;
669; AVX256-LABEL: @uitofp_16i32_16f32(
670; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
671; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
672; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
673; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
674; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i32> [[TMP3]] to <8 x float>
675; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
676; AVX256-NEXT:    ret void
677;
678; AVX512-LABEL: @uitofp_16i32_16f32(
679; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
680; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
681; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
682; AVX512-NEXT:    ret void
683;
684  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
685  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
686  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
687  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
688  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
689  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
690  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
691  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
692  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
693  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
694  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
695  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
696  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
697  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
698  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
699  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
700  %cvt0  = uitofp i32 %ld0  to float
701  %cvt1  = uitofp i32 %ld1  to float
702  %cvt2  = uitofp i32 %ld2  to float
703  %cvt3  = uitofp i32 %ld3  to float
704  %cvt4  = uitofp i32 %ld4  to float
705  %cvt5  = uitofp i32 %ld5  to float
706  %cvt6  = uitofp i32 %ld6  to float
707  %cvt7  = uitofp i32 %ld7  to float
708  %cvt8  = uitofp i32 %ld8  to float
709  %cvt9  = uitofp i32 %ld9  to float
710  %cvt10 = uitofp i32 %ld10 to float
711  %cvt11 = uitofp i32 %ld11 to float
712  %cvt12 = uitofp i32 %ld12 to float
713  %cvt13 = uitofp i32 %ld13 to float
714  %cvt14 = uitofp i32 %ld14 to float
715  %cvt15 = uitofp i32 %ld15 to float
716  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
717  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
718  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
719  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
720  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
721  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
722  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
723  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
724  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
725  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
726  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
727  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
728  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
729  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
730  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
731  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
732  ret void
733}
734
735define void @uitofp_4i16_4f32() #0 {
736; CHECK-LABEL: @uitofp_4i16_4f32(
737; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
738; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
739; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
740; CHECK-NEXT:    ret void
741;
742  %ld0 = load i16, ptr @src16, align 64
743  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
744  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
745  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
746  %cvt0 = uitofp i16 %ld0 to float
747  %cvt1 = uitofp i16 %ld1 to float
748  %cvt2 = uitofp i16 %ld2 to float
749  %cvt3 = uitofp i16 %ld3 to float
750  store float %cvt0, ptr @dst32, align 64
751  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
752  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
753  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
754  ret void
755}
756
757define void @uitofp_8i16_8f32() #0 {
758; SSE-LABEL: @uitofp_8i16_8f32(
759; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
760; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
761; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
762; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
763; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
764; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
765; SSE-NEXT:    ret void
766;
767; AVX-LABEL: @uitofp_8i16_8f32(
768; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
769; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
770; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
771; AVX-NEXT:    ret void
772;
773  %ld0 = load i16, ptr @src16, align 64
774  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
775  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
776  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
777  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
778  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
779  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
780  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
781  %cvt0 = uitofp i16 %ld0 to float
782  %cvt1 = uitofp i16 %ld1 to float
783  %cvt2 = uitofp i16 %ld2 to float
784  %cvt3 = uitofp i16 %ld3 to float
785  %cvt4 = uitofp i16 %ld4 to float
786  %cvt5 = uitofp i16 %ld5 to float
787  %cvt6 = uitofp i16 %ld6 to float
788  %cvt7 = uitofp i16 %ld7 to float
789  store float %cvt0, ptr @dst32, align 64
790  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
791  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
792  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
793  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
794  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
795  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
796  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
797  ret void
798}
799
800define void @uitofp_16i16_16f32() #0 {
801; SSE-LABEL: @uitofp_16i16_16f32(
802; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
803; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
804; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
805; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
806; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
807; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
808; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
809; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i16> [[TMP5]] to <4 x float>
810; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
811; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
812; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i16> [[TMP7]] to <4 x float>
813; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
814; SSE-NEXT:    ret void
815;
816; AVX256-LABEL: @uitofp_16i16_16f32(
817; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
818; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
819; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
820; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
821; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i16> [[TMP3]] to <8 x float>
822; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
823; AVX256-NEXT:    ret void
824;
825; AVX512-LABEL: @uitofp_16i16_16f32(
826; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
827; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
828; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
829; AVX512-NEXT:    ret void
830;
831  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
832  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
833  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
834  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
835  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
836  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
837  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
838  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
839  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
840  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
841  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
842  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
843  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
844  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
845  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
846  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
847  %cvt0  = uitofp i16 %ld0  to float
848  %cvt1  = uitofp i16 %ld1  to float
849  %cvt2  = uitofp i16 %ld2  to float
850  %cvt3  = uitofp i16 %ld3  to float
851  %cvt4  = uitofp i16 %ld4  to float
852  %cvt5  = uitofp i16 %ld5  to float
853  %cvt6  = uitofp i16 %ld6  to float
854  %cvt7  = uitofp i16 %ld7  to float
855  %cvt8  = uitofp i16 %ld8  to float
856  %cvt9  = uitofp i16 %ld9  to float
857  %cvt10 = uitofp i16 %ld10 to float
858  %cvt11 = uitofp i16 %ld11 to float
859  %cvt12 = uitofp i16 %ld12 to float
860  %cvt13 = uitofp i16 %ld13 to float
861  %cvt14 = uitofp i16 %ld14 to float
862  %cvt15 = uitofp i16 %ld15 to float
863  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
864  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
865  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
866  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
867  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
868  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
869  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
870  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
871  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
872  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
873  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
874  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
875  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
876  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
877  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
878  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
879  ret void
880}
881
882define void @uitofp_4i8_4f32() #0 {
883; CHECK-LABEL: @uitofp_4i8_4f32(
884; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
885; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
886; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
887; CHECK-NEXT:    ret void
888;
889  %ld0 = load i8, ptr @src8, align 64
890  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
891  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
892  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
893  %cvt0 = uitofp i8 %ld0 to float
894  %cvt1 = uitofp i8 %ld1 to float
895  %cvt2 = uitofp i8 %ld2 to float
896  %cvt3 = uitofp i8 %ld3 to float
897  store float %cvt0, ptr @dst32, align 64
898  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
899  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
900  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
901  ret void
902}
903
define void @uitofp_8i8_8f32() #0 {
; SSE-LABEL: @uitofp_8i8_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i8_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

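; SSE should use four <4 x i8> chunks, AVX256 two <8 x i8> halves, and AVX512 a single <16 x i8> uitofp.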
define void @uitofp_16i8_16f32() #0 {
; SSE-LABEL: @uitofp_16i8_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i8> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i8> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i8_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i8> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i8_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
  %cvt0  = uitofp i8 %ld0  to float
  %cvt1  = uitofp i8 %ld1  to float
  %cvt2  = uitofp i8 %ld2  to float
  %cvt3  = uitofp i8 %ld3  to float
  %cvt4  = uitofp i8 %ld4  to float
  %cvt5  = uitofp i8 %ld5  to float
  %cvt6  = uitofp i8 %ld6  to float
  %cvt7  = uitofp i8 %ld7  to float
  %cvt8  = uitofp i8 %ld8  to float
  %cvt9  = uitofp i8 %ld9  to float
  %cvt10 = uitofp i8 %ld10 to float
  %cvt11 = uitofp i8 %ld11 to float
  %cvt12 = uitofp i8 %ld12 to float
  %cvt13 = uitofp i8 %ld13 to float
  %cvt14 = uitofp i8 %ld14 to float
  %cvt15 = uitofp i8 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

attributes #0 = { nounwind }