; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
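;
; Checks which scalar sitofp chains (i8/i16/i32/i64 sources to f64/f32 destinations)
; the SLP vectorizer turns into vector sitofp, and at what vector width, for each of
; the SSE, AVX, AVX2 and AVX512 (with and without prefer-256-bit) RUN lines above.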

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8  = common global [64 x i8] zeroinitializer, align 64

@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64

;
; SITOFP to vXf64
;

define void @sitofp_2i64_2f64() #0 {
; SSE-LABEL: @sitofp_2i64_2f64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_2i64_2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @sitofp_2i64_2f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = sitofp i64 %ld0 to double
  %cvt1 = sitofp i64 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @sitofp_4i64_4f64() #0 {
; SSE-LABEL: @sitofp_4i64_4f64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_4i64_4f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX512-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @sitofp_4i64_4f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = sitofp i64 %ld0 to double
  %cvt1 = sitofp i64 %ld1 to double
  %cvt2 = sitofp i64 %ld2 to double
  %cvt3 = sitofp i64 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @sitofp_8i64_8f64() #0 {
; SSE-LABEL: @sitofp_8i64_8f64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; SSE-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
; SSE-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; SSE-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; SSE-NEXT:    [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
; SSE-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
; SSE-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
; SSE-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
; AVX256NODQ-NEXT:    [[CVT4:%.*]] = sitofp i64 [[LD4]] to double
; AVX256NODQ-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
; AVX256NODQ-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
; AVX256NODQ-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_8i64_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @sitofp_8i64_8f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256DQ-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x double>
; AVX256DQ-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = sitofp i64 %ld0 to double
  %cvt1 = sitofp i64 %ld1 to double
  %cvt2 = sitofp i64 %ld2 to double
  %cvt3 = sitofp i64 %ld3 to double
  %cvt4 = sitofp i64 %ld4 to double
  %cvt5 = sitofp i64 %ld5 to double
  %cvt6 = sitofp i64 %ld6 to double
  %cvt7 = sitofp i64 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

define void @sitofp_2i32_2f64() #0 {
; CHECK-LABEL: @sitofp_2i32_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %cvt0 = sitofp i32 %ld0 to double
  %cvt1 = sitofp i32 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @sitofp_4i32_4f64() #0 {
; SSE-LABEL: @sitofp_4i32_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_4i32_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = sitofp i32 %ld0 to double
  %cvt1 = sitofp i32 %ld1 to double
  %cvt2 = sitofp i32 %ld2 to double
  %cvt3 = sitofp i32 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @sitofp_8i32_8f64() #0 {
; SSE-LABEL: @sitofp_8i32_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_8i32_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_8i32_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = sitofp i32 %ld0 to double
  %cvt1 = sitofp i32 %ld1 to double
  %cvt2 = sitofp i32 %ld2 to double
  %cvt3 = sitofp i32 %ld3 to double
  %cvt4 = sitofp i32 %ld4 to double
  %cvt5 = sitofp i32 %ld5 to double
  %cvt6 = sitofp i32 %ld6 to double
  %cvt7 = sitofp i32 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

define void @sitofp_2i16_2f64() #0 {
; CHECK-LABEL: @sitofp_2i16_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %cvt0 = sitofp i16 %ld0 to double
  %cvt1 = sitofp i16 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @sitofp_4i16_4f64() #0 {
; SSE-LABEL: @sitofp_4i16_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_4i16_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = sitofp i16 %ld0 to double
  %cvt1 = sitofp i16 %ld1 to double
  %cvt2 = sitofp i16 %ld2 to double
  %cvt3 = sitofp i16 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @sitofp_8i16_8f64() #0 {
; SSE-LABEL: @sitofp_8i16_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i16> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i16> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_8i16_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_8i16_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = sitofp i16 %ld0 to double
  %cvt1 = sitofp i16 %ld1 to double
  %cvt2 = sitofp i16 %ld2 to double
  %cvt3 = sitofp i16 %ld3 to double
  %cvt4 = sitofp i16 %ld4 to double
  %cvt5 = sitofp i16 %ld5 to double
  %cvt6 = sitofp i16 %ld6 to double
  %cvt7 = sitofp i16 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

define void @sitofp_2i8_2f64() #0 {
; CHECK-LABEL: @sitofp_2i8_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %cvt0 = sitofp i8 %ld0 to double
  %cvt1 = sitofp i8 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @sitofp_4i8_4f64() #0 {
; SSE-LABEL: @sitofp_4i8_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_4i8_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = sitofp i8 %ld0 to double
  %cvt1 = sitofp i8 %ld1 to double
  %cvt2 = sitofp i8 %ld2 to double
  %cvt3 = sitofp i8 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @sitofp_8i8_8f64() #0 {
; SSE-LABEL: @sitofp_8i8_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i8> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i8> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_8i8_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_8i8_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = sitofp i8 %ld0 to double
  %cvt1 = sitofp i8 %ld1 to double
  %cvt2 = sitofp i8 %ld2 to double
  %cvt3 = sitofp i8 %ld3 to double
  %cvt4 = sitofp i8 %ld4 to double
  %cvt5 = sitofp i8 %ld5 to double
  %cvt6 = sitofp i8 %ld6 to double
  %cvt7 = sitofp i8 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

;
; SITOFP to vXf32
;

define void @sitofp_2i64_2f32() #0 {
; SSE-LABEL: @sitofp_2i64_2f32(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @sitofp_2i64_2f32(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; AVX256NODQ-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_2i64_2f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @sitofp_2i64_2f32(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = sitofp i64 %ld0 to float
  %cvt1 = sitofp i64 %ld1 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  ret void
}

define void @sitofp_4i64_4f32() #0 {
; CHECK-LABEL: @sitofp_4i64_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = sitofp i64 %ld0 to float
  %cvt1 = sitofp i64 %ld1 to float
  %cvt2 = sitofp i64 %ld2 to float
  %cvt3 = sitofp i64 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @sitofp_8i64_8f32() #0 {
; SSE-LABEL: @sitofp_8i64_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_8i64_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = sitofp i64 %ld0 to float
  %cvt1 = sitofp i64 %ld1 to float
  %cvt2 = sitofp i64 %ld2 to float
  %cvt3 = sitofp i64 %ld3 to float
  %cvt4 = sitofp i64 %ld4 to float
  %cvt5 = sitofp i64 %ld5 to float
  %cvt6 = sitofp i64 %ld6 to float
  %cvt7 = sitofp i64 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

define void @sitofp_4i32_4f32() #0 {
; CHECK-LABEL: @sitofp_4i32_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = sitofp i32 %ld0 to float
  %cvt1 = sitofp i32 %ld1 to float
  %cvt2 = sitofp i32 %ld2 to float
  %cvt3 = sitofp i32 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @sitofp_8i32_8f32() #0 {
; SSE-LABEL: @sitofp_8i32_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_8i32_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = sitofp i32 %ld0 to float
  %cvt1 = sitofp i32 %ld1 to float
  %cvt2 = sitofp i32 %ld2 to float
  %cvt3 = sitofp i32 %ld3 to float
  %cvt4 = sitofp i32 %ld4 to float
  %cvt5 = sitofp i32 %ld5 to float
  %cvt6 = sitofp i32 %ld6 to float
  %cvt7 = sitofp i32 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

define void @sitofp_16i32_16f32() #0 {
; SSE-LABEL: @sitofp_16i32_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i32> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_16i32_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i32> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_16i32_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
  %cvt0  = sitofp i32 %ld0  to float
  %cvt1  = sitofp i32 %ld1  to float
  %cvt2  = sitofp i32 %ld2  to float
  %cvt3  = sitofp i32 %ld3  to float
  %cvt4  = sitofp i32 %ld4  to float
  %cvt5  = sitofp i32 %ld5  to float
  %cvt6  = sitofp i32 %ld6  to float
  %cvt7  = sitofp i32 %ld7  to float
  %cvt8  = sitofp i32 %ld8  to float
  %cvt9  = sitofp i32 %ld9  to float
  %cvt10 = sitofp i32 %ld10 to float
  %cvt11 = sitofp i32 %ld11 to float
  %cvt12 = sitofp i32 %ld12 to float
  %cvt13 = sitofp i32 %ld13 to float
  %cvt14 = sitofp i32 %ld14 to float
  %cvt15 = sitofp i32 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

define void @sitofp_4i16_4f32() #0 {
; CHECK-LABEL: @sitofp_4i16_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = sitofp i16 %ld0 to float
  %cvt1 = sitofp i16 %ld1 to float
  %cvt2 = sitofp i16 %ld2 to float
  %cvt3 = sitofp i16 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @sitofp_8i16_8f32() #0 {
; SSE-LABEL: @sitofp_8i16_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_8i16_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = sitofp i16 %ld0 to float
  %cvt1 = sitofp i16 %ld1 to float
  %cvt2 = sitofp i16 %ld2 to float
  %cvt3 = sitofp i16 %ld3 to float
  %cvt4 = sitofp i16 %ld4 to float
  %cvt5 = sitofp i16 %ld5 to float
  %cvt6 = sitofp i16 %ld6 to float
  %cvt7 = sitofp i16 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}
853
define void @sitofp_16i16_16f32() #0 {
; SSE-LABEL: @sitofp_16i16_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i16> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_16i16_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i16> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_16i16_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
  %cvt0  = sitofp i16 %ld0  to float
  %cvt1  = sitofp i16 %ld1  to float
  %cvt2  = sitofp i16 %ld2  to float
  %cvt3  = sitofp i16 %ld3  to float
  %cvt4  = sitofp i16 %ld4  to float
  %cvt5  = sitofp i16 %ld5  to float
  %cvt6  = sitofp i16 %ld6  to float
  %cvt7  = sitofp i16 %ld7  to float
  %cvt8  = sitofp i16 %ld8  to float
  %cvt9  = sitofp i16 %ld9  to float
  %cvt10 = sitofp i16 %ld10 to float
  %cvt11 = sitofp i16 %ld11 to float
  %cvt12 = sitofp i16 %ld12 to float
  %cvt13 = sitofp i16 %ld13 to float
  %cvt14 = sitofp i16 %ld14 to float
  %cvt15 = sitofp i16 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

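; 4 x i8 -> 4 x float: per the CHECK lines, every configuration is expected to emit a
; single <4 x i8> load, one <4 x float> sitofp and one vector store.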
define void @sitofp_4i8_4f32() #0 {
; CHECK-LABEL: @sitofp_4i8_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = sitofp i8 %ld0 to float
  %cvt1 = sitofp i8 %ld1 to float
  %cvt2 = sitofp i8 %ld2 to float
  %cvt3 = sitofp i8 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

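; 8 x i8 -> 8 x float: per the CHECK lines, SSE is expected to split the conversion into
; two <4 x i8> chunks, while AVX handles it as a single <8 x i8> load and convert.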
define void @sitofp_8i8_8f32() #0 {
; SSE-LABEL: @sitofp_8i8_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @sitofp_8i8_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = sitofp i8 %ld0 to float
  %cvt1 = sitofp i8 %ld1 to float
  %cvt2 = sitofp i8 %ld2 to float
  %cvt3 = sitofp i8 %ld3 to float
  %cvt4 = sitofp i8 %ld4 to float
  %cvt5 = sitofp i8 %ld5 to float
  %cvt6 = sitofp i8 %ld6 to float
  %cvt7 = sitofp i8 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

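; 16 x i8 -> 16 x float: per the CHECK lines, SSE is expected to use four <4 x i8> chunks,
; AVX256 two <8 x i8> chunks, and AVX512 a single <16 x i8> load and convert.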
define void @sitofp_16i8_16f32() #0 {
; SSE-LABEL: @sitofp_16i8_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i8> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i8> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @sitofp_16i8_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i8> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @sitofp_16i8_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
  %cvt0  = sitofp i8 %ld0  to float
  %cvt1  = sitofp i8 %ld1  to float
  %cvt2  = sitofp i8 %ld2  to float
  %cvt3  = sitofp i8 %ld3  to float
  %cvt4  = sitofp i8 %ld4  to float
  %cvt5  = sitofp i8 %ld5  to float
  %cvt6  = sitofp i8 %ld6  to float
  %cvt7  = sitofp i8 %ld7  to float
  %cvt8  = sitofp i8 %ld8  to float
  %cvt9  = sitofp i8 %ld9  to float
  %cvt10 = sitofp i8 %ld10 to float
  %cvt11 = sitofp i8 %ld11 to float
  %cvt12 = sitofp i8 %ld12 to float
  %cvt13 = sitofp i8 %ld13 to float
  %cvt14 = sitofp i8 %ld14 to float
  %cvt15 = sitofp i8 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

;
; SITOFP BUILDVECTOR
;

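; 4 x i32 scalar args -> 4 x double: per the CHECK lines, SSE is expected to build two
; <2 x i32> vectors, convert each to <2 x double> and shuffle the halves into a
; <4 x double>, while AVX builds one <4 x i32> vector and converts it in a single step.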
define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; SSE-LABEL: @sitofp_4xi32_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
; SSE-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; SSE-NEXT:    [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double>
; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0
; SSE-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1
; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE-NEXT:    [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE-NEXT:    ret <4 x double> [[RES31]]
;
; AVX-LABEL: @sitofp_4xi32_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT:    [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x double>
; AVX-NEXT:    ret <4 x double> [[TMP5]]
;
  %cvt0 = sitofp i32 %a0 to double
  %cvt1 = sitofp i32 %a1 to double
  %cvt2 = sitofp i32 %a2 to double
  %cvt3 = sitofp i32 %a3 to double
  %res0 = insertelement <4 x double> poison, double %cvt0, i32 0
  %res1 = insertelement <4 x double> %res0, double %cvt1, i32 1
  %res2 = insertelement <4 x double> %res1, double %cvt2, i32 2
  %res3 = insertelement <4 x double> %res2, double %cvt3, i32 3
  ret <4 x double> %res3
}

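; 4 x i32 scalar args -> 4 x float: per the CHECK lines, every configuration is expected
; to build one <4 x i32> vector and convert it with a single <4 x float> sitofp.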
define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; CHECK-NEXT:    ret <4 x float> [[TMP5]]
;
  %cvt0 = sitofp i32 %a0 to float
  %cvt1 = sitofp i32 %a1 to float
  %cvt2 = sitofp i32 %a2 to float
  %cvt3 = sitofp i32 %a3 to float
  %res0 = insertelement <4 x float> poison, float %cvt0, i32 0
  %res1 = insertelement <4 x float> %res0, float %cvt1, i32 1
  %res2 = insertelement <4 x float> %res1, float %cvt2, i32 2
  %res3 = insertelement <4 x float> %res2, float %cvt3, i32 3
  ret <4 x float> %res3
}

attributes #0 = { nounwind }